diff --git a/0006-Support-LoongArch.patch b/0006-Support-LoongArch.patch new file mode 100644 index 0000000000000000000000000000000000000000..9480ce8812c7345715c934800f54a0218f2e656d --- /dev/null +++ b/0006-Support-LoongArch.patch @@ -0,0 +1,64225 @@ +diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt +index c4cd2539e..c8cf4fe27 100644 +--- a/clang/bindings/python/tests/CMakeLists.txt ++++ b/clang/bindings/python/tests/CMakeLists.txt +@@ -43,7 +43,7 @@ endif() + # addressed. + # SystemZ has broken Python/FFI interface: + # https://reviews.llvm.org/D52840#1265716 +-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$") ++if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$") + set(RUN_PYTHON_TESTS FALSE) + endif() + +diff --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst +index 68c13076c..7caac30a9 100644 +--- a/clang/docs/ClangFormattedStatus.rst ++++ b/clang/docs/ClangFormattedStatus.rst +@@ -5109,21 +5109,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:2 + - `2` + - `0` + - :good:`100%` +- * - llvm/lib/Target/LoongArch +- - `19` +- - `19` +- - `0` +- - :good:`100%` +- * - llvm/lib/Target/LoongArch/MCTargetDesc +- - `12` +- - `12` +- - `0` +- - :good:`100%` +- * - llvm/lib/Target/LoongArch/TargetInfo +- - `2` +- - `2` +- - `0` +- - :good:`100%` + * - llvm/lib/Target/M68k + - `26` + - `25` +diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst +index e533ecfd5..c758f4473 100644 +--- a/clang/docs/ReleaseNotes.rst ++++ b/clang/docs/ReleaseNotes.rst +@@ -1273,28 +1273,6 @@ Windows Support + + - Clang now passes relevant LTO options to the linker (LLD) in MinGW mode. + +-LoongArch Support +-^^^^^^^^^^^^^^^^^ +-- Added builtins support for all LSX (128-bits SIMD) and LASX (256-bits SIMD) +- instructions. +-- Added builtins support for approximate calculation instructions that were +- introduced in LoongArch Reference Manual V1.10. +-- Made ``-mcmodel=`` compatible with LoongArch gcc that accepted ``normal``, +- ``medium`` and ``extreme``. +-- The ``model`` attribute was now supported for overriding the default code +- model used to access global variables. The following values were supported: +- ``normal``, ``medium`` and ``extreme``. +- +- *Example Code*: +- +- .. code-block:: c +- +- int var __attribute((model("extreme"))); +- +-- Default to ``-fno-direct-access-external-data`` for non-PIC. +-- An ABI mismatch with gcc/g++ about empty structs/unions passing was fixed. +-- ``_mcount`` was generated instead of ``mcount``. 
+- + RISC-V Support + ^^^^^^^^^^^^^^ + - Unaligned memory accesses can be toggled by ``-m[no-]unaligned-access`` or the +diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt +index 18512b1a7..0cea0bbe7 100644 +--- a/clang/docs/tools/clang-formatted-files.txt ++++ b/clang/docs/tools/clang-formatted-files.txt +@@ -6512,39 +6512,6 @@ llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp + llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h + llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp + llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h +-llvm/lib/Target/LoongArch/LoongArch.h +-llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +-llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +-llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +-llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +-llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +-llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +-llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +-llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +-llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +-llvm/lib/Target/LoongArch/LoongArchISelLowering.h +-llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +-llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +-llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +-llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +-llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +-llvm/lib/Target/LoongArch/LoongArchSubtarget.h +-llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +-llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +-llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +-llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +-llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h + llvm/lib/Target/M68k/M68k.h + llvm/lib/Target/M68k/M68kAsmPrinter.cpp + llvm/lib/Target/M68k/M68kAsmPrinter.h +diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h +index 8e9b7ad8b..1b831c951 100644 +--- a/clang/include/clang/AST/Attr.h ++++ b/clang/include/clang/AST/Attr.h +@@ -25,7 +25,6 @@ + #include "clang/Basic/Sanitizers.h" + #include "clang/Basic/SourceLocation.h" + #include "llvm/Frontend/HLSL/HLSLResource.h" +-#include "llvm/Support/CodeGen.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/VersionTuple.h" + #include "llvm/Support/raw_ostream.h" +diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td +index dbf2dd212..f90bb1ece 100644 +--- a/clang/include/clang/Basic/Attr.td ++++ b/clang/include/clang/Basic/Attr.td +@@ -143,11 +143,6 @@ def ExternalGlobalVar : SubsetSubjectisLocalExternDecl()}], + "external global variables">; + +-def NonTLSGlobalVar : SubsetSubjecthasGlobalStorage() && +- S->getTLSKind() == 0}], +- "non-TLS global variables">; +- + def 
InlineFunction : SubsetSubjectisInlineSpecified()}], "inline functions">; + +@@ -439,7 +434,6 @@ def TargetAArch64 : TargetArch<["aarch64", "aarch64_be", "aarch64_32"]>; + def TargetAnyArm : TargetArch; + def TargetAVR : TargetArch<["avr"]>; + def TargetBPF : TargetArch<["bpfel", "bpfeb"]>; +-def TargetLoongArch : TargetArch<["loongarch32", "loongarch64"]>; + def TargetMips32 : TargetArch<["mips", "mipsel"]>; + def TargetAnyMips : TargetArch<["mips", "mipsel", "mips64", "mips64el"]>; + def TargetMSP430 : TargetArch<["msp430"]>; +@@ -850,8 +844,7 @@ def XRayLogArgs : InheritableAttr { + def PatchableFunctionEntry + : InheritableAttr, + TargetSpecificAttr> { ++ ["aarch64", "aarch64_be", "riscv32", "riscv64", "x86", "x86_64"]>> { + let Spellings = [GCC<"patchable_function_entry">]; + let Subjects = SubjectList<[Function, ObjCMethod]>; + let Args = [UnsignedArgument<"Count">, DefaultIntArgument<"Offset", 0>]; +@@ -2814,15 +2807,6 @@ def PragmaClangTextSection : InheritableAttr { + let Documentation = [InternalOnly]; + } + +-def CodeModel : InheritableAttr, TargetSpecificAttr { +- let Spellings = [GCC<"model">]; +- let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", +- ["normal", "medium", "extreme"], ["Small", "Medium", "Large"], +- /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>]; +- let Subjects = SubjectList<[NonTLSGlobalVar], ErrorDiag>; +- let Documentation = [CodeModelDocs]; +-} +- + def Sentinel : InheritableAttr { + let Spellings = [GCC<"sentinel">]; + let Args = [DefaultIntArgument<"Sentinel", 0>, +diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td +index e02a1201e..784666a3b 100644 +--- a/clang/include/clang/Basic/AttrDocs.td ++++ b/clang/include/clang/Basic/AttrDocs.td +@@ -57,15 +57,6 @@ global variable or function should be in after translation. + let Heading = "section, __declspec(allocate)"; + } + +-def CodeModelDocs : Documentation { +- let Category = DocCatVariable; +- let Content = [{ +-The ``model`` attribute allows overriding the translation unit's +-code model (specified by ``-mcmodel``) for a specific global variable. +- }]; +- let Heading = "model"; +-} +- + def UsedDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +@@ -5608,7 +5599,7 @@ takes precedence over the command line option ``-fpatchable-function-entry=N,M`` + ``M`` defaults to 0 if omitted. + + This attribute is only supported on +-aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64 targets. ++aarch64/aarch64-be/riscv32/riscv64/i386/x86-64 targets. + }]; + } + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 95359a3fd..ea4ae55fb 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -1,28 +1,2006 @@ +-//==- BuiltinsLoongArch.def - LoongArch Builtin function database -- C++ -*-==// ++//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --------*- C++ -*-==// + // +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
+ // + //===----------------------------------------------------------------------===// + // +-// This file defines the LoongArch-specific builtin function database. Users of ++// This file defines the LoongArch-specific builtin function database. Users of + // this file must define the BUILTIN macro to make use of this information. + // + //===----------------------------------------------------------------------===// + +-#if defined(BUILTIN) && !defined(TARGET_BUILTIN) +-# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) +-#endif ++// The format of this database matches clang/Basic/Builtins.def. ++ ++// LoongArch LSX ++ ++BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") ++ 
++BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc") ++BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_du, 
"V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_w, 
"V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc") ++ 
++BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrm_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrm_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrp_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrp_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrz_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrz_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrne_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrne_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc") ++BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc") ++BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc") ++BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc") ++BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc") ++BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc") ++BUILTIN(__builtin_lsx_vexth_qu_du, 
"V2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc") ++BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_h_b, 
"V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc") ++BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecipe_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecipe_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrt_s, 
"V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrte_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrte_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc") 
++BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc") 
++BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc") 
++BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc") ++ ++BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_wu, 
"V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc") ++BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc") ++BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc") ++ ++//LoongArch LASX ++ 
++BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc") ++ ++ ++BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc") 
++ ++BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc") 
++BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc") 
++BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc") 
++BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc") ++BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecipe_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecipe_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrte_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrte_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc") 
++BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc") ++ ++BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrne_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrne_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrz_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrz_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrp_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrp_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrm_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrm_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", 
"nc") ++BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc") ++BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc") ++ ++BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu, 
"V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") ++BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") ++BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_d_q, 
"V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") + +-// Definition of LoongArch basic builtins. +-#include "clang/Basic/BuiltinsLoongArchBase.def" ++BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") + +-// Definition of LSX builtins. +-#include "clang/Basic/BuiltinsLoongArchLSX.def" + +-// Definition of LASX builtins. 
+-#include "clang/Basic/BuiltinsLoongArchLASX.def" ++// LoongArch BASE + ++BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd_d, "ULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") ++BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc") ++BUILTIN(__builtin_loongarch_cacop_d, "viULiLi", "nc") ++BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") ++BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_break, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_frecipe_s, "ff", "nc") ++BUILTIN(__builtin_loongarch_frecipe_d, "dd", "nc") ++BUILTIN(__builtin_loongarch_frsqrte_s, "ff", "nc") ++BUILTIN(__builtin_loongarch_frsqrte_d, "dd", "nc") + #undef BUILTIN +-#undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +deleted file mode 100644 +index a5a07c167..000000000 +--- a/clang/include/clang/Basic/BuiltinsLoongArchBase.def ++++ /dev/null +@@ -1,58 +0,0 @@ +-//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file defines the LoongArch-specific basic builtin function database. +-// Users of this file must define the BUILTIN macro to make use of this +-// information. 
+-// +-//===----------------------------------------------------------------------===// +- +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +-TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_frecipe_s, "ff", "nc", "f,frecipe") +-TARGET_BUILTIN(__builtin_loongarch_frecipe_d, "dd", "nc", "d,frecipe") +-TARGET_BUILTIN(__builtin_loongarch_frsqrte_s, "ff", "nc", "f,frecipe") +-TARGET_BUILTIN(__builtin_loongarch_frsqrte_d, "dd", "nc", "d,frecipe") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +deleted file mode 100644 +index 4cf51cc00..000000000 +--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def ++++ /dev/null +@@ -1,988 +0,0 @@ +-//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file defines the LoongArch-specific LASX builtin function database. 
+-// Users of this file must define the BUILTIN macro to make use of this +-// information. +-// +-//===----------------------------------------------------------------------===// +- +-TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, 
"V4LLiV4ULLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, 
"V16sV32UcV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") +- +- +-TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", 
"lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") +- +- +-TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", 
"nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") +- 
+-TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrecipe_s, "V8fV8f", "nc", "lasx,frecipe") +-TARGET_BUILTIN(__builtin_lasx_xvfrecipe_d, "V4dV4d", "nc", "lasx,frecipe") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") +- 
+-TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_s, "V8fV8f", "nc", "lasx,frecipe") +-TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_d, "V4dV4d", "nc", "lasx,frecipe") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, 
"V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, 
"V16Ssi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") 
+-TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") +- +-TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") +-TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +deleted file mode 100644 +index c90f4dc54..000000000 +--- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def ++++ /dev/null +@@ -1,959 +0,0 @@ +-//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// +-// +-// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +-// +-// This file defines the LoongArch-specific LSX builtin function database. +-// Users of this file must define the BUILTIN macro to make use of this +-// information. +-// +-//===----------------------------------------------------------------------===// +- +-TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") 
+-TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", 
"lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") 
+-TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, 
"V4SiV8UsV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") 
+-TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +- +-TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") +- +- +-TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", 
"lsx") +-TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") +- 
+-TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, 
"V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") 
+-TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") +- 
+-TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrecipe_s, "V4fV4f", "nc", "lsx,frecipe") +-TARGET_BUILTIN(__builtin_lsx_vfrecipe_d, "V2dV2d", "nc", "lsx,frecipe") +- +-TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrsqrte_s, "V4fV4f", "nc", "lsx,frecipe") +-TARGET_BUILTIN(__builtin_lsx_vfrsqrte_d, "V2dV2d", "nc", "lsx,frecipe") +- +-TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") 
+-TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", 
"lsx") +-TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") 
+-TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- 
+-TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") +- +-TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") +-TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 094fe1950..f294336ff 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -769,18 +769,6 @@ def warn_drv_sarif_format_unstable : Warning< + def err_drv_riscv_unsupported_with_linker_relaxation : Error< + "%0 is unsupported with RISC-V linker relaxation (-mrelax)">; + +-def warn_drv_loongarch_conflicting_implied_val : Warning< +- "ignoring '%0' as it conflicts with that implied by '%1' (%2)">, +- InGroup; +-def 
err_drv_loongarch_invalid_mfpu_EQ : Error< +- "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; +-def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< +- "wrong fpu width; LSX depends on 64-bit FPU.">; +-def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< +- "wrong fpu width; LASX depends on 64-bit FPU.">; +-def err_drv_loongarch_invalid_simd_option_combination : Error< +- "invalid option combination; LASX depends on LSX.">; +- + def err_drv_expand_response_file : Error< + "failed to expand response file: %0">; + +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index ef8c111b1..d4bb04774 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -3430,8 +3430,6 @@ def warn_objc_redundant_literal_use : Warning< + def err_attr_tlsmodel_arg : Error<"tls_model must be \"global-dynamic\", " + "\"local-dynamic\", \"initial-exec\" or \"local-exec\"">; + +-def err_attr_codemodel_arg : Error<"code model '%0' is not supported on this target">; +- + def err_aix_attr_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">; + + def err_tls_var_aligned_over_maximum : Error< +diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h +index c31834fb5..c11297242 100644 +--- a/clang/include/clang/Basic/TargetBuiltins.h ++++ b/clang/include/clang/Basic/TargetBuiltins.h +@@ -167,7 +167,7 @@ namespace clang { + /// LoongArch builtins + namespace LoongArch { + enum { +- LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, + #define BUILTIN(ID, TYPE, ATTRS) BI##ID, + #include "clang/Basic/BuiltinsLoongArch.def" + LastTSBuiltin +@@ -371,7 +371,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ LoongArch::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def +index 9501cca76..8ea4becef 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.def ++++ b/clang/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic LoongArch ABI is a modified version of the Itanium ABI. ++/// ++/// At the moment, only change from the generic ABI in this case is: ++/// - representation of member function pointers adjusted as in ARM. ++ITANIUM_CXXABI(GenericLoongArch, "loongarch") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. 
+ /// + /// The changes from the Itanium ABI are: +diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h +index c113a6a04..2c1bcf387 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.h ++++ b/clang/include/clang/Basic/TargetCXXABI.h +@@ -100,6 +100,9 @@ public: + case GenericAArch64: + return T.isAArch64(); + ++ case GenericLoongArch: ++ return T.isLoongArch(); ++ + case GenericMIPS: + return T.isMIPS(); + +@@ -164,6 +167,7 @@ public: + case Fuchsia: + case GenericARM: + case GenericAArch64: ++ case GenericLoongArch: + case GenericMIPS: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any +@@ -248,6 +252,7 @@ public: + case GenericItanium: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return true; +@@ -286,6 +291,7 @@ public: + case GenericAArch64: + case GenericARM: + case iOS: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return UseTailPaddingUnlessPOD03; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 175bedbfb..4ad4e745e 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -4624,16 +4624,16 @@ def mrvv_vector_bits_EQ : Joined<["-"], "mrvv-vector-bits=">, Group, + "(RISC-V only)">; + + def munaligned_access : Flag<["-"], "munaligned-access">, Group, +- HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64/LoongArch/RISC-V only)">; ++ HelpText<"Allow memory accesses to be unaligned">; + def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, +- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64/LoongArch/RISC-V only)">; ++ HelpText<"Force all memory accesses to be aligned">; + } // let Flags = [TargetSpecific] + def mstrict_align : Flag<["-"], "mstrict-align">, Alias, + Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>, + HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; + def mno_strict_align : Flag<["-"], "mno-strict-align">, Alias, + Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>, +- HelpText<"Allow memory accesses to be unaligned (same as munaligned-access)">; ++ HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; + let Flags = [TargetSpecific] in { + def mno_thumb : Flag<["-"], "mno-thumb">, Group; + def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, +@@ -5026,13 +5026,13 @@ def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at functi + Visibility<[ClangOption, CC1Option]>, Group, + MarshallingInfoFlag>; + def mlsx : Flag<["-"], "mlsx">, Group, +- HelpText<"Enable Loongson SIMD Extension (LSX).">; ++ HelpText<"Use LARCH Loongson LSX instructions.">; + def mno_lsx : Flag<["-"], "mno-lsx">, Group, +- HelpText<"Disable Loongson SIMD Extension (LSX).">; ++ HelpText<"Disable LARCH Loongson LSX instructions.">; + def mlasx : Flag<["-"], "mlasx">, Group, +- HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; ++ HelpText<"Enable LARCH Loongson LASX instructions.">; + def mno_lasx : Flag<["-"], "mno-lasx">, Group, +- HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; ++ HelpText<"Disable LARCH Loongson LASX instructions.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, + Visibility<[ClangOption, CC1Option]>, Group, + MarshallingInfoFlag>; +@@ -5074,8 +5074,8 @@ def mdsp : Flag<["-"], "mdsp">, Group; + def mno_dsp : Flag<["-"], "mno-dsp">, Group; + def mdspr2 : Flag<["-"], "mdspr2">, Group; + def mno_dspr2 : Flag<["-"], "mno-dspr2">, Group; +-def msingle_float : Flag<["-"], "msingle-float">, Group; +-def mdouble_float : Flag<["-"], "mdouble-float">, Group; ++def msingle_float : Flag<["-"], "msingle-float">, Group; ++def mdouble_float : Flag<["-"], "mdouble-float">, Group; + def mmadd4 : Flag<["-"], "mmadd4">, Group, + HelpText<"Enable the generation of 4-operand madd.s, madd.d and related instructions.">; + def mno_madd4 : Flag<["-"], "mno-madd4">, Group, +@@ -5195,6 +5195,12 @@ def mno_relax_pic_calls : Flag<["-"], "mno-relax-pic-calls">, + Group, + HelpText<"Do not produce relaxation hints for linkers to try optimizing PIC " + "call sequences into direct calls (MIPS only)">, Flags<[HelpHidden]>; ++def mfix_loongson3_llsc : Flag<["-"], "mfix-loongson3-llsc">, ++ Group, ++ HelpText<"Workaround loongson3 llsc erratum (MIPS only)">; ++def mno_fix_loongson3_llsc : Flag<["-"], "mno-fix-loongson3-llsc">, ++ Group, ++ HelpText<"Don't Workaround loongson3 llsc erratum (MIPS only)">; + def mglibc : Flag<["-"], "mglibc">, Group, Flags<[HelpHidden]>; + def muclibc : Flag<["-"], "muclibc">, Group, Flags<[HelpHidden]>; + def module_file_info : Flag<["-"], "module-file-info">, Flags<[]>, +diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h +index 6adb8fb79..a65155c6d 100644 +--- a/clang/include/clang/Sema/Sema.h ++++ b/clang/include/clang/Sema/Sema.h +@@ -13924,13 +13924,14 @@ private: + bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); + void checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D); +- bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, +- unsigned BuiltinID, CallExpr *TheCall); + bool CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, + unsigned BuiltinID, + CallExpr *TheCall); + bool CheckNVPTXBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); ++ bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall); + + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); +diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap +index 52395ee9b..74fce9f75 100644 +--- a/clang/include/module.modulemap ++++ b/clang/include/module.modulemap +@@ -49,9 +49,6 @@ module Clang_Basic { + textual header "clang/Basic/BuiltinsHexagonDep.def" + textual header "clang/Basic/BuiltinsHexagonMapCustomDep.def" + textual header "clang/Basic/BuiltinsLoongArch.def" +- textual header "clang/Basic/BuiltinsLoongArchBase.def" +- textual header "clang/Basic/BuiltinsLoongArchLSX.def" +- textual header "clang/Basic/BuiltinsLoongArchLASX.def" + textual header "clang/Basic/BuiltinsMips.def" + textual header "clang/Basic/BuiltinsNEON.def" + textual header "clang/Basic/BuiltinsNVPTX.def" +diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp +index cc5de9a62..a4f38e80b 100644 +--- a/clang/lib/AST/ASTContext.cpp ++++ b/clang/lib/AST/ASTContext.cpp +@@ -829,6 +829,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: ++ case TargetCXXABI::GenericLoongArch: + case 
TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -12034,6 +12035,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericARM: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::iOS: + case TargetCXXABI::WebAssembly: +diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp +index e3283510c..1ae169946 100644 +--- a/clang/lib/Basic/Targets.cpp ++++ b/clang/lib/Basic/Targets.cpp +@@ -733,21 +733,14 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, + default: + return std::make_unique(Triple, Opts); + } +- case llvm::Triple::loongarch32: +- switch (os) { +- case llvm::Triple::Linux: +- return std::make_unique>(Triple, +- Opts); +- default: +- return std::make_unique(Triple, Opts); +- } ++ + case llvm::Triple::loongarch64: + switch (os) { + case llvm::Triple::Linux: +- return std::make_unique>(Triple, ++ return std::make_unique>(Triple, + Opts); + default: +- return std::make_unique(Triple, Opts); ++ return std::make_unique(Triple, Opts); + } + } + } +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +index 88537989a..39e1b5cac 100644 +--- a/clang/lib/Basic/Targets/LoongArch.cpp ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -1,4 +1,4 @@ +-//===--- LoongArch.cpp - Implement LoongArch target feature support -------===// ++//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -11,289 +11,177 @@ + //===----------------------------------------------------------------------===// + + #include "LoongArch.h" ++#include "Targets.h" + #include "clang/Basic/Diagnostic.h" + #include "clang/Basic/MacroBuilder.h" + #include "clang/Basic/TargetBuiltins.h" +-#include "llvm/Support/raw_ostream.h" +-#include "llvm/TargetParser/LoongArchTargetParser.h" ++#include "llvm/ADT/StringSwitch.h" + + using namespace clang; + using namespace clang::targets; + +-ArrayRef LoongArchTargetInfo::getGCCRegNames() const { +- static const char *const GCCRegNames[] = { +- // General purpose registers. +- "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", +- "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", +- "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", +- "$r28", "$r29", "$r30", "$r31", +- // Floating point registers. +- "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", +- "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", +- "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", +- "$f28", "$f29", "$f30", "$f31", +- // Condition flag registers. +- "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", +- // 128-bit vector registers. +- "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", +- "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", +- "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", +- "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", +- // 256-bit vector registers. 
+- "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", +- "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", +- "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", +- "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31"}; +- return llvm::ArrayRef(GCCRegNames); +-} ++const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = { ++#define BUILTIN(ID, TYPE, ATTRS) \ ++ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, ++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ ++ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES}, ++#include "clang/Basic/BuiltinsLoongArch.def" ++}; + +-ArrayRef +-LoongArchTargetInfo::getGCCRegAliases() const { +- static const TargetInfo::GCCRegAlias GCCRegAliases[] = { +- {{"zero", "$zero", "r0"}, "$r0"}, +- {{"ra", "$ra", "r1"}, "$r1"}, +- {{"tp", "$tp", "r2"}, "$r2"}, +- {{"sp", "$sp", "r3"}, "$r3"}, +- {{"a0", "$a0", "r4"}, "$r4"}, +- {{"a1", "$a1", "r5"}, "$r5"}, +- {{"a2", "$a2", "r6"}, "$r6"}, +- {{"a3", "$a3", "r7"}, "$r7"}, +- {{"a4", "$a4", "r8"}, "$r8"}, +- {{"a5", "$a5", "r9"}, "$r9"}, +- {{"a6", "$a6", "r10"}, "$r10"}, +- {{"a7", "$a7", "r11"}, "$r11"}, +- {{"t0", "$t0", "r12"}, "$r12"}, +- {{"t1", "$t1", "r13"}, "$r13"}, +- {{"t2", "$t2", "r14"}, "$r14"}, +- {{"t3", "$t3", "r15"}, "$r15"}, +- {{"t4", "$t4", "r16"}, "$r16"}, +- {{"t5", "$t5", "r17"}, "$r17"}, +- {{"t6", "$t6", "r18"}, "$r18"}, +- {{"t7", "$t7", "r19"}, "$r19"}, +- {{"t8", "$t8", "r20"}, "$r20"}, +- {{"r21"}, "$r21"}, +- {{"s9", "$s9", "r22", "fp", "$fp"}, "$r22"}, +- {{"s0", "$s0", "r23"}, "$r23"}, +- {{"s1", "$s1", "r24"}, "$r24"}, +- {{"s2", "$s2", "r25"}, "$r25"}, +- {{"s3", "$s3", "r26"}, "$r26"}, +- {{"s4", "$s4", "r27"}, "$r27"}, +- {{"s5", "$s5", "r28"}, "$r28"}, +- {{"s6", "$s6", "r29"}, "$r29"}, +- {{"s7", "$s7", "r30"}, "$r30"}, +- {{"s8", "$s8", "r31"}, "$r31"}, +- {{"$fa0"}, "$f0"}, +- {{"$fa1"}, "$f1"}, +- {{"$fa2"}, "$f2"}, +- {{"$fa3"}, "$f3"}, +- {{"$fa4"}, "$f4"}, +- {{"$fa5"}, "$f5"}, +- {{"$fa6"}, "$f6"}, +- {{"$fa7"}, "$f7"}, +- {{"$ft0"}, "$f8"}, +- {{"$ft1"}, "$f9"}, +- {{"$ft2"}, "$f10"}, +- {{"$ft3"}, "$f11"}, +- {{"$ft4"}, "$f12"}, +- {{"$ft5"}, "$f13"}, +- {{"$ft6"}, "$f14"}, +- {{"$ft7"}, "$f15"}, +- {{"$ft8"}, "$f16"}, +- {{"$ft9"}, "$f17"}, +- {{"$ft10"}, "$f18"}, +- {{"$ft11"}, "$f19"}, +- {{"$ft12"}, "$f20"}, +- {{"$ft13"}, "$f21"}, +- {{"$ft14"}, "$f22"}, +- {{"$ft15"}, "$f23"}, +- {{"$fs0"}, "$f24"}, +- {{"$fs1"}, "$f25"}, +- {{"$fs2"}, "$f26"}, +- {{"$fs3"}, "$f27"}, +- {{"$fs4"}, "$f28"}, +- {{"$fs5"}, "$f29"}, +- {{"$fs6"}, "$f30"}, +- {{"$fs7"}, "$f31"}, +- }; +- return llvm::ArrayRef(GCCRegAliases); ++bool LoongArchTargetInfo::processorSupportsGPR64() const { ++ return llvm::StringSwitch(CPU) ++ .Case("loongarch64", true) ++ .Case("la264", true) ++ .Case("la364", true) ++ .Case("la464", true) ++ .Case("la664", true) ++ .Default(false); ++ return false; + } + +-bool LoongArchTargetInfo::validateAsmConstraint( +- const char *&Name, TargetInfo::ConstraintInfo &Info) const { +- // See the GCC definitions here: +- // https://gcc.gnu.org/onlinedocs/gccint/Machine-Constraints.html +- // Note that the 'm' constraint is handled in TargetInfo. +- switch (*Name) { +- default: +- return false; +- case 'f': +- // A floating-point register (if available). +- Info.setAllowsRegister(); +- return true; +- case 'k': +- // A memory operand whose address is formed by a base register and +- // (optionally scaled) index register. 
+- Info.setAllowsMemory(); +- return true; +- case 'l': +- // A signed 16-bit constant. +- Info.setRequiresImmediate(-32768, 32767); +- return true; +- case 'I': +- // A signed 12-bit constant (for arithmetic instructions). +- Info.setRequiresImmediate(-2048, 2047); +- return true; +- case 'J': +- // Integer zero. +- Info.setRequiresImmediate(0); +- return true; +- case 'K': +- // An unsigned 12-bit constant (for logic instructions). +- Info.setRequiresImmediate(0, 4095); +- return true; +- case 'Z': +- // ZB: An address that is held in a general-purpose register. The offset is +- // zero. +- // ZC: A memory operand whose address is formed by a base register +- // and offset that is suitable for use in instructions with the same +- // addressing mode as ll.w and sc.w. +- if (Name[1] == 'C' || Name[1] == 'B') { +- Info.setAllowsMemory(); +- ++Name; // Skip over 'Z'. +- return true; +- } +- return false; +- } ++static constexpr llvm::StringLiteral ValidCPUNames[] = { ++ {"loongarch64"}, {"la264"}, {"la364"}, {"la464"}, {"la664"}}; ++ ++bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { ++ return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); + } + +-std::string +-LoongArchTargetInfo::convertConstraint(const char *&Constraint) const { +- std::string R; +- switch (*Constraint) { +- case 'Z': +- // "ZC"/"ZB" are two-character constraints; add "^" hint for later +- // parsing. +- R = "^" + std::string(Constraint, 2); +- ++Constraint; +- break; +- default: +- R = TargetInfo::convertConstraint(Constraint); +- break; +- } +- return R; ++void LoongArchTargetInfo::fillValidCPUList( ++ SmallVectorImpl &Values) const { ++ Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); + } + + void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, +- MacroBuilder &Builder) const { ++ MacroBuilder &Builder) const { + Builder.defineMacro("__loongarch__"); + unsigned GRLen = getRegisterWidth(); + Builder.defineMacro("__loongarch_grlen", Twine(GRLen)); + if (GRLen == 64) + Builder.defineMacro("__loongarch64"); + +- if (HasFeatureD) +- Builder.defineMacro("__loongarch_frlen", "64"); +- else if (HasFeatureF) +- Builder.defineMacro("__loongarch_frlen", "32"); +- else +- Builder.defineMacro("__loongarch_frlen", "0"); ++ if (ABI == "lp32") { ++ Builder.defineMacro("__loongarch32"); ++ } else { ++ Builder.defineMacro("__loongarch_lp64"); ++ } + +- // Define __loongarch_arch. +- StringRef ArchName = getCPU(); +- Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); ++ if (ABI == "lp32") { ++ Builder.defineMacro("_ABILP32", "1"); ++ } else if (ABI == "lpx32") { ++ Builder.defineMacro("_ABILPX32", "2"); ++ } else if (ABI == "lp64") { ++ Builder.defineMacro("_ABILP64", "3"); ++ Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64"); ++ } else ++ llvm_unreachable("Invalid ABI."); ++ ++ Builder.defineMacro("__REGISTER_PREFIX__", ""); ++ ++ switch (FloatABI) { ++ case HardFloat: ++ Builder.defineMacro("__loongarch_hard_float", Twine(1)); ++ Builder.defineMacro(IsSingleFloat ? "__loongarch_single_float" ++ : "__loongarch_double_float", ++ Twine(1)); ++ break; ++ case SoftFloat: ++ Builder.defineMacro("__loongarch_soft_float", Twine(1)); ++ break; ++ } + +- // Define __loongarch_tune. 
+- StringRef TuneCPU = getTargetOpts().TuneCPU; +- if (TuneCPU.empty()) +- TuneCPU = ArchName; +- Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); ++ switch (FPMode) { ++ case FP32: ++ Builder.defineMacro("__loongarch_fpr", Twine(32)); ++ Builder.defineMacro("__loongarch_frlen", Twine(32)); ++ break; ++ case FP64: ++ Builder.defineMacro("__loongarch_fpr", Twine(64)); ++ Builder.defineMacro("__loongarch_frlen", Twine(64)); ++ break; ++ } + +- if (HasFeatureLSX) ++ if (HasLSX) + Builder.defineMacro("__loongarch_sx", Twine(1)); +- if (HasFeatureLASX) ++ ++ if (HasLASX) + Builder.defineMacro("__loongarch_asx", Twine(1)); + +- StringRef ABI = getABI(); +- if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") +- Builder.defineMacro("__loongarch_lp64"); ++ Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(LangAS::Default))); ++ Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth())); ++ Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth())); + +- if (ABI == "lp64d" || ABI == "ilp32d") { +- Builder.defineMacro("__loongarch_hard_float"); +- Builder.defineMacro("__loongarch_double_float"); +- } else if (ABI == "lp64f" || ABI == "ilp32f") { +- Builder.defineMacro("__loongarch_hard_float"); +- Builder.defineMacro("__loongarch_single_float"); +- } else if (ABI == "lp64s" || ABI == "ilp32s") { +- Builder.defineMacro("__loongarch_soft_float"); +- } ++ Builder.defineMacro("_LOONGARCH_ARCH", "\"" + CPU + "\""); ++ Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(CPU).upper()); + + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); +- if (GRLen == 64) +- Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); +-} + +-static constexpr Builtin::Info BuiltinInfo[] = { +-#define BUILTIN(ID, TYPE, ATTRS) \ +- {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ +- {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +-#include "clang/Basic/BuiltinsLoongArch.def" +-}; +- +-bool LoongArchTargetInfo::initFeatureMap( +- llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, +- const std::vector &FeaturesVec) const { +- if (getTriple().getArch() == llvm::Triple::loongarch64) +- Features["64bit"] = true; +- if (getTriple().getArch() == llvm::Triple::loongarch32) +- Features["32bit"] = true; +- +- return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ // 32-bit loongarch processors don't have the necessary lld/scd instructions ++ // found in 64-bit processors. In the case of lp32 on a 64-bit processor, ++ // the instructions exist but using them violates the ABI since they ++ // require 64-bit GPRs and LP32 only supports 32-bit GPRs. ++ if (ABI == "lpx32" || ABI == "lp64") ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); + } + +-/// Return true if has this feature. + bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { +- bool Is64Bit = getTriple().getArch() == llvm::Triple::loongarch64; +- // TODO: Handle more features. 
+ return llvm::StringSwitch(Feature) +- .Case("loongarch32", !Is64Bit) +- .Case("loongarch64", Is64Bit) +- .Case("32bit", !Is64Bit) +- .Case("64bit", Is64Bit) +- .Case("lsx", HasFeatureLSX) +- .Case("lasx", HasFeatureLASX) ++ .Case("fp64", FPMode == FP64) ++ .Case("lsx", HasLSX) ++ .Case("lasx", HasLASX) + .Default(false); + } + + ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { +- return llvm::ArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - +- Builtin::FirstTSBuiltin); ++ return llvm::ArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); + } + +-bool LoongArchTargetInfo::handleTargetFeatures( +- std::vector &Features, DiagnosticsEngine &Diags) { +- for (const auto &Feature : Features) { +- if (Feature == "+d" || Feature == "+f") { +- // "d" implies "f". +- HasFeatureF = true; +- if (Feature == "+d") { +- HasFeatureD = true; +- } +- } else if (Feature == "+lsx") +- HasFeatureLSX = true; +- else if (Feature == "+lasx") +- HasFeatureLASX = true; ++bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const { ++ // FIXME: It's valid to use LP32 on a 64-bit CPU but the backend can't handle ++ // this yet. It's better to fail here than on the backend assertion. ++ if (processorSupportsGPR64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; + } +- return true; +-} + +-bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { +- return llvm::LoongArch::isValidCPUName(Name); +-} ++ // 64-bit ABI's require 64-bit CPU's. ++ if (!processorSupportsGPR64() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } + +-void LoongArchTargetInfo::fillValidCPUList( +- SmallVectorImpl &Values) const { +- llvm::LoongArch::fillValidCPUList(Values); ++ // FIXME: It's valid to use lp32 on a loongarch64 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch64() && ABI == "lp32") { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // FIXME: It's valid to use lpx32/lp64 on a loongarch32 triple but the backend ++ // can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch32() && (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ // -mfp32 and lpx32/lp64 ABIs are incompatible ++ if (FPMode != FP64 && !IsSingleFloat && ++ (ABI == "lpx32" || ABI == "lp64")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << ABI; ++ return false; ++ } ++ ++ if (FPMode != FP64 && (CPU == "loongarch64" || CPU == "la264" || ++ CPU == "la364" || CPU == "la464" || CPU == "la664")) { ++ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << CPU; ++ return false; ++ } ++ ++ return true; + } +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 331310249..b86825811 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -1,4 +1,4 @@ +-//===-- LoongArch.h - Declare LoongArch target feature support --*- C++ -*-===// ++//===--- LoongArch.h - Declare LoongArch target feature support -----------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ // See https://llvm.org/LICENSE.txt for license information. +@@ -22,133 +22,368 @@ namespace clang { + namespace targets { + + class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { ++ void setDataLayout() { ++ StringRef Layout; ++ ++ if (ABI == "lp32") ++ Layout = "m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"; ++ else if (ABI == "lpx32") ++ Layout = "m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else if (ABI == "lp64") ++ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else ++ llvm_unreachable("Invalid ABI"); ++ ++ resetDataLayout(("e-" + Layout).str()); ++ } ++ ++ static const Builtin::Info BuiltinInfo[]; ++ std::string CPU; ++ bool IsSingleFloat; ++ enum LoongArchFloatABI { HardFloat, SoftFloat } FloatABI; ++ bool HasLSX; ++ bool HasLASX; ++ + protected: ++ enum FPModeEnum { FP32, FP64 } FPMode; + std::string ABI; +- std::string CPU; +- bool HasFeatureD; +- bool HasFeatureF; +- bool HasFeatureLSX; +- bool HasFeatureLASX; + + public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) +- : TargetInfo(Triple) { +- HasFeatureD = false; +- HasFeatureF = false; +- HasFeatureLSX = false; +- HasFeatureLASX = false; +- LongDoubleWidth = 128; +- LongDoubleAlign = 128; +- LongDoubleFormat = &llvm::APFloat::IEEEquad(); +- MCountName = "_mcount"; +- SuitableAlign = 128; +- WCharType = SignedInt; +- WIntType = UnsignedInt; +- } ++ : TargetInfo(Triple), IsSingleFloat(false), FloatABI(HardFloat), ++ HasLSX(false), HasLASX(false), FPMode(FP64) { ++ TheCXXABI.set(TargetCXXABI::GenericLoongArch); + +- bool setCPU(const std::string &Name) override { +- if (!isValidCPUName(Name)) +- return false; +- CPU = Name; +- return true; ++ if (Triple.isLoongArch32()) ++ setABI("lp32"); ++ else if (Triple.getEnvironment() == llvm::Triple::GNUABILPX32) ++ setABI("lpx32"); ++ else ++ setABI("lp64"); ++ ++ if ( ABI == "lp64") ++ CPU = "loongarch64"; + } + +- StringRef getCPU() const { return CPU; } ++ bool processorSupportsGPR64() const; + + StringRef getABI() const override { return ABI; } + +- void getTargetDefines(const LangOptions &Opts, +- MacroBuilder &Builder) const override; +- +- ArrayRef getTargetBuiltins() const override; ++ bool setABI(const std::string &Name) override { ++ if (Name == "lp32") { ++ setLP32ABITypes(); ++ ABI = Name; ++ return true; ++ } + +- BuiltinVaListKind getBuiltinVaListKind() const override { +- return TargetInfo::VoidPtrBuiltinVaList; ++ if (Name == "lpx32") { ++ //setLPX32ABITypes(); ++ //ABI = Name; ++ //return true; ++ //TODO: implement ++ return false; ++ } ++ if (Name == "lp64") { ++ setLP64ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ return false; + } + +- std::string_view getClobbers() const override { return ""; } +- +- ArrayRef getGCCRegNames() const override; ++ void setLP32ABITypes() { ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble(); ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongWidth = LongAlign = 32; ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ SuitableAlign = 64; ++ } + +- int getEHDataRegisterNumber(unsigned RegNo) const override { +- if (RegNo == 0) +- return 4; +- if (RegNo == 1) +- return 5; +- return -1; ++ void setLPX32LP64ABITypes() { ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ if (getTriple().isOSFreeBSD()) { ++ LongDoubleWidth = LongDoubleAlign = 64; ++ LongDoubleFormat = 
&llvm::APFloat::IEEEdouble(); ++ } ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ SuitableAlign = 128; + } + +- ArrayRef getGCCRegAliases() const override; ++ void setLP64ABITypes() { ++ setLPX32LP64ABITypes(); ++ if (getTriple().isOSOpenBSD()) { ++ Int64Type = SignedLongLong; ++ } else { ++ Int64Type = SignedLong; ++ } ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ PtrDiffType = SignedLong; ++ SizeType = UnsignedLong; ++ } + +- bool validateAsmConstraint(const char *&Name, +- TargetInfo::ConstraintInfo &Info) const override; +- std::string convertConstraint(const char *&Constraint) const override; ++ void setLPX32ABITypes() { ++ setLPX32LP64ABITypes(); ++ Int64Type = SignedLongLong; ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 32; ++ PointerWidth = PointerAlign = 32; ++ PtrDiffType = SignedInt; ++ SizeType = UnsignedInt; ++ } + +- bool hasBitIntType() const override { return true; } ++ bool isValidCPUName(StringRef Name) const override; ++ void fillValidCPUList(SmallVectorImpl &Values) const override; + +- bool handleTargetFeatures(std::vector &Features, +- DiagnosticsEngine &Diags) override; ++ bool setCPU(const std::string &Name) override { ++ CPU = Name; ++ return isValidCPUName(Name); ++ } + ++ const std::string &getCPU() const { return CPU; } + bool + initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, +- const std::vector &FeaturesVec) const override; ++ const std::vector &FeaturesVec) const override { ++ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; + + bool hasFeature(StringRef Feature) const override; + +- bool isValidCPUName(StringRef Name) const override; +- void fillValidCPUList(SmallVectorImpl &Values) const override; +-}; ++ bool hasBitIntType() const override { return true; } + +-class LLVM_LIBRARY_VISIBILITY LoongArch32TargetInfo +- : public LoongArchTargetInfo { +-public: +- LoongArch32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) +- : LoongArchTargetInfo(Triple, Opts) { +- IntPtrType = SignedInt; +- PtrDiffType = SignedInt; +- SizeType = UnsignedInt; +- resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128"); +- // TODO: select appropriate ABI. 
+- setABI("ilp32d"); ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; + } + +- bool setABI(const std::string &Name) override { +- if (Name == "ilp32d" || Name == "ilp32f" || Name == "ilp32s") { +- ABI = Name; ++ ArrayRef getGCCRegNames() const override { ++ static const char *const GCCRegNames[] = { ++ // CPU register names ++ // Must match second column of GCCRegAliases ++ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", ++ "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", ++ "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", ++ "$r28", "$r29", "$r30", "$r31", ++ // Floating point register names ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", ++ "$f28", "$f29", "$f30", "$f31", ++ // condition register names ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", ++ // LSX register names ++ "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", ++ "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", ++ "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", ++ "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", ++ // LASX register names ++ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", ++ "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", ++ "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", ++ "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" ++ ++ }; ++ return llvm::ArrayRef(GCCRegNames); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ case 'f': // floating-point registers. ++ Info.setAllowsRegister(); ++ return true; ++ case 'l': // Signed 16-bit constant ++ case 'I': // Signed 12-bit constant ++ case 'K': // Unsigned 12-bit constant ++ case 'J': // Integer 0 ++ case 'G': // Floating-point 0 + return true; ++ case 'm': // Memory address with 12-bit offset ++ case 'R': // An address that can be used in a non-macro load or store ++ Info.setAllowsMemory(); ++ return true; ++ case 'Z': ++ if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset ++ || Name[1] == 'B' ) { // Memory address with 0 offset ++ Info.setAllowsMemory(); ++ Name++; // Skip over 'Z'. ++ return true; ++ } ++ return false; + } +- return false; + } +- void setMaxAtomicWidth() override { +- MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; ++ ++ std::string convertConstraint(const char *&Constraint) const override { ++ std::string R; ++ switch (*Constraint) { ++ case 'Z': // Two-character constraint; add "^" hint for later parsing. 
++ if (Constraint[1] == 'C' || Constraint[1] == 'B') { ++ R = std::string("^") + std::string(Constraint, 2); ++ Constraint++; ++ return R; ++ } ++ break; ++ } ++ return TargetInfo::convertConstraint(Constraint); + } +-}; + +-class LLVM_LIBRARY_VISIBILITY LoongArch64TargetInfo +- : public LoongArchTargetInfo { +-public: +- LoongArch64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) +- : LoongArchTargetInfo(Triple, Opts) { +- LongWidth = LongAlign = PointerWidth = PointerAlign = 64; +- IntMaxType = Int64Type = SignedLong; +- resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128"); +- // TODO: select appropriate ABI. +- setABI("lp64d"); ++ std::string_view getClobbers() const override { ++#if 0 ++ // In GCC, $1 is not widely used in generated code (it's used only in a few ++ // specific situations), so there is no real need for users to add it to ++ // the clobbers list if they want to use it in their inline assembly code. ++ // ++ // In LLVM, $1 is treated as a normal GPR and is always allocatable during ++ // code generation, so using it in inline assembly without adding it to the ++ // clobbers list can cause conflicts between the inline assembly code and ++ // the surrounding generated code. ++ // ++ // Another problem is that LLVM is allowed to choose $1 for inline assembly ++ // operands, which will conflict with the ".set at" assembler option (which ++ // we use only for inline assembly, in order to maintain compatibility with ++ // GCC) and will also conflict with the user's usage of $1. ++ // ++ // The easiest way to avoid these conflicts and keep $1 as an allocatable ++ // register for generated code is to automatically clobber $1 for all inline ++ // assembly code. ++ // ++ // FIXME: We should automatically clobber $1 only for inline assembly code ++ // which actually uses it. This would allow LLVM to use $1 for inline ++ // assembly operands if the user's assembly code doesn't use it. 
++ return "~{$1}"; ++#endif ++ return ""; + } + +- bool setABI(const std::string &Name) override { +- if (Name == "lp64d" || Name == "lp64f" || Name == "lp64s") { +- ABI = Name; +- return true; ++ bool handleTargetFeatures(std::vector &Features, ++ DiagnosticsEngine &Diags) override { ++ IsSingleFloat = false; ++ FloatABI = HardFloat; ++ FPMode = FP64; ++ ++ for (const auto &Feature : Features) { ++ if (Feature == "+single-float") ++ IsSingleFloat = true; ++ else if (Feature == "+soft-float") ++ FloatABI = SoftFloat; ++ else if (Feature == "+lsx") ++ HasLSX = true; ++ else if (Feature == "+lasx") { ++ HasLASX = true; ++ HasLSX = true; ++ } else if (Feature == "+fp64") ++ FPMode = FP64; ++ else if (Feature == "-fp64") ++ FPMode = FP32; + } +- return false; ++ ++ setDataLayout(); ++ ++ return true; + } +- void setMaxAtomicWidth() override { +- MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ ++ int getEHDataRegisterNumber(unsigned RegNo) const override { ++ if (RegNo == 0) ++ return 4; ++ if (RegNo == 1) ++ return 5; ++ return -1; ++ } ++ ++ bool isCLZForZeroUndef() const override { return false; } ++ ++ ArrayRef getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias GCCRegAliases[] = { ++ {{"zero", "$zero", "r0", "$0"}, "$r0"}, ++ {{"ra", "$ra", "r1", "$1"}, "$r1"}, ++ {{"tp", "$tp", "r2", "$2"}, "$r2"}, ++ {{"sp", "$sp", "r3", "$3"}, "$r3"}, ++ {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, ++ {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, ++ {{"a2", "$a2", "r6", "$6"}, "$r6"}, ++ {{"a3", "$a3", "r7", "$7"}, "$r7"}, ++ {{"a4", "$a4", "r8", "$8"}, "$r8"}, ++ {{"a5", "$a5", "r9", "$9"}, "$r9"}, ++ {{"a6", "$a6", "r10", "$10"}, "$r10"}, ++ {{"a7", "$a7", "r11", "$11"}, "$r11"}, ++ {{"t0", "$t0", "r12", "$12"}, "$r12"}, ++ {{"t1", "$t1", "r13", "$13"}, "$r13"}, ++ {{"t2", "$t2", "r14", "$14"}, "$r14"}, ++ {{"t3", "$t3", "r15", "$15"}, "$r15"}, ++ {{"t4", "$t4", "r16", "$16"}, "$r16"}, ++ {{"t5", "$t5", "r17", "$17"}, "$r17"}, ++ {{"t6", "$t6", "r18", "$18"}, "$r18"}, ++ {{"t7", "$t7", "r19", "$19"}, "$r19"}, ++ {{"t8", "$t8", "r20", "$20"}, "$r20"}, ++ //{{"x", "$x", "r21", "$21"}, "$r21"}, ++ {{"fp", "$fp", "r22", "$22"}, "$r22"}, ++ {{"s0", "$s0", "r23", "$23"}, "$r23"}, ++ {{"s1", "$s1", "r24", "$24"}, "$r24"}, ++ {{"s2", "$s2", "r25", "$25"}, "$r25"}, ++ {{"s3", "$s3", "r26", "$26"}, "$r26"}, ++ {{"s4", "$s4", "r27", "$27"}, "$r27"}, ++ {{"s5", "$s5", "r28", "$28"}, "$r28"}, ++ {{"s6", "$s6", "r29", "$29"}, "$r29"}, ++ {{"s7", "$s7", "r30", "$30"}, "$r30"}, ++ {{"s8", "$s8", "r31", "$31"}, "$r31"}, ++ {{"fa0", "$fa0", "f0"}, "$f0"}, ++ {{"fa1", "$fa1", "f1"}, "$f1"}, ++ {{"fa2", "$fa2", "f2"}, "$f2"}, ++ {{"fa3", "$fa3", "f3"}, "$f3"}, ++ {{"fa4", "$fa4", "f4"}, "$f4"}, ++ {{"fa5", "$fa5", "f5"}, "$f5"}, ++ {{"fa6", "$fa6", "f6"}, "$f6"}, ++ {{"fa7", "$fa7", "f7"}, "$f7"}, ++ {{"ft0", "$ft0", "f8"}, "$f8"}, ++ {{"ft1", "$ft1", "f9"}, "$f9"}, ++ {{"ft2", "$ft2", "f10"}, "$f10"}, ++ {{"ft3", "$ft3", "f11"}, "$f11"}, ++ {{"ft4", "$ft4", "f12"}, "$f12"}, ++ {{"ft5", "$ft5", "f13"}, "$f13"}, ++ {{"ft6", "$ft6", "f14"}, "$f14"}, ++ {{"ft7", "$ft7", "f15"}, "$f15"}, ++ {{"ft8", "$ft8", "f16"}, "$f16"}, ++ {{"ft9", "$ft9", "f17"}, "$f17"}, ++ {{"ft10", "$ft10", "f18"}, "$f18"}, ++ {{"ft11", "$ft11", "f19"}, "$f19"}, ++ {{"ft12", "$ft12", "f20"}, "$f20"}, ++ {{"ft13", "$ft13", "f21"}, "$f21"}, ++ {{"ft14", "$ft14", "f22"}, "$f22"}, ++ {{"ft15", "$ft15", "f23"}, "$f23"}, ++ {{"fs0", "$fs0", "f24"}, "$f24"}, ++ {{"fs1", "$fs1", "f25"}, "$f25"}, ++ {{"fs2", "$fs2", "f26"}, "$f26"}, ++ 
{{"fs3", "$fs3", "f27"}, "$f27"}, ++ {{"fs4", "$fs4", "f28"}, "$f28"}, ++ {{"fs5", "$fs5", "f29"}, "$f29"}, ++ {{"fs6", "$fs6", "f30"}, "$f30"}, ++ {{"fs7", "$fs7", "f31"}, "$f31"}, ++ }; ++ return llvm::ArrayRef(GCCRegAliases); + } ++ ++ bool hasInt128Type() const override { ++ return (ABI == "lpx32" || ABI == "lp64") || getTargetOpts().ForceEnableInt128; ++ } ++ ++ bool validateTarget(DiagnosticsEngine &Diags) const override; + }; +-} // end namespace targets +-} // end namespace clang ++} // namespace targets ++} // namespace clang + + #endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp +index eb13cd40e..8afab1265 100644 +--- a/clang/lib/CodeGen/CodeGenModule.cpp ++++ b/clang/lib/CodeGen/CodeGenModule.cpp +@@ -91,6 +91,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -306,17 +307,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) { + case llvm::Triple::bpfeb: + case llvm::Triple::bpfel: + return createBPFTargetCodeGenInfo(CGM); +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: { +- StringRef ABIStr = Target.getABI(); +- unsigned ABIFRLen = 0; +- if (ABIStr.ends_with("f")) +- ABIFRLen = 32; +- else if (ABIStr.ends_with("d")) +- ABIFRLen = 64; +- return createLoongArchTargetCodeGenInfo( +- CGM, Target.getPointerWidth(LangAS::Default), ABIFRLen); +- } ++ case llvm::Triple::loongarch64: ++ return createLoongArchTargetCodeGenInfo(CGM, 64, 64); + } + } + +@@ -4909,10 +4901,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, + isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) + GV->setSection(".cp.rodata"); + +- // Handle code model attribute +- if (const auto *CMA = D->getAttr()) +- GV->setCodeModel(CMA->getModel()); +- + // Check if we a have a const declaration with an initializer, we may be + // able to emit it as available_externally to expose it's value to the + // optimizer. +diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp +index d173806ec..50f6f1c96 100644 +--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp ++++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp +@@ -567,6 +567,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); + ++ case TargetCXXABI::GenericLoongArch: ++ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); ++ + case TargetCXXABI::GenericMIPS: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 63b9a1fdb..dad516dec 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -12,9 +12,8 @@ + using namespace clang; + using namespace clang::CodeGen; + +-// LoongArch ABI Implementation. Documented at +-// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +-// ++//===----------------------------------------------------------------------===// ++// LoongArch ABI Implementation. 
+ //===----------------------------------------------------------------------===// + + namespace { +@@ -22,13 +21,12 @@ class LoongArchABIInfo : public DefaultABIInfo { + private: + // Size of the integer ('r') registers in bits. + unsigned GRLen; +- // Size of the floating point ('f') registers in bits. ++ // Size of the floating point ('f') registers in bits. Note that the target ++ // ISA might have a wider FRLen than the selected ABI. + unsigned FRLen; +- // Number of general-purpose argument registers. +- static const int NumGARs = 8; +- // Number of floating-point argument registers. +- static const int NumFARs = 8; +- bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ static const int NumArgGPRs = 8; ++ static const int NumArgFPRs = 8; ++ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, +@@ -38,22 +36,31 @@ public: + LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) + : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} + ++ // DefaultABIInfo's classifyReturnType and classifyArgumentType are ++ // non-virtual, but computeInfo is virtual, so we overload it. + void computeInfo(CGFunctionInfo &FI) const override; + +- ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft, +- int &FARsLeft) const; ++ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + ++ uint64_t MinABIStackAlignInBytes = 8; ++ uint64_t StackAlignInBytes = 16; ++ llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; ++ llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; ++ void CoerceToIntArgs(uint64_t TySize, ++ SmallVectorImpl &ArgList) const; ++ + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; + +- bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; +- ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty, ++ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; +@@ -66,43 +73,37 @@ void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should +- // be passed indirect, or if the type size is a scalar greater than 2*GRLen +- // and not a complex type with elements <= FRLen. e.g. fp128 is passed direct +- // in LLVM IR, relying on the backend lowering code to rewrite the argument +- // list and pass indirectly on LA32. +- bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; +- if (!IsRetIndirect && RetTy->isScalarType() && +- getContext().getTypeSize(RetTy) > (2 * GRLen)) { +- if (RetTy->isComplexType() && FRLen) { +- QualType EltTy = RetTy->castAs()->getElementType(); +- IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen; +- } else { +- // This is a normal scalar > 2*GRLen, such as fp128 on LA32. +- IsRetIndirect = true; +- } +- } +- +- // We must track the number of GARs and FARs used in order to conform to the +- // LoongArch ABI. 
As GAR usage is different for variadic arguments, we must +- // also track whether we are examining a vararg or not. +- int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs; +- int FARsLeft = FRLen ? NumFARs : 0; ++ // be passed indirect or if the type size is greater than 2*grlen. ++ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || ++ getContext().getTypeSize(RetTy) > (2 * GRLen); ++ ++ // We must track the number of GPRs used in order to conform to the LoongArch ++ // ABI, as integer scalars passed in registers should have signext/zeroext ++ // when promoted, but are anyext if passed on the stack. As GPR usage is ++ // different for variadic arguments, we must also track whether we are ++ // examining a vararg or not. ++ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; ++ int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { +- ArgInfo.info = classifyArgumentType( +- ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft); ++ bool IsFixed = ArgNum < NumFixedArgs; ++ ArgInfo.info = ++ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); + ArgNum++; + } + } + +-// Returns true if the struct is a potential candidate to be passed in FARs (and +-// GARs). If this function returns true, the caller is responsible for checking +-// that if there is only a single field then that field is a float. +-bool LoongArchABIInfo::detectFARsEligibleStructHelper( +- QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, +- llvm::Type *&Field2Ty, CharUnits &Field2Off) const { ++// Returns true if the struct is a potential candidate for the floating point ++// calling convention. If this function returns true, the caller is ++// responsible for checking that if there is only a single field then that ++// field is a float. ++bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + +@@ -157,8 +158,9 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( + } + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { +- if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, +- Field2Ty, Field2Off)) ++ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, ++ Field1Off, Field2Ty, Field2Off); ++ if (!Ret) + return false; + CurOff += EltSize; + } +@@ -183,31 +185,40 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast(B.getType()->castAs()->getDecl()); +- if (!detectFARsEligibleStructHelper( +- B.getType(), CurOff + Layout.getBaseClassOffset(BDecl), +- Field1Ty, Field1Off, Field2Ty, Field2Off)) ++ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); ++ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, ++ Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ if (!Ret) + return false; + } + } ++ int ZeroWidthBitFieldCount = 0; + for (const FieldDecl *FD : RD->fields()) { ++ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); +- // Zero-width bitfields are ignored. 
+- if (BitWidth == 0) +- continue; + // Allow a bitfield with a type greater than GRLen as long as the + // bitwidth is GRLen or less. +- if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) { ++ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) + QTy = getContext().getIntTypeForBitwidth(GRLen, false); ++ if (BitWidth == 0) { ++ ZeroWidthBitFieldCount++; ++ continue; + } + } + +- if (!detectFARsEligibleStructHelper( +- QTy, +- CurOff + getContext().toCharUnitsFromBits( +- Layout.getFieldOffset(FD->getFieldIndex())), +- Field1Ty, Field1Off, Field2Ty, Field2Off)) ++ bool Ret = detectFPCCEligibleStructHelper( ++ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), ++ Field1Ty, Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ ++ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp ++ // or int+fp structs, but are ignored for a struct with an fp field and ++ // any number of zero-width bitfields. ++ if (Field2Ty && ZeroWidthBitFieldCount > 0) + return false; + } + return Field1Ty != nullptr; +@@ -216,19 +227,22 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( + return false; + } + +-// Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when +-// flattened it contains a single fp value, fp+fp, or int+fp of appropriate +-// size). If so, NeededFARs and NeededGARs are incremented appropriately. +-bool LoongArchABIInfo::detectFARsEligibleStruct( +- QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, +- llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs, +- int &NeededFARs) const { ++// Determine if a struct is eligible for passing according to the floating ++// point calling convention (i.e., when flattened it contains a single fp ++// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and ++// NeededArgGPRs are incremented appropriately. ++bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, ++ int &NeededArgGPRs, ++ int &NeededArgFPRs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; +- NeededGARs = 0; +- NeededFARs = 0; +- if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, +- Field1Off, Field2Ty, Field2Off)) ++ NeededArgGPRs = 0; ++ NeededArgFPRs = 0; ++ if (!detectFPCCEligibleStructHelper( ++ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + if (!Field1Ty) + return false; +@@ -236,20 +250,20 @@ bool LoongArchABIInfo::detectFARsEligibleStruct( + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) +- NeededFARs++; ++ NeededArgFPRs++; + else if (Field1Ty) +- NeededGARs++; ++ NeededArgGPRs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) +- NeededFARs++; ++ NeededArgFPRs++; + else if (Field2Ty) +- NeededGARs++; ++ NeededArgGPRs++; + return true; + } + + // Call getCoerceAndExpand for the two-element flattened struct described by + // Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an + // appropriate coerceToType and unpaddedCoerceToType. 
+-ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct( ++ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector CoerceElts; +@@ -289,99 +303,195 @@ ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct( + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + +- return ABIArgInfo::getCoerceAndExpand( +- llvm::StructType::get(getVMContext(), CoerceElts, IsPacked), +- llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked)); ++ auto CoerceToType = ++ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); ++ auto UnpaddedCoerceToType = ++ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); ++ ++ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); ++} ++ ++void LoongArchABIInfo::CoerceToIntArgs( ++ uint64_t TySize, SmallVectorImpl &ArgList) const { ++ llvm::IntegerType *IntTy = ++ llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); ++ ++ // Add (TySize / MinABIStackAlignInBytes) args of IntTy. ++ for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ++ ArgList.push_back(IntTy); ++ ++ // If necessary, add one more integer type to ArgList. ++ unsigned R = TySize % (MinABIStackAlignInBytes * 8); ++ ++ if (R) ++ ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); ++} ++ ++llvm::Type* LoongArchABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { ++ SmallVector ArgList, IntArgList; ++ ++ if (Ty->isComplexType()) ++ return CGT.ConvertType(Ty); ++ ++ const RecordType *RT = Ty->getAs(); ++ ++ // Unions/vectors are passed in integer registers. ++ if (!RT || !RT->isStructureOrClassType()) { ++ CoerceToIntArgs(TySize, ArgList); ++ return llvm::StructType::get(getVMContext(), ArgList); ++ } ++ ++ const RecordDecl *RD = RT->getDecl(); ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ assert(!(TySize % 8) && "Size of structure must be multiple of 8."); ++ ++ uint64_t LastOffset = 0; ++ unsigned idx = 0; ++ llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); ++ ++ // Iterate over fields in the struct/class and check if there are any aligned ++ // double fields. ++ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); ++ i != e; ++i, ++idx) { ++ const QualType Ty = i->getType(); ++ const BuiltinType *BT = Ty->getAs(); ++ ++ if (!BT || BT->getKind() != BuiltinType::Double) ++ continue; ++ ++ uint64_t Offset = Layout.getFieldOffset(idx); ++ if (Offset % 64) // Ignore doubles that are not aligned. ++ continue; ++ ++ // Add ((Offset - LastOffset) / 64) args of type i64. ++ for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ++ ArgList.push_back(I64); ++ ++ // Add double type. 
++ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); ++ LastOffset = Offset + 64; ++ } ++ ++ CoerceToIntArgs(TySize - LastOffset, IntArgList); ++ ArgList.append(IntArgList.begin(), IntArgList.end()); ++ ++ return llvm::StructType::get(getVMContext(), ArgList); ++} ++ ++llvm::Type * LoongArchABIInfo::getPaddingType(uint64_t OrigOffset, ++ uint64_t Offset) const { ++ if (OrigOffset + MinABIStackAlignInBytes > Offset) ++ return nullptr; ++ ++ return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); + } + + ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, +- int &GARsLeft, +- int &FARsLeft) const { +- assert(GARsLeft <= NumGARs && "GAR tracking underflow"); ++ int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const { ++ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { +- if (GARsLeft) +- GARsLeft -= 1; ++ if (ArgGPRsLeft) ++ ArgGPRsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + uint64_t Size = getContext().getTypeSize(Ty); +- + // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or + // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but it's size + // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. + if (isEmptyRecord(getContext(), Ty, true) && Size == 0) + return ABIArgInfo::getIgnore(); + +- // Pass floating point values via FARs if possible. ++ // Pass floating point values via FPRs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && +- FRLen >= Size && FARsLeft) { +- FARsLeft--; ++ FRLen >= Size && ArgFPRsLeft) { ++ ArgFPRsLeft--; + return ABIArgInfo::getDirect(); + } + +- // Complex types for the *f or *d ABI must be passed directly rather than ++ // Complex types for the hard float ABI must be passed direct rather than + // using CoerceAndExpand. 
+- if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +- QualType EltTy = Ty->castAs()->getElementType(); ++ if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) { ++ QualType EltTy = Ty->getAs()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FRLen) { +- FARsLeft -= 2; ++ ArgFPRsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ + if (IsFixed && FRLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); +- int NeededGARs = 0; +- int NeededFARs = 0; +- bool IsCandidate = detectFARsEligibleStruct( +- Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs); +- if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) { +- GARsLeft -= NeededGARs; +- FARsLeft -= NeededFARs; +- return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty, ++ int NeededArgGPRs = 0; ++ int NeededArgFPRs = 0; ++ bool IsCandidate = ++ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, ++ NeededArgGPRs, NeededArgFPRs); ++ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && ++ NeededArgFPRs <= ArgFPRsLeft) { ++ ArgGPRsLeft -= NeededArgGPRs; ++ ArgFPRsLeft -= NeededArgFPRs; ++ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } ++ } else if (Ty->isStructureOrClassType() && Size == 128 && ++ isAggregateTypeForABI(Ty)) { ++ uint64_t Offset = 8; ++ uint64_t OrigOffset = Offset; ++ uint64_t TySize = getContext().getTypeSize(Ty); ++ uint64_t Align = getContext().getTypeAlign(Ty) / 8; ++ ++ Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), ++ (uint64_t)StackAlignInBytes); ++ unsigned CurrOffset = llvm::alignTo(Offset, Align); ++ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; ++ ++ ABIArgInfo ArgInfo = ++ ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, ++ getPaddingType(OrigOffset, CurrOffset)); ++ ArgInfo.setInReg(true); ++ return ArgInfo; + } + + uint64_t NeededAlign = getContext().getTypeAlign(Ty); +- // Determine the number of GARs needed to pass the current argument ++ // Determine the number of GPRs needed to pass the current argument + // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. +- int NeededGARs = 1; ++ int NeededArgGPRs = 1; + if (!IsFixed && NeededAlign == 2 * GRLen) +- NeededGARs = 2 + (GARsLeft % 2); ++ NeededArgGPRs = 2 + (ArgGPRsLeft % 2); + else if (Size > GRLen && Size <= 2 * GRLen) +- NeededGARs = 2; ++ NeededArgGPRs = 2; + +- if (NeededGARs > GARsLeft) +- NeededGARs = GARsLeft; ++ if (NeededArgGPRs > ArgGPRsLeft) { ++ NeededArgGPRs = ArgGPRsLeft; ++ } + +- GARsLeft -= NeededGARs; ++ ArgGPRsLeft -= NeededArgGPRs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + +- // All integral types are promoted to GRLen width. +- if (Size < GRLen && Ty->isIntegralOrEnumerationType()) ++ // All integral types are promoted to GRLen width, unless passed on the ++ // stack. 
++ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) { + return extendType(Ty); +- +- if (const auto *EIT = Ty->getAs()) { +- if (EIT->getNumBits() < GRLen) +- return extendType(Ty); +- if (EIT->getNumBits() > 128 || +- (!getContext().getTargetInfo().hasInt128Type() && +- EIT->getNumBits() > 64)) +- return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } + + return ABIArgInfo::getDirect(); +@@ -390,19 +500,20 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + // Aggregates which are <= 2*GRLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * GRLen) { ++ unsigned Alignment = getContext().getTypeAlign(Ty); ++ + // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is +- // required, and a 2-element GRLen array if only GRLen alignment is +- // required. ++ // required, and a 2-element GRLen array if only GRLen alignment is required. + if (Size <= GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), GRLen)); +- } +- if (getContext().getTypeAlign(Ty) == 2 * GRLen) { ++ } else if (Alignment == 2 * GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * GRLen)); ++ } else { ++ return ABIArgInfo::getDirect(llvm::ArrayType::get( ++ llvm::IntegerType::get(getVMContext(), GRLen), 2)); + } +- return ABIArgInfo::getDirect( +- llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2)); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } +@@ -410,11 +521,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); ++ ++ int ArgGPRsLeft = 2; ++ int ArgFPRsLeft = FRLen ? 2 : 0; ++ + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. +- int GARsLeft = 2; +- int FARsLeft = FRLen ? 2 : 0; +- return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft); ++ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ++ ArgFPRsLeft); + } + + Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, +@@ -422,22 +536,23 @@ Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); + + // Empty records are ignored for parameter passing purposes. +- if (isEmptyRecord(getContext(), Ty, true)) ++ if (isEmptyRecord(getContext(), Ty, true)) { + return Address(CGF.Builder.CreateLoad(VAListAddr), + CGF.ConvertTypeForMem(Ty), SlotSize); ++ } + + auto TInfo = getContext().getTypeInfoInChars(Ty); + +- // Arguments bigger than 2*GRLen bytes are passed indirectly. +- return emitVoidPtrVAArg(CGF, VAListAddr, Ty, +- /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo, +- SlotSize, +- /*AllowHigherAlign=*/true); ++ // Arguments bigger than 2*GRlen bytes are passed indirectly. ++ bool IsIndirect = TInfo.Width > 2 * SlotSize; ++ ++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, ++ SlotSize, /*AllowHigherAlign=*/true); + } + + ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); +- // LA64 ABI requires unsigned 32 bit integers to be sign extended. ++ // LP64 ABI requires unsigned 32 bit integers to be sign extended. 
+ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + return ABIArgInfo::getExtend(Ty); +@@ -448,10 +563,15 @@ class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { + public: + LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, + unsigned FRLen) +- : TargetCodeGenInfo( +- std::make_unique(CGT, GRLen, FRLen)) {} ++ : TargetCodeGenInfo(std::make_unique( ++ CGT, GRLen, FRLen)) {} ++ ++ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, ++ CodeGen::CodeGenModule &CGM) const override { ++ return; ++ } + }; +-} // namespace ++} // end anonymous namespace + + std::unique_ptr + CodeGen::createLoongArchTargetCodeGenInfo(CodeGenModule &CGM, unsigned GRLen, +diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp +index 93cddf742..79907900f 100644 +--- a/clang/lib/Driver/Driver.cpp ++++ b/clang/lib/Driver/Driver.cpp +@@ -641,6 +641,30 @@ static llvm::Triple computeTargetTriple(const Driver &D, + Target.setVendorName("intel"); + } + ++ // If target is LoongArch adjust the target triple ++ // accordingly to provided ABI name. ++ if (Target.isLoongArch()) { ++ if ((A = Args.getLastArg(options::OPT_mabi_EQ))) { ++ StringRef ABIName = A->getValue(); ++ if (ABIName == "lp32") { ++ Target = Target.get32BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNUABI64 || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNU); ++ } else if (ABIName == "lpx32") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABI64) ++ Target.setEnvironment(llvm::Triple::GNUABILPX32); ++ } else if (ABIName == "lp64") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNUABI64); ++ } ++ } ++ } ++ + // If target is MIPS adjust the target triple + // accordingly to provided ABI name. + if (Target.isMIPS()) { +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 31153a67a..4449c1a77 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -1,4 +1,4 @@ +-//===--- LoongArch.cpp - LoongArch Helpers for Tools ------------*- C++ -*-===// ++//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
+@@ -8,225 +8,205 @@ + + #include "LoongArch.h" + #include "ToolChains/CommonArgs.h" +-#include "clang/Basic/DiagnosticDriver.h" + #include "clang/Driver/Driver.h" + #include "clang/Driver/DriverDiagnostic.h" + #include "clang/Driver/Options.h" +-#include "llvm/TargetParser/Host.h" +-#include "llvm/TargetParser/LoongArchTargetParser.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/Option/ArgList.h" + + using namespace clang::driver; + using namespace clang::driver::tools; + using namespace clang; + using namespace llvm::opt; + +-StringRef loongarch::getLoongArchABI(const Driver &D, const ArgList &Args, +- const llvm::Triple &Triple) { +- assert((Triple.getArch() == llvm::Triple::loongarch32 || +- Triple.getArch() == llvm::Triple::loongarch64) && +- "Unexpected triple"); +- bool IsLA32 = Triple.getArch() == llvm::Triple::loongarch32; +- +- // Record -mabi value for later use. +- const Arg *MABIArg = Args.getLastArg(options::OPT_mabi_EQ); +- StringRef MABIValue; +- if (MABIArg) { +- MABIValue = MABIArg->getValue(); ++// Get CPU and ABI names. They are not independent ++// so we have to calculate them together. ++void loongarch::getLoongArchCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, ++ StringRef &CPUName, StringRef &ABIName) { ++ const char *DefLoongArch32CPU = "loongarch32"; ++ const char *DefLoongArch64CPU = "loongarch64"; ++ ++ if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ, ++ options::OPT_mcpu_EQ)) ++ CPUName = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ++ ABIName = A->getValue(); ++ // Convert a GNU style LoongArch ABI name to the name ++ // accepted by LLVM LoongArch backend. ++ ABIName = llvm::StringSwitch(ABIName) ++ .Case("32", "lp32") ++ .Case("64", "lp64") ++ .Default(ABIName); + } + +- // Parse -mfpu value for later use. +- const Arg *MFPUArg = Args.getLastArg(options::OPT_mfpu_EQ); +- int FPU = -1; +- if (MFPUArg) { +- StringRef V = MFPUArg->getValue(); +- if (V == "64") +- FPU = 64; +- else if (V == "32") +- FPU = 32; +- else if (V == "0" || V == "none") +- FPU = 0; +- else +- D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << V; ++ // Setup default CPU and ABI names. ++ if (CPUName.empty() && ABIName.empty()) { ++ switch (Triple.getArch()) { ++ default: ++ llvm_unreachable("Unexpected triple arch name"); ++ case llvm::Triple::loongarch32: ++ CPUName = DefLoongArch32CPU; ++ break; ++ case llvm::Triple::loongarch64: ++ CPUName = DefLoongArch64CPU; ++ break; ++ } + } + +- // Check -m*-float firstly since they have highest priority. +- if (const Arg *A = Args.getLastArg(options::OPT_mdouble_float, +- options::OPT_msingle_float, +- options::OPT_msoft_float)) { +- StringRef ImpliedABI; +- int ImpliedFPU = -1; +- if (A->getOption().matches(options::OPT_mdouble_float)) { +- ImpliedABI = IsLA32 ? "ilp32d" : "lp64d"; +- ImpliedFPU = 64; +- } +- if (A->getOption().matches(options::OPT_msingle_float)) { +- ImpliedABI = IsLA32 ? "ilp32f" : "lp64f"; +- ImpliedFPU = 32; +- } +- if (A->getOption().matches(options::OPT_msoft_float)) { +- ImpliedABI = IsLA32 ? "ilp32s" : "lp64s"; +- ImpliedFPU = 0; +- } ++ if (ABIName.empty() && (Triple.getEnvironment() == llvm::Triple::GNUABILPX32)) ++ ABIName = "lpx32"; + +- // Check `-mabi=` and `-mfpu=` settings and report if they conflict with +- // the higher-priority settings implied by -m*-float. +- // +- // ImpliedABI and ImpliedFPU are guaranteed to have valid values because +- // one of the match arms must match if execution can arrive here at all. 
+- if (!MABIValue.empty() && ImpliedABI != MABIValue) +- D.Diag(diag::warn_drv_loongarch_conflicting_implied_val) +- << MABIArg->getAsString(Args) << A->getAsString(Args) << ImpliedABI; ++ if (ABIName.empty()) { ++ ABIName = ++ llvm::StringSwitch(CPUName) ++ .Case("loongarch32", "lp32") ++ .Cases("loongarch64", "la264", "la364", "la464", "la664", "lp64") ++ .Default(""); ++ } + +- if (FPU != -1 && ImpliedFPU != FPU) +- D.Diag(diag::warn_drv_loongarch_conflicting_implied_val) +- << MFPUArg->getAsString(Args) << A->getAsString(Args) << ImpliedFPU; ++ if (ABIName.empty()) { ++ // Deduce ABI name from the target triple. ++ ABIName = Triple.isLoongArch32() ? "lp32" : "lp64"; ++ } + +- return ImpliedABI; ++ if (CPUName.empty()) { ++ // Deduce CPU name from ABI name. ++ CPUName = llvm::StringSwitch(ABIName) ++ .Case("lp32", DefLoongArch32CPU) ++ .Cases("lpx32", "lp64", DefLoongArch64CPU) ++ .Default(""); + } + +- // If `-mabi=` is specified, use it. +- if (!MABIValue.empty()) +- return MABIValue; +- +- // Select abi based on -mfpu=xx. +- switch (FPU) { +- case 64: +- return IsLA32 ? "ilp32d" : "lp64d"; +- case 32: +- return IsLA32 ? "ilp32f" : "lp64f"; +- case 0: +- return IsLA32 ? "ilp32s" : "lp64s"; ++ // FIXME: Warn on inconsistent use of -march and -mabi. ++} ++ ++std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args, ++ const llvm::Triple &Triple) { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ return llvm::StringSwitch(ABIName) ++ .Case("lp32", "") ++ .Case("lpx32", "32") ++ .Case("lp64", "64"); ++} ++ ++// Convert ABI name to the GNU tools acceptable variant. ++StringRef loongarch::getGnuCompatibleLoongArchABIName(StringRef ABI) { ++ return llvm::StringSwitch(ABI) ++ .Case("lp32", "32") ++ .Case("lp64", "64") ++ .Default(ABI); ++} ++ ++// Select the LoongArch float ABI as determined by -msoft-float, -mhard-float, ++// and -mfloat-abi=. ++loongarch::FloatABI loongarch::getLoongArchFloatABI(const Driver &D, const ArgList &Args) { ++ loongarch::FloatABI ABI = loongarch::FloatABI::Invalid; ++ if (Arg *A = ++ Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, ++ options::OPT_mfloat_abi_EQ)) { ++ if (A->getOption().matches(options::OPT_msoft_float)) ++ ABI = loongarch::FloatABI::Soft; ++ else if (A->getOption().matches(options::OPT_mhard_float)) ++ ABI = loongarch::FloatABI::Hard; ++ else { ++ ABI = llvm::StringSwitch(A->getValue()) ++ .Case("soft", loongarch::FloatABI::Soft) ++ .Case("hard", loongarch::FloatABI::Hard) ++ .Default(loongarch::FloatABI::Invalid); ++ if (ABI == loongarch::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { ++ D.Diag(clang::diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ++ ABI = loongarch::FloatABI::Hard; ++ } ++ } + } + +- // Choose a default based on the triple. +- // Honor the explicit ABI modifier suffix in triple's environment part if +- // present, falling back to {ILP32,LP64}D otherwise. +- switch (Triple.getEnvironment()) { +- case llvm::Triple::GNUSF: +- return IsLA32 ? "ilp32s" : "lp64s"; +- case llvm::Triple::GNUF32: +- return IsLA32 ? "ilp32f" : "lp64f"; +- case llvm::Triple::GNUF64: +- // This was originally permitted (and indeed the canonical way) to +- // represent the {ILP32,LP64}D ABIs, but in Feb 2023 Loongson decided to +- // drop the explicit suffix in favor of unmarked `-gnu` for the +- // "general-purpose" ABIs, among other non-technical reasons. 
+- // +- // The spec change did not mention whether existing usages of "gnuf64" +- // shall remain valid or not, so we are going to continue recognizing it +- // for some time, until it is clear that everyone else has migrated away +- // from it. +- [[fallthrough]]; +- case llvm::Triple::GNU: +- default: +- return IsLA32 ? "ilp32d" : "lp64d"; ++ // If unspecified, choose the default based on the platform. ++ if (ABI == loongarch::FloatABI::Invalid) { ++ // Assume "hard", because it's a default value used by gcc. ++ // When we start to recognize specific target LoongArch processors, ++ // we will be able to select the default more correctly. ++ ABI = loongarch::FloatABI::Hard; + } ++ ++ assert(ABI != loongarch::FloatABI::Invalid && "must select an ABI"); ++ return ABI; + } + +-void loongarch::getLoongArchTargetFeatures(const Driver &D, +- const llvm::Triple &Triple, +- const ArgList &Args, +- std::vector &Features) { +- std::string ArchName; +- if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) +- ArchName = A->getValue(); +- ArchName = postProcessTargetCPUString(ArchName, Triple); +- llvm::LoongArch::getArchFeatures(ArchName, Features); +- +- // Select floating-point features determined by -mdouble-float, +- // -msingle-float, -msoft-float and -mfpu. +- // Note: -m*-float wins any other options. +- if (const Arg *A = Args.getLastArg(options::OPT_mdouble_float, +- options::OPT_msingle_float, +- options::OPT_msoft_float)) { +- if (A->getOption().matches(options::OPT_mdouble_float)) { +- Features.push_back("+f"); +- Features.push_back("+d"); +- } else if (A->getOption().matches(options::OPT_msingle_float)) { +- Features.push_back("+f"); +- Features.push_back("-d"); +- } else /*Soft-float*/ { +- Features.push_back("-f"); +- Features.push_back("-d"); +- } +- } else if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) { +- StringRef FPU = A->getValue(); +- if (FPU == "64") { +- Features.push_back("+f"); +- Features.push_back("+d"); +- } else if (FPU == "32") { +- Features.push_back("+f"); +- Features.push_back("-d"); +- } else if (FPU == "0" || FPU == "none") { +- Features.push_back("-f"); +- Features.push_back("-d"); +- } else { +- D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU; +- } ++void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const ArgList &Args, ++ std::vector &Features) { ++ StringRef CPUName; ++ StringRef ABIName; ++ getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ABIName = getGnuCompatibleLoongArchABIName(ABIName); ++ ++ // At final link time, LP32 and LPX32 with CPIC will have another section ++ // added to the binary which contains the stub functions to perform ++ // any fixups required for PIC code. ++ ++ bool IsLP64 = ABIName == "64"; ++ bool NonPIC = false; ++ ++ Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, ++ options::OPT_fpic, options::OPT_fno_pic, ++ options::OPT_fPIE, options::OPT_fno_PIE, ++ options::OPT_fpie, options::OPT_fno_pie); ++ if (LastPICArg) { ++ Option O = LastPICArg->getOption(); ++ NonPIC = ++ (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) || ++ O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie)); + } + +- // Select the `ual` feature determined by -m[no-]unaligned-access +- // or the alias -m[no-]strict-align. +- AddTargetFeature(Args, Features, options::OPT_munaligned_access, +- options::OPT_mno_unaligned_access, "ual"); +- +- // Accept but warn about these TargetSpecific options. 
+- if (Arg *A = Args.getLastArgNoClaim(options::OPT_mabi_EQ)) +- A->ignoreTargetSpecific(); +- if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) +- A->ignoreTargetSpecific(); +- +- // Select lsx feature determined by -m[no-]lsx. +- if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { +- // LSX depends on 64-bit FPU. +- // -m*-float and -mfpu=none/0/32 conflict with -mlsx. +- if (A->getOption().matches(options::OPT_mlsx)) { +- if (llvm::find(Features, "-d") != Features.end()) +- D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); +- else /*-mlsx*/ +- Features.push_back("+lsx"); +- } else /*-mno-lsx*/ { +- Features.push_back("-lsx"); +- } ++ if (IsLP64 && NonPIC) { ++ NonPIC = false; + } + +- // Select lasx feature determined by -m[no-]lasx. +- if (const Arg *A = +- Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { +- // LASX depends on 64-bit FPU and LSX. +- // -mno-lsx conflicts with -mlasx. +- if (A->getOption().matches(options::OPT_mlasx)) { +- if (llvm::find(Features, "-d") != Features.end()) +- D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); +- else if (llvm::find(Features, "-lsx") != Features.end()) +- D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); +- else { /*-mlasx*/ +- Features.push_back("+lsx"); +- Features.push_back("+lasx"); +- } +- } else /*-mno-lasx*/ +- Features.push_back("-lasx"); ++ loongarch::FloatABI FloatABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (FloatABI == loongarch::FloatABI::Soft) { ++ // FIXME: Note, this is a hack. We need to pass the selected float ++ // mode to the LoongArchTargetInfoBase to define appropriate macros there. ++ // Now it is the only method. ++ Features.push_back("+soft-float"); + } +-} + +-std::string loongarch::postProcessTargetCPUString(const std::string &CPU, +- const llvm::Triple &Triple) { +- std::string CPUString = CPU; +- if (CPUString == "native") { +- CPUString = llvm::sys::getHostCPUName(); +- if (CPUString == "generic") +- CPUString = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); ++ AddTargetFeature(Args, Features, options::OPT_msingle_float, ++ options::OPT_mdouble_float, "single-float"); ++ ++ AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx, ++ "lsx"); ++ AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx, ++ "lasx"); ++ ++ AddTargetFeature(Args, Features, options::OPT_munaligned_access, ++ options::OPT_mno_unaligned_access, "unaligned-access"); ++ ++ // Add the last -mfp32/-mfp64, if none are given and fp64 is default, ++ // pass fp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ if (A->getOption().matches(options::OPT_mfp32)) ++ Features.push_back("-fp64"); ++ else ++ Features.push_back("+fp64"); ++ } else if (loongarch::isFP64Default(Args)) { ++ Features.push_back("+fp64"); + } +- if (CPUString.empty()) +- CPUString = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); +- return CPUString; ++ ++} ++ ++bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mabi_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} ++ ++bool loongarch::isUCLibc(const ArgList &Args) { ++ Arg *A = Args.getLastArg(options::OPT_m_libc_Group); ++ return A && A->getOption().matches(options::OPT_muclibc); + } + +-std::string loongarch::getLoongArchTargetCPU(const llvm::opt::ArgList &Args, +- const llvm::Triple &Triple) { +- std::string CPU; +- // If we have -march, use that. 
+- if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) +- CPU = A->getValue(); +- return postProcessTargetCPUString(CPU, Triple); ++bool loongarch::isFP64Default(const ArgList &Args) { ++ return Args.getLastArg(options::OPT_msingle_float) ? false : true; + } +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.h b/clang/lib/Driver/ToolChains/Arch/LoongArch.h +index d8280cd83..bf32eb117 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.h ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.h +@@ -1,4 +1,4 @@ +-//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------*- C++ -*-===// ++//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------------------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. +@@ -12,25 +12,37 @@ + #include "clang/Driver/Driver.h" + #include "llvm/ADT/StringRef.h" + #include "llvm/Option/Option.h" ++#include "llvm/TargetParser/Triple.h" ++#include ++#include + + namespace clang { + namespace driver { + namespace tools { +-namespace loongarch { +-void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, +- const llvm::opt::ArgList &Args, +- std::vector &Features); + +-StringRef getLoongArchABI(const Driver &D, const llvm::opt::ArgList &Args, +- const llvm::Triple &Triple); ++namespace loongarch { ++enum class FloatABI { ++ Invalid, ++ Soft, ++ Hard, ++}; + +-std::string postProcessTargetCPUString(const std::string &CPU, +- const llvm::Triple &Triple); ++void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple, StringRef &CPUName, ++ StringRef &ABIName); ++void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ std::vector &Features); ++StringRef getGnuCompatibleLoongArchABIName(StringRef ABI); ++loongarch::FloatABI getLoongArchFloatABI(const Driver &D, const llvm::opt::ArgList &Args); ++std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple); ++bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value); ++bool isUCLibc(const llvm::opt::ArgList &Args); ++bool isFP64Default(const llvm::opt::ArgList &Args); + +-std::string getLoongArchTargetCPU(const llvm::opt::ArgList &Args, +- const llvm::Triple &Triple); + } // end namespace loongarch +-} // end namespace tools ++} // end namespace target + } // end namespace driver + } // end namespace clang + +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index aa344b346..68a06d578 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -58,7 +58,6 @@ + #include "llvm/Support/YAMLParser.h" + #include "llvm/TargetParser/ARMTargetParserCommon.h" + #include "llvm/TargetParser/Host.h" +-#include "llvm/TargetParser/LoongArchTargetParser.h" + #include "llvm/TargetParser/RISCVTargetParser.h" + #include + +@@ -1782,20 +1781,41 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, + + void Clang::AddLoongArchTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { ++ const Driver &D = getToolChain().getDriver(); ++ StringRef CPUName; ++ StringRef ABIName; + const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); + + CmdArgs.push_back("-target-abi"); +- CmdArgs.push_back( +- loongarch::getLoongArchABI(getToolChain().getDriver(), Args, Triple) +- .data()); ++ 
CmdArgs.push_back(ABIName.data()); + +- // Handle -mtune. +- if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { +- std::string TuneCPU = A->getValue(); +- TuneCPU = loongarch::postProcessTargetCPUString(TuneCPU, Triple); +- CmdArgs.push_back("-tune-cpu"); +- CmdArgs.push_back(Args.MakeArgString(TuneCPU)); ++ loongarch::FloatABI ABI = loongarch::getLoongArchFloatABI(D, Args); ++ if (ABI == loongarch::FloatABI::Soft) { ++ // Floating point operations and argument passing are soft. ++ CmdArgs.push_back("-msoft-float"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("soft"); ++ } else { ++ // Floating point operations and argument passing are hard. ++ assert(ABI == loongarch::FloatABI::Hard && "Invalid float abi!"); ++ CmdArgs.push_back("-mfloat-abi"); ++ CmdArgs.push_back("hard"); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, ++ options::OPT_mno_check_zero_division)) { ++ if (A->getOption().matches(options::OPT_mno_check_zero_division)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-mnocheck-zero-division"); ++ } + } ++ ++ llvm::Reloc::Model RelocationModel; ++ unsigned PICLevel; ++ bool IsPIE; ++ std::tie(RelocationModel, PICLevel, IsPIE) = ++ ParsePICArgs(getToolChain(), Args); + } + + void Clang::AddMIPSTargetArgs(const ArgList &Args, +@@ -1944,6 +1964,15 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, + CmdArgs.push_back("-mips-jalr-reloc=0"); + } + } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mfix_loongson3_llsc, ++ options::OPT_mno_fix_loongson3_llsc)) { ++ CmdArgs.push_back("-mllvm"); ++ if (A->getOption().matches(options::OPT_mfix_loongson3_llsc)) ++ CmdArgs.push_back("-mips-fix-loongson3-llsc=1"); ++ else ++ CmdArgs.push_back("-mips-fix-loongson3-llsc=0"); ++ } + } + + void Clang::AddPPCTargetArgs(const ArgList &Args, +@@ -5680,15 +5709,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + // defaults to -fno-direct-access-external-data. Pass the option if different + // from the default. + if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data, +- options::OPT_fno_direct_access_external_data)) { ++ options::OPT_fno_direct_access_external_data)) + if (A->getOption().matches(options::OPT_fdirect_access_external_data) != + (PICLevel == 0)) + A->render(Args, CmdArgs); +- } else if (PICLevel == 0 && Triple.isLoongArch()) { +- // Some targets default to -fno-direct-access-external-data even for +- // -fno-pic. +- CmdArgs.push_back("-fno-direct-access-external-data"); +- } + + if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) { + CmdArgs.push_back("-fno-plt"); +@@ -5757,17 +5781,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + D.Diag(diag::err_drv_argument_only_allowed_with) + << A->getAsString(Args) << "-fno-pic"; + } else if (Triple.isLoongArch()) { +- if (CM == "extreme" && +- Args.hasFlagNoClaim(options::OPT_fplt, options::OPT_fno_plt, false)) +- D.Diag(diag::err_drv_argument_not_allowed_with) +- << A->getAsString(Args) << "-fplt"; +- Ok = CM == "normal" || CM == "medium" || CM == "extreme"; +- // Convert to LLVM recognizable names. 
+- if (Ok) +- CM = llvm::StringSwitch(CM) +- .Case("normal", "small") +- .Case("extreme", "large") +- .Default(CM); ++ Ok = CM == "small" || CM == "large"; + } else if (Triple.isPPC64() || Triple.isOSAIX()) { + Ok = CM == "small" || CM == "medium" || CM == "large"; + } else if (Triple.isRISCV()) { +@@ -6514,8 +6528,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ)) { + StringRef S0 = A->getValue(), S = S0; + unsigned Size, Offset = 0; +- if (!Triple.isAArch64() && !Triple.isLoongArch() && !Triple.isRISCV() && +- !Triple.isX86()) ++ if (!Triple.isAArch64() && !Triple.isRISCV() && !Triple.isX86()) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; + else if (S.consumeInteger(10, Size) || +@@ -8209,6 +8222,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, + + // Begin ClangAs + ++void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++} ++ + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; +@@ -8237,14 +8261,6 @@ void ClangAs::AddX86TargetArgs(const ArgList &Args, + } + } + +-void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, +- ArgStringList &CmdArgs) const { +- CmdArgs.push_back("-target-abi"); +- CmdArgs.push_back(loongarch::getLoongArchABI(getToolChain().getDriver(), Args, +- getToolChain().getTriple()) +- .data()); +-} +- + void ClangAs::AddRISCVTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const llvm::Triple &Triple = getToolChain().getTriple(); +@@ -8409,6 +8425,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + default: + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -8443,11 +8464,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + } + break; + +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- AddLoongArchTargetArgs(Args, CmdArgs); +- break; +- + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + AddRISCVTargetArgs(Args, CmdArgs); +diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp +index 2b916f000..8013d9660 100644 +--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp +@@ -100,6 +100,8 @@ static bool useFramePointerForTargetByDefault(const llvm::opt::ArgList &Args, + // XCore never wants frame pointers, regardless of OS. + // WebAssembly never wants frame pointers. 
+ return false; ++ case llvm::Triple::loongarch64: ++ case llvm::Triple::loongarch32: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -112,8 +114,6 @@ static bool useFramePointerForTargetByDefault(const llvm::opt::ArgList &Args, + case llvm::Triple::amdgcn: + case llvm::Triple::r600: + case llvm::Triple::csky: +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: + return !clang::driver::tools::areOptimizationsEnabled(Args); + default: + break; +@@ -546,6 +546,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + return A->getValue(); + return ""; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); ++ return std::string(CPUName); ++ } ++ + case llvm::Triple::m68k: + return m68k::getM68kTargetCPU(Args); + +@@ -614,10 +622,6 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + return std::string(getWebAssemblyTargetCPU(Args)); +- +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- return loongarch::getLoongArchTargetCPU(Args, T); + } + } + +@@ -648,6 +652,10 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + case llvm::Triple::thumbeb: + arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS); + break; ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); ++ break; + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -702,10 +710,6 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + case llvm::Triple::csky: + csky::getCSKYTargetFeatures(D, Triple, Args, CmdArgs, Features); + break; +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); +- break; + } + + for (auto Feature : unifyTargetFeatures(Features)) { +@@ -1944,6 +1948,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { + if ((ROPI || RWPI) && (PIC || PIE)) + ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + ++ if (Triple.isLoongArch()) { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ // When targeting the LP64 ABI, PIC is the default. ++ if (ABIName == "lp64") ++ PIC = true; ++ // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, ++ // does not use PIC level 2 for historical reasons. ++ IsPICLevelTwo = false; ++ } ++ + if (Triple.isMIPS()) { + StringRef CPUName; + StringRef ABIName; +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index e5e1b1d77..1788d65f2 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -237,6 +237,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + case llvm::Triple::thumbeb: + return tools::arm::isARMBigEndian(T, Args) ? 
"armelfb_linux_eabi" + : "armelf_linux_eabi"; ++ case llvm::Triple::loongarch32: ++ return "elf32loongarch"; ++ case llvm::Triple::loongarch64: ++ return "elf64loongarch"; + case llvm::Triple::m68k: + return "m68kelf"; + case llvm::Triple::ppc: +@@ -260,10 +264,6 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + return "elf32_sparc"; + case llvm::Triple::sparcv9: + return "elf64_sparc"; +- case llvm::Triple::loongarch32: +- return "elf32loongarch"; +- case llvm::Triple::loongarch64: +- return "elf64loongarch"; + case llvm::Triple::mips: + return "elf32btsmip"; + case llvm::Triple::mipsel: +@@ -426,6 +426,11 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, + if (Triple.isRISCV()) + CmdArgs.push_back("-X"); + ++ // Defaults to --discard-locals because assembler generates temporary symbols ++ // referenced by relocations, such as ".LBB9_4". ++ if (Triple.isLoongArch()) ++ CmdArgs.push_back("-X"); ++ + const bool IsShared = Args.hasArg(options::OPT_shared); + if (IsShared) + CmdArgs.push_back("-shared"); +@@ -839,11 +844,61 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + + break; + } +- // TODO: handle loongarch32. ++ case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: { +- StringRef ABIName = +- loongarch::getLoongArchABI(D, Args, getToolChain().getTriple()); +- CmdArgs.push_back(Args.MakeArgString("-mabi=" + ABIName)); ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ++ ABIName = loongarch::getGnuCompatibleLoongArchABIName(ABIName); ++ ++ //FIXME: Currently gnu as doesn't support -march ++ //CmdArgs.push_back("-march=loongarch"); ++ //CmdArgs.push_back(CPUName.data()); ++ ++ //FIXME: modify loongarch::getGnuCompatibleLoongArchABIName() ++ CmdArgs.push_back("-mabi=lp64"); ++ //CmdArgs.push_back(ABIName.data()); ++ ++ // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, ++ // or -mshared (not implemented) is in effect. ++ if (RelocationModel == llvm::Reloc::Static) ++ CmdArgs.push_back("-mno-shared"); ++ ++ // LLVM doesn't support -mplt yet and acts as if it is always given. ++ // However, -mplt has no effect with the LP64 ABI. ++ if (ABIName != "64") ++ CmdArgs.push_back("-call_nonpic"); ++ ++ break; ++ ++ // Add the last -mfp32/-mfp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ A->claim(); ++ A->render(Args, CmdArgs); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlsx / -mno-lsx options. ++ if (A->getOption().matches(options::OPT_mlsx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlsx")); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlasx / -mno-lasx options. 
++ if (A->getOption().matches(options::OPT_mlasx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlasx")); ++ } ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mhard_float, ++ options::OPT_msoft_float); ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, ++ options::OPT_msingle_float); ++ ++ AddAssemblerKPIC(getToolChain(), Args, CmdArgs); + break; + } + case llvm::Triple::mips: +@@ -2493,10 +2548,6 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "i586-suse-linux", "i686-montavista-linux", + }; + +- static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; +- static const char *const LoongArch64Triples[] = { +- "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; +- + static const char *const M68kLibDirs[] = {"/lib"}; + static const char *const M68kTriples[] = { + "m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"}; +@@ -2572,6 +2623,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; ++ static const char *const LoongArch64Triples[] = { ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-loongson-linux-gnu", "loongarch64-redhat-linux"}; + + using std::begin; + using std::end; +@@ -2744,7 +2799,6 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); + } + break; +- // TODO: Handle loongarch32. + case llvm::Triple::loongarch64: + LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); + TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); +@@ -3108,6 +3162,7 @@ bool Generic_GCC::isPICDefault() const { + switch (getArch()) { + case llvm::Triple::x86_64: + return getTriple().isOSWindows(); ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index 4300a2bdf..26d864dd0 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -90,38 +90,10 @@ std::string Linux::getMultiarchTriple(const Driver &D, + case llvm::Triple::aarch64_be: + return "aarch64_be-linux-gnu"; + +- case llvm::Triple::loongarch64: { +- const char *Libc; +- const char *FPFlavor; +- +- if (TargetTriple.isGNUEnvironment()) { +- Libc = "gnu"; +- } else if (TargetTriple.isMusl()) { +- Libc = "musl"; +- } else { +- return TargetTriple.str(); +- } +- +- switch (TargetEnvironment) { +- default: +- return TargetTriple.str(); +- case llvm::Triple::GNUSF: +- FPFlavor = "sf"; +- break; +- case llvm::Triple::GNUF32: +- FPFlavor = "f32"; +- break; +- case llvm::Triple::GNU: +- case llvm::Triple::GNUF64: +- // This was going to be "f64" in an earlier Toolchain Conventions +- // revision, but starting from Feb 2023 the F64 ABI variants are +- // unmarked in their canonical forms. 
+- FPFlavor = ""; +- break; +- } +- +- return (Twine("loongarch64-linux-") + Libc + FPFlavor).str(); +- } ++ case llvm::Triple::loongarch32: ++ return "loongarch32-linux-gnu"; ++ case llvm::Triple::loongarch64: ++ return "loongarch64-linux-gnu"; + + case llvm::Triple::m68k: + return "m68k-linux-gnu"; +@@ -347,7 +319,7 @@ ToolChain::RuntimeLibType Linux::GetDefaultRuntimeLibType() const { + } + + unsigned Linux::GetDefaultDwarfVersion() const { +- if (getTriple().isAndroid()) ++ if (getTriple().isAndroid() || getTriple().isLoongArch()) + return 4; + return ToolChain::GetDefaultDwarfVersion(); + } +@@ -503,22 +475,11 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + Loader = HF ? "ld-linux-armhf.so.3" : "ld-linux.so.3"; + break; + } +- case llvm::Triple::loongarch32: { +- LibDir = "lib32"; +- Loader = +- ("ld-linux-loongarch-" + +- tools::loongarch::getLoongArchABI(getDriver(), Args, Triple) + ".so.1") +- .str(); +- break; +- } +- case llvm::Triple::loongarch64: { +- LibDir = "lib64"; +- Loader = +- ("ld-linux-loongarch-" + +- tools::loongarch::getLoongArchABI(getDriver(), Args, Triple) + ".so.1") +- .str(); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); ++ Loader = "ld.so.1"; + break; +- } + case llvm::Triple::m68k: + LibDir = "lib"; + Loader = "ld.so.1"; +diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp +index 8c5134e25..b38b0e350 100644 +--- a/clang/lib/Driver/XRayArgs.cpp ++++ b/clang/lib/Driver/XRayArgs.cpp +@@ -48,6 +48,7 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { + case llvm::Triple::aarch64: + case llvm::Triple::hexagon: + case llvm::Triple::ppc64le: ++ case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + case llvm::Triple::mips: + case llvm::Triple::mipsel: +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index b9a966be7..29533fa12 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -92,8 +92,8 @@ set(hlsl_files + ) + + set(loongarch_files +- larchintrin.h + lasxintrin.h ++ larchintrin.h + lsxintrin.h + ) + +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +index f42182959..591ba1e44 100644 +--- a/clang/lib/Headers/larchintrin.h ++++ b/clang/lib/Headers/larchintrin.h +@@ -1,246 +1,358 @@ +-/*===------------ larchintrin.h - LoongArch intrinsics ---------------------=== +- * +- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +- * See https://llvm.org/LICENSE.txt for license information. +- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +- * +- *===-----------------------------------------------------------------------=== +- */ +- +-#ifndef _LOONGARCH_BASE_INTRIN_H +-#define _LOONGARCH_BASE_INTRIN_H ++//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch Base intrinsics ++// ++//===----------------------------------------------------------------------===// ++#ifndef __LOONGARCH_BASE_H ++#define __LOONGARCH_BASE_H + + #ifdef __cplusplus + extern "C" { + #endif + +-typedef struct rdtime { +- unsigned int value; +- unsigned int timeid; ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; + } __rdtime_t; + +-#if __loongarch_grlen == 64 +-typedef struct drdtime { +- unsigned long dvalue; +- unsigned long dtimeid; +-} __drdtime_t; ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrrd_w(/*uimm14_32*/ _1) \ ++ ((unsigned int)__builtin_loongarch_csrrd_w(_1)) + +-extern __inline __drdtime_t +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __rdtime_d(void) { +- __drdtime_t __drdtime; +- __asm__ volatile( +- "rdtime.d %[val], %[tid]\n\t" +- : [val] "=&r"(__drdtime.dvalue), [tid] "=&r"(__drdtime.dtimeid)); +- return __drdtime; +-} +-#endif ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrwr_w(/*unsigned int*/ _1, /*uimm14_32*/ _2) \ ++ ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) + +-extern __inline __rdtime_t +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __rdtimeh_w(void) { +- __rdtime_t __rdtime; +- __asm__ volatile("rdtimeh.w %[val], %[tid]\n\t" +- : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); +- return __rdtime; +-} ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ ++#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, \ ++ /*uimm14_32*/ _3) \ ++ ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ ++ (unsigned int)(_2), (_3))) + +-extern __inline __rdtime_t +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __rdtimel_w(void) { +- __rdtime_t __rdtime; +- __asm__ volatile("rdtimel.w %[val], %[tid]\n\t" +- : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); +- return __rdtime; +-} ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrrd_d(/*uimm14*/ _1) \ ++ ((unsigned long int)__builtin_loongarch_csrrd_d(_1)) + +-#if __loongarch_grlen == 64 +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crc_w_b_w(char _1, int _2) { +- return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __csrwr_d(/*unsigned long int*/ _1, /*uimm14*/ _2) \ ++ ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ ++ (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ ++#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ ++ /*uimm14*/ _3) \ ++ ((unsigned long int)__builtin_loongarch_csrxchg_d( \ ++ (unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction 
format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); + } + +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crc_w_h_w(short _1, int _2) { +- return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); + } + +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crc_w_w_w(int _1, int _2) { +- return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); + } + +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crc_w_d_w(long int _1, int _2) { +- return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); + } + +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crcc_w_b_w(char _1, int _2) { +- return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); + } + +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crcc_w_h_w(short _1, int _2) { +- return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); + } + +-extern __inline int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crcc_w_w_w(int _1, int _2) { +- return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); + } + +-extern __inline int ++extern 
__inline unsigned int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __crcc_w_d_w(long int _1, int _2) { +- return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++ __cpucfg(unsigned int _1) { ++ return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); + } +-#endif + +-#define __break(/*ui15*/ _1) __builtin_loongarch_break((_1)) ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} + +-#if __loongarch_grlen == 32 ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned int, simm12 */ + #define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ + ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) +-#endif + +-#if __loongarch_grlen == 64 ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ + #define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ + ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) +-#endif +- +-#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) + +-#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) +- +-#define __movfcsr2gr(/*ui5*/ _1) __builtin_loongarch_movfcsr2gr((_1)); +- +-#define __movgr2fcsr(/*ui5*/ _1, _2) \ +- __builtin_loongarch_movgr2fcsr((_1), (unsigned int)_2); +- +-#define __syscall(/*ui15*/ _1) __builtin_loongarch_syscall((_1)) ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return drdtime; ++} + +-#define __csrrd_w(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd_w((_1))) ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} + +-#define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ +- ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} + +-#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ +- ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ +- (unsigned int)(_2), (_3))) ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} + +-#if __loongarch_grlen == 64 
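/* --- Editorial usage sketch (not part of the patch) --------------------------
 * A hedged example of how a few of the base intrinsics declared above, present
 * in both the old and the rewritten larchintrin.h, might be used: __rdtime_d,
 * __cpucfg and __crc_w_w_w. It assumes a loongarch64 target; the CPUCFG word
 * index and the CRC input/seed are arbitrary illustrative values.
 */
#include <larchintrin.h>
#include <stdio.h>

int main(void) {
  /* Read the stable counter value and its counter ID (rdtime.d). */
  __drdtime_t t = __rdtime_d();

  /* Read one CPUCFG configuration word (index 0 chosen for illustration). */
  unsigned int cfg0 = __cpucfg(0);

  /* Fold one 32-bit word into a CRC, starting from a seed of 0. */
  int crc = __crc_w_w_w(0x12345678, 0);

  printf("counter=%lu id=%lu cpucfg0=%#x crc=%#x\n",
         t.dvalue, t.dtimeid, cfg0, (unsigned)crc);
  return 0;
}
/* -------------------------------------------------------------------------- */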
+-#define __csrrd_d(/*ui14*/ _1) \ +- ((unsigned long int)__builtin_loongarch_csrrd_d((_1))) ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} + +-#define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ +- ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ +- (_2))) ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} + +-#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ +- /*ui14*/ _3) \ +- ((unsigned long int)__builtin_loongarch_csrxchg_d( \ +- (unsigned long int)(_1), (unsigned long int)(_2), (_3))) +-#endif ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} + +-extern __inline unsigned char +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrrd_b(unsigned int _1) { +- return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); + } + +-extern __inline unsigned short +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrrd_h(unsigned int _1) { +- return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); + } + +-extern __inline unsigned int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrrd_w(unsigned int _1) { +- return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); + } + +-#if __loongarch_grlen == 64 +-extern __inline unsigned long int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrrd_d(unsigned int _1) { +- return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); + } +-#endif + +-extern __inline void +- 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrwr_b(unsigned char _1, unsigned int _2) { +- __builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbclr() ++{ ++ return (void)__builtin_loongarch_tlbclr(); + } + +-extern __inline void +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrwr_h(unsigned short _1, unsigned int _2) { +- __builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbflush() ++{ ++ return (void)__builtin_loongarch_tlbflush(); + } + +-extern __inline void +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrwr_w(unsigned int _1, unsigned int _2) { +- __builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbfill() ++{ ++ return (void)__builtin_loongarch_tlbfill(); + } + +-extern __inline unsigned int +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __cpucfg(unsigned int _1) { +- return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbrd() ++{ ++ return (void)__builtin_loongarch_tlbrd(); + } + +-#if __loongarch_grlen == 64 +-extern __inline void +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __iocsrwr_d(unsigned long int _1, unsigned int _2) { +- __builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbwr() ++{ ++ return (void)__builtin_loongarch_tlbwr(); + } + +-extern __inline void +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __asrtgt_d(long int _1, long int _2) { +- __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbsrch() ++{ ++ return (void)__builtin_loongarch_tlbsrch(); + } + +-extern __inline void +- __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +- __asrtle_d(long int _1, long int _2) { +- __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __ibar(/*uimm15*/ _1) ((void)__builtin_loongarch_ibar(_1)) ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2); + } +-#endif + +-#if __loongarch_grlen == 64 +-#define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ +- ((long 
int)__builtin_loongarch_lddir_d((long int)(_1), (_2))) ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} + +-#define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ +- ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) +-#endif ++#define __movfcsr2gr(uimm5) \ ++({ \ ++ unsigned int rd; \ ++ __asm__ volatile ( \ ++ "movfcsr2gr %0, $fcsr" #uimm5 \ ++ : "=&r"(rd) \ ++ : \ ++ ); rd; \ ++}) ++ ++#define __movgr2fcsr(uimm5, rj) \ ++{ \ ++ __asm__ volatile ( \ ++ "movgr2fcsr $fcsr" #uimm5 ", %0" \ ++ : \ ++ : "r" (rj) \ ++ ); \ ++} + ++/* Assembly instruction format: fd, fj */ ++/* Data types in instruction templates: float, float */ + #define __frecipe_s(/*float*/ _1) \ + (float)__builtin_loongarch_frecipe_s((float)_1) + ++/* Assembly instruction format: fd, fj */ ++/* Data types in instruction templates: double, double */ + #define __frecipe_d(/*double*/ _1) \ + (double)__builtin_loongarch_frecipe_d((double)_1) + ++/* Assembly instruction format: fd, fj */ ++/* Data types in instruction templates: float, float */ + #define __frsqrte_s(/*float*/ _1) \ + (float)__builtin_loongarch_frsqrte_s((float)_1) + ++/* Assembly instruction format: fd, fj */ ++/* Data types in instruction templates: double, double */ + #define __frsqrte_d(/*double*/ _1) \ + (double)__builtin_loongarch_frsqrte_d((double)_1) + + #ifdef __cplusplus + } + #endif +-#endif /* _LOONGARCH_BASE_INTRIN_H */ ++#endif /* __LOONGARCH_BASE_H */ +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +index dafc2a2f3..6a0c44a60 100644 +--- a/clang/lib/Headers/lasxintrin.h ++++ b/clang/lib/Headers/lasxintrin.h +@@ -1,14 +1,19 @@ +-/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== +- * +- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +- * See https://llvm.org/LICENSE.txt for license information. +- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +- * +- *===-----------------------------------------------------------------------=== +- */ +- +-#ifndef _LOONGSON_ASXINTRIN_H +-#define _LOONGSON_ASXINTRIN_H 1 ++//===----------- lasxintrin.h - LoongArch LASX intrinsics ++//------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LASX intrinsics. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_ASXINTRIN_H ++#define _GCC_LOONGSON_ASXINTRIN_H 1 + + #if defined(__loongarch_asx) + +@@ -40,3114 +45,4286 @@ typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); + typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); + typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. 
*/ + #define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. 
*/ + #define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ + #define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ + #define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ + #define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ + #define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ + #define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ + #define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ + #define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. 
*/ + #define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ + #define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ + #define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ + #define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ + #define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. 
*/ + #define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ + #define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ + #define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ + #define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ + #define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ + #define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ + #define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ + #define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ + #define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ + #define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ + #define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ + #define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ + #define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ + #define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ + #define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. 
*/ + #define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ + #define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ + #define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ + #define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ + #define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. */ + #define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ + #define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ + #define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V32QI, V32QI, QI. */ + #define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V16HI, V16HI, QI. */ + #define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V8SI, V8SI, QI. */ + #define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, si5. */ ++/* Data types in instruction templates: V4DI, V4DI, QI. */ + #define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI. */ + #define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI. 
*/ + #define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI. */ + #define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI. */ + #define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ + #define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ + #define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ + #define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui1. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvand_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvnor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvxor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ + #define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, USI. */ + #define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, USI. */ + #define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V16HI, V16HI, USI. */ + #define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, USI. */ + #define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V32QI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_b(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); + } + ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V16HI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_h(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); + } + ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V8SI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_w(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); + } + ++/* Assembly instruction format: xd, rj. */ ++/* Data types in instruction templates: V4DI, DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_d(long int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); + } + ++/* Assembly instruction format: xd, xj. 
*/ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfadd_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfadd_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsub_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsub_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmul_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmul_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfdiv_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfdiv_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { + return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmin_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmin_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmina_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmina_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmax_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmax_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecip_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecip_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecipe_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecipe_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrint_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrte_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrte_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { + return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvflogb_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvth_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvth_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvtl_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvtl_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. 
*/ ++/* Data types in instruction templates: V8SF, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_w(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_l(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_wu(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_lu(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); + } + ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V32QI, V32QI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_b(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); + } + ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V16HI, V16HI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_h(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); + } + ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V8SI, V8SI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_w(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); + } + ++/* Assembly instruction format: xd, xj, rk. */ ++/* Data types in instruction templates: V4DI, V4DI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_d(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvandn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V16HI, V32QI, UQI. 
*/ + #define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V8SI, V16HI, UQI. */ + #define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V4DI, V8SI, UQI. */ + #define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: UV16HI, UV32QI, UQI. */ + #define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV8SI, UV16HI, UQI. */ + #define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV4DI, UV8SI, UQI. */ + #define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI. */ + #define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI. */ + #define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui8. 
*/ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); + } + ++/* Assembly instruction format: xd, xj, xk, xa. */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SF, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_l(__m256i _1, __m256i _2) { + return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftinth_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffinth_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffintl_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrph_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrpl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrmh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrml_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrneh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrnel_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrne_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. 
*/ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrne_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrz_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrz_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrp_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrp_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrm_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrm_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); + } + ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ + #define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ + #define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) + ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI. */ + #define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI. */ + #define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI. */ + #define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: xd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. 
*/ + #define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI. */ + #define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI. */ + #define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvorn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, i13. 
*/ ++/* Data types in instruction templates: V4DI, HI. */ + #define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) + ++/* Assembly instruction format: xd, rj, rk. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvldx(void const *_1, long int _2) { + return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); + } + ++/* Assembly instruction format: xd, rj, rk. */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lasx_xvstx(__m256i _1, void *_2, long int _3) { + return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); + } + ++/* Assembly instruction format: xd, rj, ui3. */ ++/* Data types in instruction templates: V8SI, V8SI, SI, UQI. */ + #define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) + ++/* Assembly instruction format: xd, rj, ui2. */ ++/* Data types in instruction templates: V4DI, V4DI, DI, UQI. */ + #define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_q(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj, ui8. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui8. 
*/ ++/* Data types in instruction templates: V4DI, V4DI, USI. */ + #define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvperm_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, rj, si12. */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ + #define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + ++/* Assembly instruction format: xd, rj, si11. */ ++/* Data types in instruction templates: V16HI, CVPOINTER, SI. */ + #define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + ++/* Assembly instruction format: xd, rj, si10. */ ++/* Data types in instruction templates: V8SI, CVPOINTER, SI. */ + #define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + ++/* Assembly instruction format: xd, rj, si9. */ ++/* Data types in instruction templates: V4DI, CVPOINTER, SI. */ + #define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: SI, V8SI, UQI. */ + #define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: rd, xj, ui3. */ ++/* Data types in instruction templates: USI, V8SI, UQI. */ + #define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) + ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: DI, V4DI, UQI. */ + #define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: rd, xj, ui2. */ ++/* Data types in instruction templates: UDI, V4DI, UQI. */ + #define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { +@@ -3155,6 +4332,8 @@ extern __inline + (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { +@@ -3162,6 +4341,8 @@ extern __inline + (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { +@@ -3169,6 +4350,8 @@ extern __inline + (v4u64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { +@@ -3176,6 +4359,8 @@ extern __inline + (v8u32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { +@@ -3183,6 +4368,8 @@ extern __inline + (v16u16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { +@@ -3190,18 +4377,24 @@ extern __inline + (v32u8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { +@@ -3209,6 +4402,8 @@ extern __inline + (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { +@@ -3216,6 +4411,8 @@ extern __inline + (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { +@@ -3223,6 +4420,8 @@ extern __inline + (v4u64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { +@@ -3230,6 +4429,8 @@ extern __inline + (v8u32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { +@@ -3237,6 +4438,8 @@ extern __inline + (v16u16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { +@@ -3244,6 +4447,8 @@ extern __inline + (v32u8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { +@@ -3251,6 +4456,8 @@ extern __inline + (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { +@@ -3258,6 +4465,8 @@ extern __inline + (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { +@@ -3265,6 +4474,8 @@ extern __inline + (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { +@@ -3272,6 +4483,8 @@ extern __inline + (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { +@@ -3279,6 +4492,8 @@ extern __inline + (v4i64)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { +@@ -3286,6 +4501,8 @@ extern __inline + (v8i32)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { +@@ -3293,6 +4510,8 @@ extern __inline + (v16i16)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { +@@ -3300,585 +4519,851 @@ extern __inline + (v32i8)_3); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskgez_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V32QI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsknz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V16HI, V32QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SI, V16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV16HI, UV32QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, UV16HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV8SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); + } + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); + } + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V32QI, V32QI, UQI. */ + #define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V16HI, V16HI, UQI. */ + #define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V8SI, V8SI, UQI. */ + #define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V4DI, V4DI, UQI. */ + #define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); + } + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ + #define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ + #define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ + #define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ + #define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ + #define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. 
*/ + #define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ + #define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ + #define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ + #define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ + #define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. 
*/ + #define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ + #define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ + #define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ + #define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ + #define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ + #define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ + #define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ + #define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ + #define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. 
*/ + #define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui4. */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ + #define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui5. */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ + #define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui6. */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ + #define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + ++/* Assembly instruction format: xd, xj, ui7. */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ + #define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ + #define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ + #define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. */ + #define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ + #define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ + #define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ + #define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV4DI. */ + #define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV16HI. */ + #define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV32QI. */ + #define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) + ++/* Assembly instruction format: cd, xj. */ ++/* Data types in instruction templates: SI, UV8SI. */ + #define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. 
*/ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, xk. */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); + } + ++/* Assembly instruction format: xd, xj, ui2. */ ++/* Data types in instruction templates: V4DF, V4DF, UQI. */ + #define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ + ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) + ++/* Assembly instruction format: xd, xj, ui3. */ ++/* Data types in instruction templates: V8SF, V8SF, UQI. */ + #define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ + ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) + ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V32QI, HI. */ + #define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) + ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V4DI, HI. */ + #define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) + ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V16HI, HI. */ + #define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) + ++/* Assembly instruction format: xd, si10. */ ++/* Data types in instruction templates: V8SI, HI. */ + #define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) + + #endif /* defined(__loongarch_asx). */ +-#endif /* _LOONGSON_ASXINTRIN_H. */ ++#endif /* _GCC_LOONGSON_ASXINTRIN_H. */ +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +index f347955ce..fa094d5d7 100644 +--- a/clang/lib/Headers/lsxintrin.h ++++ b/clang/lib/Headers/lsxintrin.h +@@ -1,14 +1,18 @@ +-/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== +- * +- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +- * See https://llvm.org/LICENSE.txt for license information. +- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +- * +- *===-----------------------------------------------------------------------=== +- */ +- +-#ifndef _LOONGSON_SXINTRIN_H +-#define _LOONGSON_SXINTRIN_H 1 ++//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LSX intrinsics. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_SXINTRIN_H ++#define _GCC_LOONGSON_SXINTRIN_H 1 + + #if defined(__loongarch_sx) + typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +@@ -36,2997 +40,4123 @@ typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); + typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); + typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. 
*/ + #define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. 
*/ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ + #define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ + #define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ + #define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ + #define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ + #define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ + #define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ + #define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ + #define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. 
*/ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ + #define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ + #define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ + #define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ + #define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ + #define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ + #define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ + #define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ + #define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. 
*/ + #define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ + #define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ + #define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ + #define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ + #define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ + #define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ + #define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
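++
++   Sketch: __lsx_vseq_h compares 16-bit lanes for equality and yields a mask
++   vector, all ones in equal lanes and zero elsewhere (assumptions as above;
++   helper name illustrative).
++
++     __m128i eq_mask_i16x8(__m128i a, __m128i b) {
++       return __lsx_vseq_h(a, b);
++     }
++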
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ + #define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ + #define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ + #define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ + #define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ + #define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ + #define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. 
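++
++   Sketch: a mask of the negative 32-bit lanes can be formed by comparing
++   against the immediate 0 (assumptions as above; helper name illustrative).
++
++     __m128i negative_mask_i32x4(__m128i v) {
++       return __lsx_vslti_w(v, 0);   // all ones where lane < 0, else 0
++     }
++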
*/ + #define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ + #define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ + #define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ + #define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ + #define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ + #define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V16QI, V16QI, QI. */ + #define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V8HI, V8HI, QI. */ + #define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V4SI, V4SI, QI. */ + #define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, si5. */ ++/* Data types in instruction templates: V2DI, V2DI, QI. */ + #define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI. */ + #define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI. */ + #define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI. */ + #define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI. */ + #define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. 
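++
++   Sketch (assumptions as above): __lsx_vsat_b saturates each signed byte to
++   the signed range selected by the immediate; with 3 the lanes are expected
++   to clamp to [-8, 7]. Helper name illustrative.
++
++     __m128i clamp_to_4bit(__m128i v) {
++       return __lsx_vsat_b(v, 3);
++     }
++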
*/ + #define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ + #define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ + #define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ + #define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
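++
++   Sketch: saturating signed 16-bit addition, which avoids wrap-around on
++   overflow (assumptions as above; helper name illustrative).
++
++     __m128i sadd_i16x8(__m128i a, __m128i b) {
++       return __lsx_vsadd_h(a, b);
++     }
++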
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
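++
++   Sketch (assumptions as above): __lsx_vavgr_du computes a rounding unsigned
++   average per 64-bit lane, roughly (a + b + 1) >> 1 without intermediate
++   overflow. Helper name illustrative.
++
++     __m128i avg_round_u64x2(__m128i a, __m128i b) {
++       return __lsx_vavgr_du(a, b);
++     }
++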
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
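++
++   Sketch: per-lane absolute difference of signed 32-bit elements, |a - b| in
++   each lane (assumptions as above; helper name illustrative).
++
++     __m128i absdiff_i32x4(__m128i a, __m128i b) {
++       return __lsx_vabsd_w(a, b);
++     }
++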
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. 
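++
++   Sketch: __lsx_vmadd_h accumulates the low 16 bits of the per-lane product
++   into the first operand, acc + a * b per lane (assumptions as above; helper
++   name illustrative).
++
++     __m128i madd_i16x8(__m128i acc, __m128i a, __m128i b) {
++       return __lsx_vmadd_h(acc, a, b);
++     }
++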
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. 
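++
++   Sketch: signed division in each 64-bit lane (assumptions as above; helper
++   name illustrative).
++
++     __m128i div_i64x2(__m128i a, __m128i b) {
++       return __lsx_vdiv_d(a, b);
++     }
++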
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
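++
++   Sketch: unsigned remainder in each byte lane, a % b element-wise
++   (assumptions as above; helper name illustrative).
++
++     __m128i rem_u8x16(__m128i a, __m128i b) {
++       return __lsx_vmod_bu(a, b);
++     }
++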
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V16QI, V16QI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_b(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); + } + ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V8HI, V8HI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_h(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); + } + ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V4SI, V4SI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_w(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); + } + ++/* Assembly instruction format: vd, vj, rk. */ ++/* Data types in instruction templates: V2DI, V2DI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_d(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); + } + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vand_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vnor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
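++
++   Sketch: full-width bitwise XOR; XORing a vector with itself is a common
++   way to produce an all-zero vector (assumptions as above; helper names
++   illustrative).
++
++     __m128i xor128(__m128i a, __m128i b) { return __lsx_vxor_v(a, b); }
++     __m128i zero128(__m128i a) { return __lsx_vxor_v(a, a); }
++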
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vxor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ + #define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); + } + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, USI. */ + #define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, USI. */ + #define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, USI. */ + #define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, USI. */ + #define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V16QI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_b(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); + } + ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V8HI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_h(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); + } + ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V4SI, SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_w(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); + } + ++/* Assembly instruction format: vd, rj. */ ++/* Data types in instruction templates: V2DI, DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_d(long int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_b(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_h(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. 
*/ ++/* Data types in instruction templates: V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_w(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_d(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_b(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_h(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_w(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_d(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); + } + ++/* Assembly instruction format: rd, vj, ui4. */ ++/* Data types in instruction templates: SI, V16QI, UQI. */ + #define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: SI, V8HI, UQI. */ + #define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: SI, V4SI, UQI. */ + #define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui1. 
*/ ++/* Data types in instruction templates: DI, V2DI, UQI. */ + #define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui4. */ ++/* Data types in instruction templates: USI, V16QI, UQI. */ + #define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui3. */ ++/* Data types in instruction templates: USI, V8HI, UQI. */ + #define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui2. */ ++/* Data types in instruction templates: USI, V4SI, UQI. */ + #define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) + ++/* Assembly instruction format: rd, vj, ui1. */ ++/* Data types in instruction templates: UDI, V2DI, UQI. */ + #define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ + ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, rj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, SI, UQI. */ + #define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) + ++/* Assembly instruction format: vd, rj, ui3. */ ++/* Data types in instruction templates: V8HI, V8HI, SI, UQI. */ + #define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) + ++/* Assembly instruction format: vd, rj, ui2. */ ++/* Data types in instruction templates: V4SI, V4SI, SI, UQI. */ + #define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) + ++/* Assembly instruction format: vd, rj, ui1. */ ++/* Data types in instruction templates: V2DI, V2DI, DI, UQI. */ + #define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfadd_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfadd_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsub_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsub_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmul_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmul_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfdiv_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfdiv_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { + return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmin_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmin_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmina_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmina_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmax_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmax_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmaxa_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmaxa_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_s(__m128 _1) { + return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_d(__m128d _1) { + return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecip_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecip_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecipe_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecipe_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrint_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrint_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrte_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrte_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vflogb_s(__m128 _1) { + return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vflogb_d(__m128d _1) { + return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvth_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvth_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvtl_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvtl_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_w(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_l(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_wu(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_lu(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vandn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_b(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_h(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_w(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_d(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V8HI, V16QI, UQI. */ + #define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V4SI, V8HI, UQI. */ + #define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V2DI, V4SI, UQI. */ + #define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: UV8HI, UV16QI, UQI. */ + #define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV4SI, UV8HI, UQI. */ + #define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV2DI, UV4SI, UQI. */ + #define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI. */ + #define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI. */ + #define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. 
*/ + #define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. 
*/ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); + } + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SF, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_l(__m128i _1, __m128i _2) { + return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftinth_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffinth_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffintl_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrpl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrph_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrml_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrmh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrnel_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrneh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrne_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrne_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrz_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrz_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrp_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrp_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrm_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrm_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); + } + ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI. */ + #define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI. */ + #define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI. */ + #define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: vd, rj, si8, idx. */ ++/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI. */ + #define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. 
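/* Illustrative sketch, not part of the patch: the even/odd widening add
   wrappers above widen the selected 32-bit lanes to 64 bits before
   adding, so four 32-bit lanes can be summed without overflow.  Assumes
   -mlsx and that __lsx_vadd_d (declared elsewhere in this header) is
   available; the helper name is invented. */
#include <lsxintrin.h>

static inline __m128i widening_add_w(__m128i a, __m128i b) {
  __m128i even = __lsx_vaddwev_d_w(a, b); /* a0+b0, a2+b2 as 64-bit lanes */
  __m128i odd  = __lsx_vaddwod_d_w(a, b); /* a1+b1, a3+b3 as 64-bit lanes */
  return __lsx_vadd_d(even, odd);         /* lane-wise 64-bit add */
}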
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { +@@ -3034,6 +4164,8 @@ extern __inline + (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { +@@ -3041,6 +4173,8 @@ extern __inline + (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { +@@ -3048,6 +4182,8 @@ extern __inline + (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { +@@ -3055,6 +4191,8 @@ extern __inline + (v4i32)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { +@@ -3062,6 +4200,8 @@ extern __inline + (v8i16)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { +@@ -3069,30 +4209,40 @@ extern __inline + (v16i8)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { +@@ -3100,6 +4250,8 @@ extern __inline + (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { +@@ -3107,644 +4259,936 @@ extern __inline + (v2i64)_3); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI. 
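/* Illustrative sketch, not part of the patch: the widening multiply-add
   wrappers above keep a 2 x 64-bit accumulator while consuming 32-bit
   inputs, which is the usual dot-product pattern.  Assumes -mlsx; the
   helper name is invented. */
#include <lsxintrin.h>

static inline __m128i dot_accumulate_w(__m128i acc, __m128i a, __m128i b) {
  acc = __lsx_vmaddwev_d_w(acc, a, b); /* acc += a0*b0 and a2*b2, widened */
  acc = __lsx_vmaddwod_d_w(acc, a, b); /* acc += a1*b1 and a3*b3, widened */
  return acc;
}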
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ + #define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + ++/* Assembly instruction format: vd, rj, si11. */ ++/* Data types in instruction templates: V8HI, CVPOINTER, SI. */ + #define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + ++/* Assembly instruction format: vd, rj, si10. */ ++/* Data types in instruction templates: V4SI, CVPOINTER, SI. */ + #define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + ++/* Assembly instruction format: vd, rj, si9. */ ++/* Data types in instruction templates: V2DI, CVPOINTER, SI. */ + #define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskgez_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsknz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V8HI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_h_b(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SI, V8HI. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_w_h(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_d_w(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV8HI, UV16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_hu_bu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, UV8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_wu_hu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_du_wu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); + } + ++/* Assembly instruction format: vd, vj, ui3. */ ++/* Data types in instruction templates: V16QI, V16QI, UQI. */ + #define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V8HI, V8HI, UQI. */ + #define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V4SI, V4SI, UQI. */ + #define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V2DI, V2DI, UQI. */ + #define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); + } + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. 
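/* Illustrative sketch, not part of the patch: __lsx_vexth_w_h extends the
   upper four 16-bit lanes to 32 bits, and __lsx_vrotri_w rotates each
   32-bit lane right by a constant amount.  Assumes -mlsx; the helper
   name is invented. */
#include <lsxintrin.h>

static inline __m128i exth_then_rotate(__m128i v) {
  __m128i hi32 = __lsx_vexth_w_h(v); /* lanes 4..7 of v, sign-extended */
  return __lsx_vrotri_w(hi32, 8);    /* per-lane rotate right by 8 */
}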
*/ + #define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ + #define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ + #define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. 
*/ + #define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ + #define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ + #define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ + #define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ + #define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ + #define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. 
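/* Illustrative sketch, not part of the patch, semantics hedged: the ...ni
   macros above shift the wider lanes of both operands right by a constant,
   saturate, and pack the results into one narrower vector.  Assumes -mlsx;
   the helper name is invented. */
#include <lsxintrin.h>

/* Drop 8 fractional bits from the 16-bit lanes of a and b and pack the
   results as unsigned bytes with saturation. */
static inline __m128i pack_u8_from_u16(__m128i a, __m128i b) {
  return __lsx_vssrlni_bu_h(a, b, 8);
}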
*/ + #define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ + #define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ + #define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ + #define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ + #define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ + #define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. 
*/ + #define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ + #define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ + #define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui4. */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ + #define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui5. */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ + #define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui6. */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ + #define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui7. */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ + #define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, ui8. */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ + #define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) + ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ + #define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + ++/* Assembly instruction format: vd, rj, si12. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ + #define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. 
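/* Illustrative sketch, not part of the patch: a plain load/store round
   trip through __lsx_vld/__lsx_vst.  Assumes -mlsx, 16 accessible bytes
   at each pointer, and a constant si12 displacement (0 here); the helper
   name is invented. */
#include <lsxintrin.h>

static inline void copy16(void *dst, const void *src) {
  __m128i v = __lsx_vld(src, 0); /* load 16 bytes */
  __lsx_vst(v, dst, 0);          /* store 16 bytes */
}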
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vorn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); + } + ++/* Assembly instruction format: vd, i13. */ ++/* Data types in instruction templates: V2DI, HI. */ + #define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) + ++/* Assembly instruction format: vd, vj, vk, va. */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); + } + ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: V16QI, CVPOINTER, DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vldx(void const *_1, long int _2) { + return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); + } + ++/* Assembly instruction format: vd, rj, rk. */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lsx_vstx(__m128i _1, void *_2, long int _3) { + return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); + } + ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); + } + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ + #define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ + #define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. 
*/ + #define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ + #define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ + #define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ + #define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV2DI. */ + #define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV8HI. */ + #define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV16QI. */ + #define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) + ++/* Assembly instruction format: cd, vj. */ ++/* Data types in instruction templates: SI, UV4SI. */ + #define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. 
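/* Illustrative sketch, not part of the patch, semantics hedged:
   __lsx_bz_v is expected to return nonzero when every bit of its operand
   is zero, which makes it a convenient early-exit test.  Assumes -mlsx;
   the helper name is invented. */
#include <lsxintrin.h>

static inline int is_all_zero(__m128i v) {
  return __lsx_bz_v(v);
}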
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. 
*/ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. 
*/ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, vj, vk. */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF. */ + extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); + } + ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V16QI, HI. */ + #define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) + ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V2DI, HI. */ + #define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) + ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V8HI, HI. */ + #define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) + ++/* Assembly instruction format: vd, si10. */ ++/* Data types in instruction templates: V4SI, HI. 
*/ + #define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) + + #endif /* defined(__loongarch_sx) */ +-#endif /* _LOONGSON_SXINTRIN_H */ ++#endif /* _GCC_LOONGSON_SXINTRIN_H */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index 09b7e1c62..badfaca96 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -2104,15 +2104,15 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + return CheckWebAssemblyBuiltinFunctionCall(TI, BuiltinID, TheCall); + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + return CheckNVPTXBuiltinFunctionCall(TI, BuiltinID, TheCall); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + } + } + +@@ -4103,520 +4103,27 @@ bool Sema::CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, + return CheckHexagonBuiltinArgument(BuiltinID, TheCall); + } + +-bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, +- unsigned BuiltinID, +- CallExpr *TheCall) { +- switch (BuiltinID) { +- default: +- break; +- // Basic intrinsics. +- case LoongArch::BI__builtin_loongarch_cacop_d: +- case LoongArch::BI__builtin_loongarch_cacop_w: { +- SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); +- SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), +- llvm::maxIntN(12)); +- break; ++bool Sema::CheckMipsBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, CallExpr *TheCall) { ++ return CheckMipsBuiltinCpu(TI, BuiltinID, TheCall) || ++ CheckMipsBuiltinArgument(BuiltinID, TheCall); ++} ++ ++bool Sema::CheckMipsBuiltinCpu(const TargetInfo &TI, unsigned BuiltinID, ++ CallExpr *TheCall) { ++ ++ if (Mips::BI__builtin_mips_addu_qb <= BuiltinID && ++ BuiltinID <= Mips::BI__builtin_mips_lwx) { ++ if (!TI.hasFeature("dsp")) ++ return Diag(TheCall->getBeginLoc(), diag::err_mips_builtin_requires_dsp); + } +- case LoongArch::BI__builtin_loongarch_break: +- case LoongArch::BI__builtin_loongarch_dbar: +- case LoongArch::BI__builtin_loongarch_ibar: +- case LoongArch::BI__builtin_loongarch_syscall: +- // Check if immediate is in [0, 32767]. +- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); +- case LoongArch::BI__builtin_loongarch_csrrd_w: +- case LoongArch::BI__builtin_loongarch_csrrd_d: +- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- case LoongArch::BI__builtin_loongarch_csrwr_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- case LoongArch::BI__builtin_loongarch_csrxchg_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); +- case LoongArch::BI__builtin_loongarch_lddir_d: +- case LoongArch::BI__builtin_loongarch_ldpte_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); +- case LoongArch::BI__builtin_loongarch_movfcsr2gr: +- case LoongArch::BI__builtin_loongarch_movgr2fcsr: +- return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); + +- // LSX intrinsics. 
+- case LoongArch::BI__builtin_lsx_vbitclri_b: +- case LoongArch::BI__builtin_lsx_vbitrevi_b: +- case LoongArch::BI__builtin_lsx_vbitseti_b: +- case LoongArch::BI__builtin_lsx_vsat_b: +- case LoongArch::BI__builtin_lsx_vsat_bu: +- case LoongArch::BI__builtin_lsx_vslli_b: +- case LoongArch::BI__builtin_lsx_vsrai_b: +- case LoongArch::BI__builtin_lsx_vsrari_b: +- case LoongArch::BI__builtin_lsx_vsrli_b: +- case LoongArch::BI__builtin_lsx_vsllwil_h_b: +- case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: +- case LoongArch::BI__builtin_lsx_vrotri_b: +- case LoongArch::BI__builtin_lsx_vsrlri_b: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); +- case LoongArch::BI__builtin_lsx_vbitclri_h: +- case LoongArch::BI__builtin_lsx_vbitrevi_h: +- case LoongArch::BI__builtin_lsx_vbitseti_h: +- case LoongArch::BI__builtin_lsx_vsat_h: +- case LoongArch::BI__builtin_lsx_vsat_hu: +- case LoongArch::BI__builtin_lsx_vslli_h: +- case LoongArch::BI__builtin_lsx_vsrai_h: +- case LoongArch::BI__builtin_lsx_vsrari_h: +- case LoongArch::BI__builtin_lsx_vsrli_h: +- case LoongArch::BI__builtin_lsx_vsllwil_w_h: +- case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: +- case LoongArch::BI__builtin_lsx_vrotri_h: +- case LoongArch::BI__builtin_lsx_vsrlri_h: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); +- case LoongArch::BI__builtin_lsx_vssrarni_b_h: +- case LoongArch::BI__builtin_lsx_vssrarni_bu_h: +- case LoongArch::BI__builtin_lsx_vssrani_b_h: +- case LoongArch::BI__builtin_lsx_vssrani_bu_h: +- case LoongArch::BI__builtin_lsx_vsrarni_b_h: +- case LoongArch::BI__builtin_lsx_vsrlni_b_h: +- case LoongArch::BI__builtin_lsx_vsrlrni_b_h: +- case LoongArch::BI__builtin_lsx_vssrlni_b_h: +- case LoongArch::BI__builtin_lsx_vssrlni_bu_h: +- case LoongArch::BI__builtin_lsx_vssrlrni_b_h: +- case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: +- case LoongArch::BI__builtin_lsx_vsrani_b_h: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); +- case LoongArch::BI__builtin_lsx_vslei_bu: +- case LoongArch::BI__builtin_lsx_vslei_hu: +- case LoongArch::BI__builtin_lsx_vslei_wu: +- case LoongArch::BI__builtin_lsx_vslei_du: +- case LoongArch::BI__builtin_lsx_vslti_bu: +- case LoongArch::BI__builtin_lsx_vslti_hu: +- case LoongArch::BI__builtin_lsx_vslti_wu: +- case LoongArch::BI__builtin_lsx_vslti_du: +- case LoongArch::BI__builtin_lsx_vmaxi_bu: +- case LoongArch::BI__builtin_lsx_vmaxi_hu: +- case LoongArch::BI__builtin_lsx_vmaxi_wu: +- case LoongArch::BI__builtin_lsx_vmaxi_du: +- case LoongArch::BI__builtin_lsx_vmini_bu: +- case LoongArch::BI__builtin_lsx_vmini_hu: +- case LoongArch::BI__builtin_lsx_vmini_wu: +- case LoongArch::BI__builtin_lsx_vmini_du: +- case LoongArch::BI__builtin_lsx_vaddi_bu: +- case LoongArch::BI__builtin_lsx_vaddi_hu: +- case LoongArch::BI__builtin_lsx_vaddi_wu: +- case LoongArch::BI__builtin_lsx_vaddi_du: +- case LoongArch::BI__builtin_lsx_vbitclri_w: +- case LoongArch::BI__builtin_lsx_vbitrevi_w: +- case LoongArch::BI__builtin_lsx_vbitseti_w: +- case LoongArch::BI__builtin_lsx_vsat_w: +- case LoongArch::BI__builtin_lsx_vsat_wu: +- case LoongArch::BI__builtin_lsx_vslli_w: +- case LoongArch::BI__builtin_lsx_vsrai_w: +- case LoongArch::BI__builtin_lsx_vsrari_w: +- case LoongArch::BI__builtin_lsx_vsrli_w: +- case LoongArch::BI__builtin_lsx_vsllwil_d_w: +- case LoongArch::BI__builtin_lsx_vsllwil_du_wu: +- case LoongArch::BI__builtin_lsx_vsrlri_w: +- case LoongArch::BI__builtin_lsx_vrotri_w: +- case LoongArch::BI__builtin_lsx_vsubi_bu: +- case LoongArch::BI__builtin_lsx_vsubi_hu: +- case 
LoongArch::BI__builtin_lsx_vbsrl_v: +- case LoongArch::BI__builtin_lsx_vbsll_v: +- case LoongArch::BI__builtin_lsx_vsubi_wu: +- case LoongArch::BI__builtin_lsx_vsubi_du: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); +- case LoongArch::BI__builtin_lsx_vssrarni_h_w: +- case LoongArch::BI__builtin_lsx_vssrarni_hu_w: +- case LoongArch::BI__builtin_lsx_vssrani_h_w: +- case LoongArch::BI__builtin_lsx_vssrani_hu_w: +- case LoongArch::BI__builtin_lsx_vsrarni_h_w: +- case LoongArch::BI__builtin_lsx_vsrani_h_w: +- case LoongArch::BI__builtin_lsx_vfrstpi_b: +- case LoongArch::BI__builtin_lsx_vfrstpi_h: +- case LoongArch::BI__builtin_lsx_vsrlni_h_w: +- case LoongArch::BI__builtin_lsx_vsrlrni_h_w: +- case LoongArch::BI__builtin_lsx_vssrlni_h_w: +- case LoongArch::BI__builtin_lsx_vssrlni_hu_w: +- case LoongArch::BI__builtin_lsx_vssrlrni_h_w: +- case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); +- case LoongArch::BI__builtin_lsx_vbitclri_d: +- case LoongArch::BI__builtin_lsx_vbitrevi_d: +- case LoongArch::BI__builtin_lsx_vbitseti_d: +- case LoongArch::BI__builtin_lsx_vsat_d: +- case LoongArch::BI__builtin_lsx_vsat_du: +- case LoongArch::BI__builtin_lsx_vslli_d: +- case LoongArch::BI__builtin_lsx_vsrai_d: +- case LoongArch::BI__builtin_lsx_vsrli_d: +- case LoongArch::BI__builtin_lsx_vsrari_d: +- case LoongArch::BI__builtin_lsx_vrotri_d: +- case LoongArch::BI__builtin_lsx_vsrlri_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); +- case LoongArch::BI__builtin_lsx_vssrarni_w_d: +- case LoongArch::BI__builtin_lsx_vssrarni_wu_d: +- case LoongArch::BI__builtin_lsx_vssrani_w_d: +- case LoongArch::BI__builtin_lsx_vssrani_wu_d: +- case LoongArch::BI__builtin_lsx_vsrarni_w_d: +- case LoongArch::BI__builtin_lsx_vsrlni_w_d: +- case LoongArch::BI__builtin_lsx_vsrlrni_w_d: +- case LoongArch::BI__builtin_lsx_vssrlni_w_d: +- case LoongArch::BI__builtin_lsx_vssrlni_wu_d: +- case LoongArch::BI__builtin_lsx_vssrlrni_w_d: +- case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: +- case LoongArch::BI__builtin_lsx_vsrani_w_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); +- case LoongArch::BI__builtin_lsx_vssrarni_d_q: +- case LoongArch::BI__builtin_lsx_vssrarni_du_q: +- case LoongArch::BI__builtin_lsx_vssrani_d_q: +- case LoongArch::BI__builtin_lsx_vssrani_du_q: +- case LoongArch::BI__builtin_lsx_vsrarni_d_q: +- case LoongArch::BI__builtin_lsx_vssrlni_d_q: +- case LoongArch::BI__builtin_lsx_vssrlni_du_q: +- case LoongArch::BI__builtin_lsx_vssrlrni_d_q: +- case LoongArch::BI__builtin_lsx_vssrlrni_du_q: +- case LoongArch::BI__builtin_lsx_vsrani_d_q: +- case LoongArch::BI__builtin_lsx_vsrlrni_d_q: +- case LoongArch::BI__builtin_lsx_vsrlni_d_q: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); +- case LoongArch::BI__builtin_lsx_vseqi_b: +- case LoongArch::BI__builtin_lsx_vseqi_h: +- case LoongArch::BI__builtin_lsx_vseqi_w: +- case LoongArch::BI__builtin_lsx_vseqi_d: +- case LoongArch::BI__builtin_lsx_vslti_b: +- case LoongArch::BI__builtin_lsx_vslti_h: +- case LoongArch::BI__builtin_lsx_vslti_w: +- case LoongArch::BI__builtin_lsx_vslti_d: +- case LoongArch::BI__builtin_lsx_vslei_b: +- case LoongArch::BI__builtin_lsx_vslei_h: +- case LoongArch::BI__builtin_lsx_vslei_w: +- case LoongArch::BI__builtin_lsx_vslei_d: +- case LoongArch::BI__builtin_lsx_vmaxi_b: +- case LoongArch::BI__builtin_lsx_vmaxi_h: +- case LoongArch::BI__builtin_lsx_vmaxi_w: +- case LoongArch::BI__builtin_lsx_vmaxi_d: +- case LoongArch::BI__builtin_lsx_vmini_b: +- case 
LoongArch::BI__builtin_lsx_vmini_h: +- case LoongArch::BI__builtin_lsx_vmini_w: +- case LoongArch::BI__builtin_lsx_vmini_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); +- case LoongArch::BI__builtin_lsx_vandi_b: +- case LoongArch::BI__builtin_lsx_vnori_b: +- case LoongArch::BI__builtin_lsx_vori_b: +- case LoongArch::BI__builtin_lsx_vshuf4i_b: +- case LoongArch::BI__builtin_lsx_vshuf4i_h: +- case LoongArch::BI__builtin_lsx_vshuf4i_w: +- case LoongArch::BI__builtin_lsx_vxori_b: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); +- case LoongArch::BI__builtin_lsx_vbitseli_b: +- case LoongArch::BI__builtin_lsx_vshuf4i_d: +- case LoongArch::BI__builtin_lsx_vextrins_b: +- case LoongArch::BI__builtin_lsx_vextrins_h: +- case LoongArch::BI__builtin_lsx_vextrins_w: +- case LoongArch::BI__builtin_lsx_vextrins_d: +- case LoongArch::BI__builtin_lsx_vpermi_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); +- case LoongArch::BI__builtin_lsx_vpickve2gr_b: +- case LoongArch::BI__builtin_lsx_vpickve2gr_bu: +- case LoongArch::BI__builtin_lsx_vreplvei_b: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); +- case LoongArch::BI__builtin_lsx_vinsgr2vr_b: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); +- case LoongArch::BI__builtin_lsx_vpickve2gr_h: +- case LoongArch::BI__builtin_lsx_vpickve2gr_hu: +- case LoongArch::BI__builtin_lsx_vreplvei_h: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); +- case LoongArch::BI__builtin_lsx_vinsgr2vr_h: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); +- case LoongArch::BI__builtin_lsx_vpickve2gr_w: +- case LoongArch::BI__builtin_lsx_vpickve2gr_wu: +- case LoongArch::BI__builtin_lsx_vreplvei_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); +- case LoongArch::BI__builtin_lsx_vinsgr2vr_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); +- case LoongArch::BI__builtin_lsx_vpickve2gr_d: +- case LoongArch::BI__builtin_lsx_vpickve2gr_du: +- case LoongArch::BI__builtin_lsx_vreplvei_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); +- case LoongArch::BI__builtin_lsx_vinsgr2vr_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); +- case LoongArch::BI__builtin_lsx_vstelm_b: +- return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); +- case LoongArch::BI__builtin_lsx_vstelm_h: +- return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); +- case LoongArch::BI__builtin_lsx_vstelm_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); +- case LoongArch::BI__builtin_lsx_vstelm_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); +- case LoongArch::BI__builtin_lsx_vldrepl_b: +- case LoongArch::BI__builtin_lsx_vld: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); +- case LoongArch::BI__builtin_lsx_vldrepl_h: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); +- case LoongArch::BI__builtin_lsx_vldrepl_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); +- case LoongArch::BI__builtin_lsx_vldrepl_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); +- case LoongArch::BI__builtin_lsx_vst: +- return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); +- case LoongArch::BI__builtin_lsx_vldi: +- return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); +- case LoongArch::BI__builtin_lsx_vrepli_b: +- 
case LoongArch::BI__builtin_lsx_vrepli_h: +- case LoongArch::BI__builtin_lsx_vrepli_w: +- case LoongArch::BI__builtin_lsx_vrepli_d: +- return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); +- +- // LASX intrinsics. +- case LoongArch::BI__builtin_lasx_xvbitclri_b: +- case LoongArch::BI__builtin_lasx_xvbitrevi_b: +- case LoongArch::BI__builtin_lasx_xvbitseti_b: +- case LoongArch::BI__builtin_lasx_xvsat_b: +- case LoongArch::BI__builtin_lasx_xvsat_bu: +- case LoongArch::BI__builtin_lasx_xvslli_b: +- case LoongArch::BI__builtin_lasx_xvsrai_b: +- case LoongArch::BI__builtin_lasx_xvsrari_b: +- case LoongArch::BI__builtin_lasx_xvsrli_b: +- case LoongArch::BI__builtin_lasx_xvsllwil_h_b: +- case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: +- case LoongArch::BI__builtin_lasx_xvrotri_b: +- case LoongArch::BI__builtin_lasx_xvsrlri_b: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); +- case LoongArch::BI__builtin_lasx_xvbitclri_h: +- case LoongArch::BI__builtin_lasx_xvbitrevi_h: +- case LoongArch::BI__builtin_lasx_xvbitseti_h: +- case LoongArch::BI__builtin_lasx_xvsat_h: +- case LoongArch::BI__builtin_lasx_xvsat_hu: +- case LoongArch::BI__builtin_lasx_xvslli_h: +- case LoongArch::BI__builtin_lasx_xvsrai_h: +- case LoongArch::BI__builtin_lasx_xvsrari_h: +- case LoongArch::BI__builtin_lasx_xvsrli_h: +- case LoongArch::BI__builtin_lasx_xvsllwil_w_h: +- case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: +- case LoongArch::BI__builtin_lasx_xvrotri_h: +- case LoongArch::BI__builtin_lasx_xvsrlri_h: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); +- case LoongArch::BI__builtin_lasx_xvssrarni_b_h: +- case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: +- case LoongArch::BI__builtin_lasx_xvssrani_b_h: +- case LoongArch::BI__builtin_lasx_xvssrani_bu_h: +- case LoongArch::BI__builtin_lasx_xvsrarni_b_h: +- case LoongArch::BI__builtin_lasx_xvsrlni_b_h: +- case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: +- case LoongArch::BI__builtin_lasx_xvssrlni_b_h: +- case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: +- case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: +- case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: +- case LoongArch::BI__builtin_lasx_xvsrani_b_h: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); +- case LoongArch::BI__builtin_lasx_xvslei_bu: +- case LoongArch::BI__builtin_lasx_xvslei_hu: +- case LoongArch::BI__builtin_lasx_xvslei_wu: +- case LoongArch::BI__builtin_lasx_xvslei_du: +- case LoongArch::BI__builtin_lasx_xvslti_bu: +- case LoongArch::BI__builtin_lasx_xvslti_hu: +- case LoongArch::BI__builtin_lasx_xvslti_wu: +- case LoongArch::BI__builtin_lasx_xvslti_du: +- case LoongArch::BI__builtin_lasx_xvmaxi_bu: +- case LoongArch::BI__builtin_lasx_xvmaxi_hu: +- case LoongArch::BI__builtin_lasx_xvmaxi_wu: +- case LoongArch::BI__builtin_lasx_xvmaxi_du: +- case LoongArch::BI__builtin_lasx_xvmini_bu: +- case LoongArch::BI__builtin_lasx_xvmini_hu: +- case LoongArch::BI__builtin_lasx_xvmini_wu: +- case LoongArch::BI__builtin_lasx_xvmini_du: +- case LoongArch::BI__builtin_lasx_xvaddi_bu: +- case LoongArch::BI__builtin_lasx_xvaddi_hu: +- case LoongArch::BI__builtin_lasx_xvaddi_wu: +- case LoongArch::BI__builtin_lasx_xvaddi_du: +- case LoongArch::BI__builtin_lasx_xvbitclri_w: +- case LoongArch::BI__builtin_lasx_xvbitrevi_w: +- case LoongArch::BI__builtin_lasx_xvbitseti_w: +- case LoongArch::BI__builtin_lasx_xvsat_w: +- case LoongArch::BI__builtin_lasx_xvsat_wu: +- case LoongArch::BI__builtin_lasx_xvslli_w: +- case LoongArch::BI__builtin_lasx_xvsrai_w: +- case LoongArch::BI__builtin_lasx_xvsrari_w: 
+- case LoongArch::BI__builtin_lasx_xvsrli_w: +- case LoongArch::BI__builtin_lasx_xvsllwil_d_w: +- case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: +- case LoongArch::BI__builtin_lasx_xvsrlri_w: +- case LoongArch::BI__builtin_lasx_xvrotri_w: +- case LoongArch::BI__builtin_lasx_xvsubi_bu: +- case LoongArch::BI__builtin_lasx_xvsubi_hu: +- case LoongArch::BI__builtin_lasx_xvsubi_wu: +- case LoongArch::BI__builtin_lasx_xvsubi_du: +- case LoongArch::BI__builtin_lasx_xvbsrl_v: +- case LoongArch::BI__builtin_lasx_xvbsll_v: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); +- case LoongArch::BI__builtin_lasx_xvssrarni_h_w: +- case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: +- case LoongArch::BI__builtin_lasx_xvssrani_h_w: +- case LoongArch::BI__builtin_lasx_xvssrani_hu_w: +- case LoongArch::BI__builtin_lasx_xvsrarni_h_w: +- case LoongArch::BI__builtin_lasx_xvsrani_h_w: +- case LoongArch::BI__builtin_lasx_xvfrstpi_b: +- case LoongArch::BI__builtin_lasx_xvfrstpi_h: +- case LoongArch::BI__builtin_lasx_xvsrlni_h_w: +- case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: +- case LoongArch::BI__builtin_lasx_xvssrlni_h_w: +- case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: +- case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: +- case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); +- case LoongArch::BI__builtin_lasx_xvbitclri_d: +- case LoongArch::BI__builtin_lasx_xvbitrevi_d: +- case LoongArch::BI__builtin_lasx_xvbitseti_d: +- case LoongArch::BI__builtin_lasx_xvsat_d: +- case LoongArch::BI__builtin_lasx_xvsat_du: +- case LoongArch::BI__builtin_lasx_xvslli_d: +- case LoongArch::BI__builtin_lasx_xvsrai_d: +- case LoongArch::BI__builtin_lasx_xvsrli_d: +- case LoongArch::BI__builtin_lasx_xvsrari_d: +- case LoongArch::BI__builtin_lasx_xvrotri_d: +- case LoongArch::BI__builtin_lasx_xvsrlri_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); +- case LoongArch::BI__builtin_lasx_xvssrarni_w_d: +- case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: +- case LoongArch::BI__builtin_lasx_xvssrani_w_d: +- case LoongArch::BI__builtin_lasx_xvssrani_wu_d: +- case LoongArch::BI__builtin_lasx_xvsrarni_w_d: +- case LoongArch::BI__builtin_lasx_xvsrlni_w_d: +- case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: +- case LoongArch::BI__builtin_lasx_xvssrlni_w_d: +- case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: +- case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: +- case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: +- case LoongArch::BI__builtin_lasx_xvsrani_w_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); +- case LoongArch::BI__builtin_lasx_xvssrarni_d_q: +- case LoongArch::BI__builtin_lasx_xvssrarni_du_q: +- case LoongArch::BI__builtin_lasx_xvssrani_d_q: +- case LoongArch::BI__builtin_lasx_xvssrani_du_q: +- case LoongArch::BI__builtin_lasx_xvsrarni_d_q: +- case LoongArch::BI__builtin_lasx_xvssrlni_d_q: +- case LoongArch::BI__builtin_lasx_xvssrlni_du_q: +- case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: +- case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: +- case LoongArch::BI__builtin_lasx_xvsrani_d_q: +- case LoongArch::BI__builtin_lasx_xvsrlni_d_q: +- case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); +- case LoongArch::BI__builtin_lasx_xvseqi_b: +- case LoongArch::BI__builtin_lasx_xvseqi_h: +- case LoongArch::BI__builtin_lasx_xvseqi_w: +- case LoongArch::BI__builtin_lasx_xvseqi_d: +- case LoongArch::BI__builtin_lasx_xvslti_b: +- case LoongArch::BI__builtin_lasx_xvslti_h: +- case 
LoongArch::BI__builtin_lasx_xvslti_w: +- case LoongArch::BI__builtin_lasx_xvslti_d: +- case LoongArch::BI__builtin_lasx_xvslei_b: +- case LoongArch::BI__builtin_lasx_xvslei_h: +- case LoongArch::BI__builtin_lasx_xvslei_w: +- case LoongArch::BI__builtin_lasx_xvslei_d: +- case LoongArch::BI__builtin_lasx_xvmaxi_b: +- case LoongArch::BI__builtin_lasx_xvmaxi_h: +- case LoongArch::BI__builtin_lasx_xvmaxi_w: +- case LoongArch::BI__builtin_lasx_xvmaxi_d: +- case LoongArch::BI__builtin_lasx_xvmini_b: +- case LoongArch::BI__builtin_lasx_xvmini_h: +- case LoongArch::BI__builtin_lasx_xvmini_w: +- case LoongArch::BI__builtin_lasx_xvmini_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); +- case LoongArch::BI__builtin_lasx_xvandi_b: +- case LoongArch::BI__builtin_lasx_xvnori_b: +- case LoongArch::BI__builtin_lasx_xvori_b: +- case LoongArch::BI__builtin_lasx_xvshuf4i_b: +- case LoongArch::BI__builtin_lasx_xvshuf4i_h: +- case LoongArch::BI__builtin_lasx_xvshuf4i_w: +- case LoongArch::BI__builtin_lasx_xvxori_b: +- case LoongArch::BI__builtin_lasx_xvpermi_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); +- case LoongArch::BI__builtin_lasx_xvbitseli_b: +- case LoongArch::BI__builtin_lasx_xvshuf4i_d: +- case LoongArch::BI__builtin_lasx_xvextrins_b: +- case LoongArch::BI__builtin_lasx_xvextrins_h: +- case LoongArch::BI__builtin_lasx_xvextrins_w: +- case LoongArch::BI__builtin_lasx_xvextrins_d: +- case LoongArch::BI__builtin_lasx_xvpermi_q: +- case LoongArch::BI__builtin_lasx_xvpermi_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); +- case LoongArch::BI__builtin_lasx_xvrepl128vei_b: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); +- case LoongArch::BI__builtin_lasx_xvrepl128vei_h: +- case LoongArch::BI__builtin_lasx_xvpickve2gr_w: +- case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: +- case LoongArch::BI__builtin_lasx_xvpickve_w_f: +- case LoongArch::BI__builtin_lasx_xvpickve_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); +- case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: +- case LoongArch::BI__builtin_lasx_xvinsve0_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); +- case LoongArch::BI__builtin_lasx_xvrepl128vei_w: +- case LoongArch::BI__builtin_lasx_xvpickve2gr_d: +- case LoongArch::BI__builtin_lasx_xvpickve2gr_du: +- case LoongArch::BI__builtin_lasx_xvpickve_d_f: +- case LoongArch::BI__builtin_lasx_xvpickve_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); +- case LoongArch::BI__builtin_lasx_xvinsve0_d: +- case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); +- case LoongArch::BI__builtin_lasx_xvstelm_b: +- return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); +- case LoongArch::BI__builtin_lasx_xvstelm_h: +- return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); +- case LoongArch::BI__builtin_lasx_xvstelm_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); +- case LoongArch::BI__builtin_lasx_xvstelm_d: +- return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || +- SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); +- case LoongArch::BI__builtin_lasx_xvrepl128vei_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); +- case LoongArch::BI__builtin_lasx_xvldrepl_b: +- case LoongArch::BI__builtin_lasx_xvld: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); +- case 
LoongArch::BI__builtin_lasx_xvldrepl_h: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); +- case LoongArch::BI__builtin_lasx_xvldrepl_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); +- case LoongArch::BI__builtin_lasx_xvldrepl_d: +- return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); +- case LoongArch::BI__builtin_lasx_xvst: +- return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); +- case LoongArch::BI__builtin_lasx_xvldi: +- return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); +- case LoongArch::BI__builtin_lasx_xvrepli_b: +- case LoongArch::BI__builtin_lasx_xvrepli_h: +- case LoongArch::BI__builtin_lasx_xvrepli_w: +- case LoongArch::BI__builtin_lasx_xvrepli_d: +- return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); +- } +- return false; +-} +- +-bool Sema::CheckMipsBuiltinFunctionCall(const TargetInfo &TI, +- unsigned BuiltinID, CallExpr *TheCall) { +- return CheckMipsBuiltinCpu(TI, BuiltinID, TheCall) || +- CheckMipsBuiltinArgument(BuiltinID, TheCall); +-} +- +-bool Sema::CheckMipsBuiltinCpu(const TargetInfo &TI, unsigned BuiltinID, +- CallExpr *TheCall) { +- +- if (Mips::BI__builtin_mips_addu_qb <= BuiltinID && +- BuiltinID <= Mips::BI__builtin_mips_lwx) { +- if (!TI.hasFeature("dsp")) +- return Diag(TheCall->getBeginLoc(), diag::err_mips_builtin_requires_dsp); +- } +- +- if (Mips::BI__builtin_mips_absq_s_qb <= BuiltinID && +- BuiltinID <= Mips::BI__builtin_mips_subuh_r_qb) { +- if (!TI.hasFeature("dspr2")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_mips_builtin_requires_dspr2); +- } ++ if (Mips::BI__builtin_mips_absq_s_qb <= BuiltinID && ++ BuiltinID <= Mips::BI__builtin_mips_subuh_r_qb) { ++ if (!TI.hasFeature("dspr2")) ++ return Diag(TheCall->getBeginLoc(), ++ diag::err_mips_builtin_requires_dspr2); ++ } + + if (Mips::BI__builtin_msa_add_a_b <= BuiltinID && + BuiltinID <= Mips::BI__builtin_msa_xori_b) { +@@ -6115,6 +5622,575 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, + return false; + } + ++// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the ++// intrinsic is correct. ++// ++// FIXME: The size tests here should instead be tablegen'd along with the ++// definitions from include/clang/Basic/BuiltinsLoongArch.def. ++// FIXME: GCC is strict on signedness for some of these intrinsics, we should ++// be too. ++bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall) { ++ unsigned i = 0, l = 0, u = 0, m = 0; ++ switch (BuiltinID) { ++ default: return false; ++ // LSX/LASX intrinsics. ++ // These intrinsics take an unsigned 3 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ 
case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate. ++ // The first block of intrinsics actually have an unsigned 5 bit field, ++ // not a df/n field. ++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case 
LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: ++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ i = 1; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ i = 2; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ // These intrinsics take an unsigned 6 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ i = 1; ++ l = 0; ++ u = 63; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ i = 2; ++ l = 0; ++ u = 63; ++ break; ++ // These intrinsics take an unsigned 7 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ i = 2; ++ l = 0; ++ u = 127; ++ break; ++ // These intrinsics take a signed 5 bit immediate. ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ i = 1; ++ l = -16; ++ u = 15; ++ break; ++ // These intrinsics take a signed 9 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ i = 1; ++ l = -256; ++ u = 255; ++ break; ++ // These intrinsics take an unsigned 8 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ i = 1; ++ l = 0; ++ u = 255; ++ break; ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ i = 2; ++ l = 0; ++ u = 255; ++ break; ++ // df/n format ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ i = 2; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ // These intrinsics take an unsigned 2 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ i = 1; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ // These intrinsics take an unsigned 1 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ i = 1; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ // Memory offsets and immediate loads. ++ // These intrinsics take a signed 10 bit immediate. ++ case LoongArch::BI__builtin_lsx_vrepli_b: ++ case LoongArch::BI__builtin_lsx_vrepli_h: ++ case LoongArch::BI__builtin_lsx_vrepli_w: ++ case LoongArch::BI__builtin_lsx_vrepli_d: ++ case LoongArch::BI__builtin_lasx_xvrepli_b: ++ case LoongArch::BI__builtin_lasx_xvrepli_h: ++ case LoongArch::BI__builtin_lasx_xvrepli_w: ++ case LoongArch::BI__builtin_lasx_xvrepli_d: ++ i = 0; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ i = 1; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ i = 1; ++ l = -1024; ++ u = 1023; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvld: ++ case LoongArch::BI__builtin_lsx_vld: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lsx_vst: ++ case LoongArch::BI__builtin_lasx_xvst: ++ i = 2; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldi: ++ case LoongArch::BI__builtin_lsx_vldi: ++ i = 0; ++ l = -4096; ++ u = 4095; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. ++ case LoongArch::BI__builtin_loongarch_cacop_w: ++ case LoongArch::BI__builtin_loongarch_cacop_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || ++ SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ // These intrinsics take an unsigned 14 bit immediate. ++ case LoongArch::BI__builtin_loongarch_csrrd_w: ++ case LoongArch::BI__builtin_loongarch_csrrd_d: ++ i = 0; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrwr_w: ++ case LoongArch::BI__builtin_loongarch_csrwr_d: ++ i = 1; ++ l = 0; ++ u = 16383; ++ break; ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: ++ case LoongArch::BI__builtin_loongarch_csrxchg_d: ++ i = 2; ++ l = 0; ++ u = 16383; ++ break; ++ // These intrinsics take an unsigned 15 bit immediate. 
++ case LoongArch::BI__builtin_loongarch_dbar: ++ case LoongArch::BI__builtin_loongarch_ibar: ++ case LoongArch::BI__builtin_loongarch_syscall: ++ case LoongArch::BI__builtin_loongarch_break: i = 0; l = 0; u = 32767; break; ++ } ++ ++ if (!m) ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u); ++ ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u) || ++ SemaBuiltinConstantArgMultiple(TheCall, i, m); ++} ++ + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + if (BuiltinID == SystemZ::BI__builtin_tabort) { +diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp +index 6f462de4b..4d4126a7f 100644 +--- a/clang/lib/Sema/SemaDeclAttr.cpp ++++ b/clang/lib/Sema/SemaDeclAttr.cpp +@@ -3369,22 +3369,6 @@ static void handleSectionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + } + } + +-static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { +- StringRef Str; +- SourceLocation LiteralLoc; +- // Check that it is a string. +- if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc)) +- return; +- +- llvm::CodeModel::Model CM; +- if (!CodeModelAttr::ConvertStrToModel(Str, CM)) { +- S.Diag(LiteralLoc, diag::err_attr_codemodel_arg) << Str; +- return; +- } +- +- D->addAttr(::new (S.Context) CodeModelAttr(S.Context, AL, CM)); +-} +- + // This is used for `__declspec(code_seg("segname"))` on a decl. + // `#pragma code_seg("segname")` uses checkSectionName() instead. + static bool checkCodeSegName(Sema &S, SourceLocation LiteralLoc, +@@ -9458,9 +9442,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, + case ParsedAttr::AT_Section: + handleSectionAttr(S, D, AL); + break; +- case ParsedAttr::AT_CodeModel: +- handleCodeModelAttr(S, D, AL); +- break; + case ParsedAttr::AT_RandomizeLayout: + handleRandomizeLayoutAttr(S, D, AL); + break; +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index 2f7596f0e..2affc5a3d 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -1,6 +1,6 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - | \ + // RUN: FileCheck --check-prefix=CHECK-C %s +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - -x c++ | \ + // RUN: FileCheck --check-prefix=CHECK-CXX %s + + // Fields containing empty structs are ignored when flattening +@@ -14,32 +14,32 @@ + struct empty { struct { struct { } e; }; }; + struct s1 { struct empty e; float f; }; + +-// CHECK-C: define{{.*}} float @test_s1(float {{.*}}) +-// CHECK-CXX: define{{.*}} float @_Z7test_s12s1(float {{.*}}) ++// CHECK-C: define{{.*}} float @test_s1(float{{.*}}) ++// CHECK-CXX: define{{.*}} float @_Z7test_s12s1(float{{.*}}) + struct s1 test_s1(struct s1 a) { + return a; + } + + struct s2 { struct empty e; int32_t i; float f; }; + +-// CHECK-C: define{{.*}} { i32, float } @test_s2(i32 {{.*}}, float {{.*}}) +-// CHECK-CXX: define{{.*}} { i32, float } @_Z7test_s22s2(i32 {{.*}}, float {{.*}}) ++// CHECK-C: define{{.*}} { i32, float } @test_s2(i32{{.*}}, float{{.*}}) ++// CHECK-CXX: define{{.*}} { i32, float } @_Z7test_s22s2(i32{{.*}}, float{{.*}}) + struct s2 test_s2(struct s2 a) 
{ + return a; + } + + struct s3 { struct empty e; float f; float g; }; + +-// CHECK-C: define{{.*}} { float, float } @test_s3(float {{.*}}, float {{.*}}) +-// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s32s3(float {{.*}}, float {{.*}}) ++// CHECK-C: define{{.*}} { float, float } @test_s3(float{{.*}}, float{{.*}}) ++// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s32s3(float{{.*}}, float{{.*}}) + struct s3 test_s3(struct s3 a) { + return a; + } + + struct s4 { struct empty e; float __complex__ c; }; + +-// CHECK-C: define{{.*}} { float, float } @test_s4(float {{.*}}, float {{.*}}) +-// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s42s4(float {{.*}}, float {{.*}}) ++// CHECK-C: define{{.*}} { float, float } @test_s4(float{{.*}}, float{{.*}}) ++// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s42s4(float{{.*}}, float{{.*}}) + struct s4 test_s4(struct s4 a) { + return a; + } +@@ -49,8 +49,8 @@ struct s4 test_s4(struct s4 a) { + + struct s5 { struct empty e[1]; float f; }; + +-// CHECK-C: define{{.*}} float @test_s5(float {{.*}}) +-// CHECK-CXX: define{{.*}} i64 @_Z7test_s52s5(i64 {{.*}}) ++// CHECK-C: define{{.*}} float @test_s5(float{{.*}}) ++// CHECK-CXX: define{{.*}} i64 @_Z7test_s52s5(i64{{.*}}) + struct s5 test_s5(struct s5 a) { + return a; + } +@@ -58,16 +58,16 @@ struct s5 test_s5(struct s5 a) { + struct empty_arr { struct { struct { } e[1]; }; }; + struct s6 { struct empty_arr e; float f; }; + +-// CHECK-C: define{{.*}} float @test_s6(float {{.*}}) +-// CHECK-CXX: define{{.*}} i64 @_Z7test_s62s6(i64 {{.*}}) ++// CHECK-C: define{{.*}} float @test_s6(float{{.*}}) ++// CHECK-CXX: define{{.*}} i64 @_Z7test_s62s6(i64{{.*}}) + struct s6 test_s6(struct s6 a) { + return a; + } + + struct s7 { struct empty e[0]; float f; }; + +-// CHECK-C: define{{.*}} float @test_s7(float {{.*}}) +-// CHECK-CXX: define{{.*}} float @_Z7test_s72s7(float {{.*}}) ++// CHECK-C: define{{.*}} float @test_s7(float{{.*}}) ++// CHECK-CXX: define{{.*}} float @_Z7test_s72s7(float{{.*}}) + struct s7 test_s7(struct s7 a) { + return a; + } +@@ -75,8 +75,8 @@ struct s7 test_s7(struct s7 a) { + struct empty_arr0 { struct { struct { } e[0]; }; }; + struct s8 { struct empty_arr0 e; float f; }; + +-// CHECK-C: define{{.*}} float @test_s8(float {{.*}}) +-// CHECK-CXX: define{{.*}} float @_Z7test_s82s8(float {{.*}}) ++// CHECK-C: define{{.*}} float @test_s8(float{{.*}}) ++// CHECK-CXX: define{{.*}} float @_Z7test_s82s8(float{{.*}}) + struct s8 test_s8(struct s8 a) { + return a; + } +@@ -87,7 +87,9 @@ struct s8 test_s8(struct s8 a) { + + // CHECK-C: define{{.*}} void @test_s9() + // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) +-struct s9 { struct empty e; }; ++struct s9 { ++ struct empty e; ++}; + struct s9 test_s9(struct s9 a) { + return a; + } +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +index 363e37efb..771b1837f 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -1,13 +1,13 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - | \ + // RUN: FileCheck --check-prefix=CHECK-C %s +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - -x c++ | \ + // 
RUN: FileCheck --check-prefix=CHECK-CXX %s + + #include + + // CHECK-C: define{{.*}} void @test1() + // CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) +-union u1 { }; ++union u1 {}; + union u1 test1(union u1 a) { + return a; + } +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c +index 66b480a7f..51174cd31 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c +@@ -1,5 +1,5 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ +-// RUN: -emit-llvm %s -o - | FileCheck %s ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ ++ // RUN: -emit-llvm %s -o - | FileCheck %s + + /// This test checks the calling convention of the lp64d ABI. + +@@ -59,6 +59,7 @@ long double check_longdouble() { return 0; } + + /// Part 1: Scalar arguments and return value. + ++/// The lp64d abi says: + /// 1. 1 < WOA <= GRLEN + /// a. Argument is passed in a single argument register, or on the stack by + /// value if none is available. +@@ -80,19 +81,21 @@ long double check_longdouble() { return 0; } + /// bits are passed on the stack. If no GAR is available, it’s passed on the + /// stack. + +-/// Note that most of these conventions are handled by the backend, so here we ++/// Note that most of these conventions are handled at the llvm side, so here we + /// only check the correctness of argument (or return value)'s sign/zero + /// extension attribute. + +-// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1 noundef zeroext %a, i8 noundef signext %b, i8 noundef zeroext %c, i16 noundef signext %d, i16 noundef zeroext %e, i32 noundef signext %f, i32 noundef signext %g, i64 noundef %h, i1 noundef zeroext %i, i8 noundef signext %j, i8 noundef zeroext %k, i16 noundef signext %l, i16 noundef zeroext %m, i32 noundef signext %n, i32 noundef signext %o, i64 noundef %p) ++// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) + int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, +- uint32_t g, int64_t h, _Bool i, int8_t j, uint8_t k, int16_t l, +- uint16_t m, int32_t n, uint32_t o, int64_t p) { ++ uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, ++ int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, ++ uint32_t o, int64_t p) { + return 0; + } + + /// Part 2: Structure arguments and return value. + ++/// The lp64d abi says: + /// Empty structures are ignored by C compilers which support them as a + /// non-standard extension(same as union arguments and return values). 
Bits + /// unused due to padding, and bits past the end of a structure whose size in +@@ -321,7 +324,7 @@ struct i64x4_s { + int64_t a, b, c, d; + }; + +-// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s) align 8 %agg.result, ptr{{.*}} %x) ++// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, ptr{{.*}} %x) + struct i64x4_s f_i64x4_s(struct i64x4_s x) { + return x; + } +@@ -330,7 +333,7 @@ struct f64x4_s { + double a, b, c, d; + }; + +-// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s) align 8 %agg.result, ptr{{.*}} %x) ++// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, ptr{{.*}} %x) + struct f64x4_s f_f64x4_s(struct f64x4_s x) { + return x; + } +@@ -397,7 +400,7 @@ union i64_arr3_u { + int64_t a[3]; + }; + +-// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u) align 8 %agg.result, ptr{{.*}} %x) ++// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, ptr{{.*}} %x) + union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { + return x; + } +@@ -408,10 +411,10 @@ union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { + /// floating-point number, is passed as though it were a structure containing + /// two floating-point reals. + +-// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float noundef %x.coerce0, float noundef %x.coerce1) ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1) + float __complex__ f_floatcomplex(float __complex__ x) { return x; } + +-// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double noundef %x.coerce0, double noundef %x.coerce1) ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1) + double __complex__ f_doublecomplex(double __complex__ x) { return x; } + + struct floatcomplex_s { +@@ -430,6 +433,36 @@ struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { + return x; + } + ++/// Complex floating-point values or structs containing a single complex ++/// floating-point value should be passed in GPRs if no two FPRs is available. 
++ ++ ++// CHECK: define{{.*}} void @f_floatcomplex_insufficient_fprs1(float{{.*}} %a.coerce0, float{{.*}} %a.coerce1, float{{.*}} %b.coerce0, float{{.*}} %b.coerce1, float{{.*}} %c.coerce0, float{{.*}} %c.coerce1, float{{.*}} %d.coerce0, float{{.*}} %d.coerce1, i64{{.*}} %e.coerce) ++void f_floatcomplex_insufficient_fprs1(float __complex__ a, float __complex__ b, ++ float __complex__ c, float __complex__ d, ++ float __complex__ e) {} ++ ++ ++// CHECK: define{{.*}} void @f_floatcomplex_s_arg_insufficient_fprs1(float{{.*}} %0, float{{.*}} %1, float{{.*}} %2, float{{.*}} %3, float{{.*}} %4, float{{.*}} %5, float{{.*}} %6, float{{.*}} %7, i64 %e.coerce) ++void f_floatcomplex_s_arg_insufficient_fprs1(struct floatcomplex_s a, ++ struct floatcomplex_s b, ++ struct floatcomplex_s c, ++ struct floatcomplex_s d, ++ struct floatcomplex_s e) {} ++ ++// CHECK: define{{.*}} void @f_floatcomplex_insufficient_fprs2(float{{.*}} %a, float{{.*}} %b.coerce0, float{{.*}} %b.coerce1, float{{.*}} %c.coerce0, float{{.*}} %c.coerce1, float{{.*}} %d.coerce0, float{{.*}} %d.coerce1, i64{{.*}} %e.coerce) ++void f_floatcomplex_insufficient_fprs2(float a, ++ float __complex__ b, float __complex__ c, ++ float __complex__ d, float __complex__ e) {} ++ ++ ++// CHECK: define{{.*}} void @f_floatcomplex_s_arg_insufficient_fprs2(float{{.*}} %a, float{{.*}} %0, float{{.*}} %1, float{{.*}} %2, float{{.*}} %3, float{{.*}} %4, float{{.*}} %5, i64{{.*}} %e.coerce) ++void f_floatcomplex_s_arg_insufficient_fprs2(float a, ++ struct floatcomplex_s b, ++ struct floatcomplex_s c, ++ struct floatcomplex_s d, ++ struct floatcomplex_s e) {} ++ + /// Part 5: Variadic arguments. + + /// Variadic arguments are passed in GARs in the same manner as named arguments. +@@ -437,27 +470,28 @@ struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { + int f_va_callee(int, ...); + + // CHECK-LABEL: define{{.*}} void @f_va_caller() +-// CHECK: call signext i32 (i32, ...) @f_va_callee(i32 noundef signext 1, i32 noundef signext 2, i64 noundef 3, double noundef 4.000000e+00, double noundef 5.000000e+00, i64 {{.*}}, [2 x i64] {{.*}}) ++// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) + void f_va_caller(void) { + f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9}, + (struct i64x2_s){10, 11}); + } + +-// CHECK-LABEL: @f_va_int( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 8 +-// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8 +-// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +-// CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 8 +-// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +-// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 +-// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8 +-// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8 +-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8 +-// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +-// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 +-// CHECK-NEXT: ret i32 [[TMP1]] ++// CHECK-LABE: define signext i32 @f_va_int(ptr{{.*}} %fmt, ...) 
++// CHECK: entry: ++// CHECK: %fmt.addr = alloca ptr, align 8 ++// CHECK: %va = alloca ptr, align 8 ++// CHECK: %v = alloca i32, align 4 ++// CHECK: store ptr %fmt, ptr %fmt.addr, align 8 ++// CHECK: call void @llvm.va_start(ptr %va) ++// CHECK: %argp.cur = load ptr, ptr %va, align 8 ++// CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8 ++// CHECK: store ptr %argp.next, ptr %va, align 8 ++// CHECK: %0 = load i32, ptr %argp.cur, align 8 ++// CHECK: store i32 %0, ptr %v, align 4 ++// CHECK: call void @llvm.va_end(ptr %va) ++// CHECK: %1 = load i32, ptr %v, align 4 ++// CHECK: ret i32 %1 ++// CHECK: } + int f_va_int(char *fmt, ...) { + __builtin_va_list va; + __builtin_va_start(va, fmt); +@@ -465,24 +499,3 @@ int f_va_int(char *fmt, ...) { + __builtin_va_end(va); + return v; + } +- +-/// Part 6. Structures with zero size fields (bitfields or arrays). +- +-/// Check that zero size fields in structure are ignored. +-/// Note that this rule is not explicitly documented in ABI spec but it matches +-/// GCC's behavior. +- +-struct f64x2_zsfs_s { +- double a; +- int : 0; +- __int128_t : 0; +- int b[0]; +- __int128_t c[0]; +- double d; +-}; +- +-// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_zsfs_s(double %0, double %1) +-struct f64x2_zsfs_s f_f64x2_zsfs_s(struct f64x2_zsfs_s x) { +- return x; +-} +- +diff --git a/clang/test/CodeGen/LoongArch/atomics.c b/clang/test/CodeGen/LoongArch/atomics.c +deleted file mode 100644 +index edc58d30d..000000000 +--- a/clang/test/CodeGen/LoongArch/atomics.c ++++ /dev/null +@@ -1,45 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch32 -O1 -emit-llvm %s -o - \ +-// RUN: | FileCheck %s --check-prefix=LA32 +-// RUN: %clang_cc1 -triple loongarch64 -O1 -emit-llvm %s -o - \ +-// RUN: | FileCheck %s --check-prefix=LA64 +- +-/// This test demonstrates that MaxAtomicInlineWidth is set appropriately. 
+- +-#include +-#include +- +-void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) { +- // LA32: load atomic i8, ptr %a seq_cst, align 1 +- // LA32: store atomic i8 %b, ptr %a seq_cst, align 1 +- // LA32: atomicrmw add ptr %a, i8 %b seq_cst +- // LA64: load atomic i8, ptr %a seq_cst, align 1 +- // LA64: store atomic i8 %b, ptr %a seq_cst, align 1 +- // LA64: atomicrmw add ptr %a, i8 %b seq_cst +- __c11_atomic_load(a, memory_order_seq_cst); +- __c11_atomic_store(a, b, memory_order_seq_cst); +- __c11_atomic_fetch_add(a, b, memory_order_seq_cst); +-} +- +-void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) { +- // LA32: load atomic i32, ptr %a seq_cst, align 4 +- // LA32: store atomic i32 %b, ptr %a seq_cst, align 4 +- // LA32: atomicrmw add ptr %a, i32 %b seq_cst +- // LA64: load atomic i32, ptr %a seq_cst, align 4 +- // LA64: store atomic i32 %b, ptr %a seq_cst, align 4 +- // LA64: atomicrmw add ptr %a, i32 %b seq_cst +- __c11_atomic_load(a, memory_order_seq_cst); +- __c11_atomic_store(a, b, memory_order_seq_cst); +- __c11_atomic_fetch_add(a, b, memory_order_seq_cst); +-} +- +-void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) { +- // LA32: call i64 @__atomic_load_8 +- // LA32: call void @__atomic_store_8 +- // LA32: call i64 @__atomic_fetch_add_8 +- // LA64: load atomic i64, ptr %a seq_cst, align 8 +- // LA64: store atomic i64 %b, ptr %a seq_cst, align 8 +- // LA64: atomicrmw add ptr %a, i64 %b seq_cst +- __c11_atomic_load(a, memory_order_seq_cst); +- __c11_atomic_store(a, b, memory_order_seq_cst); +- __c11_atomic_fetch_add(a, b, memory_order_seq_cst); +-} +diff --git a/clang/test/CodeGen/LoongArch/attributes.cpp b/clang/test/CodeGen/LoongArch/attributes.cpp +deleted file mode 100644 +index fb700ad30..000000000 +--- a/clang/test/CodeGen/LoongArch/attributes.cpp ++++ /dev/null +@@ -1,34 +0,0 @@ +-// RUN: %clang_cc1 -emit-llvm -triple loongarch64 %s -o - | FileCheck %s +- +-// CHECK: @_ZL2v1 ={{.*}} global i32 0, code_model "small" +-static int v1 __attribute__((model("normal"))); +- +-void use1() { +- v1 = 1; +-} +- +-// CHECK: @v2 ={{.*}} global i32 0, code_model "medium" +-int v2 __attribute__((model("medium"))); +- +-// CHECK: @v3 ={{.*}} global float 0.000000e+00, code_model "large" +-float v3 __attribute__((model("extreme"))); +- +-// CHECK: @_ZL2v4IiE ={{.*}} global i32 0, code_model "medium" +-template +-static T v4 __attribute__((model("medium"))); +- +-void use2() { +- v4 = 1; +-} +- +-struct S { +- double d; +-}; +- +-// CHECK: @v5 ={{.*}} global {{.*}}, code_model "medium" +-S v5 __attribute__((model("medium"))); +- +-typedef void (*F)(); +- +-// CHECK: @v6 ={{.*}} global ptr null, code_model "large" +-F v6 __attribute__((model("extreme"))); +diff --git a/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c +index e5fe68434..35a19323b 100644 +--- a/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c ++++ b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c +@@ -1,45 +1,40 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +-// RUN: %clang_cc1 -triple loongarch32 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - | FileCheck %s + + #include + +-// CHECK-LABEL: 
@frecipe_d ++// CHECK-LABEL: @frecipe_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[_1:%.*]]) + // CHECK-NEXT: ret double [[TMP0]] + // +-double frecipe_d (double _1) +-{ +- return __builtin_loongarch_frecipe_d (_1); ++double frecipe_d(double _1) { ++ return __builtin_loongarch_frecipe_d(_1); + } + +-// CHECK-LABEL: @frsqrte_d ++// CHECK-LABEL: @frsqrte_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[_1:%.*]]) + // CHECK-NEXT: ret double [[TMP0]] + // +-double frsqrte_d (double _1) +-{ +- return __builtin_loongarch_frsqrte_d (_1); ++double frsqrte_d(double _1) { ++ return __builtin_loongarch_frsqrte_d(_1); + } + +-// CHECK-LABEL: @frecipe_d_alia ++// CHECK-LABEL: @frecipe_d_alia( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[_1:%.*]]) + // CHECK-NEXT: ret double [[TMP0]] + // +-double frecipe_d_alia (double _1) +-{ +- return __frecipe_d (_1); ++double frecipe_d_alia(double _1) { ++ return __frecipe_d(_1); + } + +-// CHECK-LABEL: @frsqrte_d_alia ++// CHECK-LABEL: @frsqrte_d_alia( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[_1:%.*]]) + // CHECK-NEXT: ret double [[TMP0]] + // +-double frsqrte_d_alia (double _1) +-{ +- return __frsqrte_d (_1); ++double frsqrte_d_alia(double _1) { ++ return __frsqrte_d(_1); + } +diff --git a/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c +index 47bb47084..548c2b6b4 100644 +--- a/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c ++++ b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c +@@ -1,45 +1,40 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +-// RUN: %clang_cc1 -triple loongarch32 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - | FileCheck %s + + #include + +-// CHECK-LABEL: @frecipe_s ++// CHECK-LABEL: @frecipe_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[_1:%.*]]) + // CHECK-NEXT: ret float [[TMP0]] + // +-float frecipe_s (float _1) +-{ +- return __builtin_loongarch_frecipe_s (_1); ++float frecipe_s(float _1) { ++ return __builtin_loongarch_frecipe_s(_1); + } + +-// CHECK-LABEL: @frsqrte_s ++// CHECK-LABEL: @frsqrte_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[_1:%.*]]) + // CHECK-NEXT: ret float [[TMP0]] + // +-float frsqrte_s (float _1) +-{ +- return __builtin_loongarch_frsqrte_s (_1); ++float frsqrte_s(float 
_1) { ++ return __builtin_loongarch_frsqrte_s(_1); + } + +-// CHECK-LABEL: @frecipe_s_alia ++// CHECK-LABEL: @frecipe_s_alia( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[_1:%.*]]) + // CHECK-NEXT: ret float [[TMP0]] + // +-float frecipe_s_alia (float _1) +-{ +- return __frecipe_s (_1); ++float frecipe_s_alia(float _1) { ++ return __frecipe_s(_1); + } + +-// CHECK-LABEL: @frsqrte_s_alia ++// CHECK-LABEL: @frsqrte_s_alia( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) ++// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[_1:%.*]]) + // CHECK-NEXT: ret float [[TMP0]] + // +-float frsqrte_s_alia (float _1) +-{ +- return __frsqrte_s (_1); ++float frsqrte_s_alia(float _1) { ++ return __frsqrte_s(_1); + } +diff --git a/clang/test/CodeGen/LoongArch/inline-asm-constraints-error.c b/clang/test/CodeGen/LoongArch/inline-asm-constraints-error.c +deleted file mode 100644 +index b4430cf40..000000000 +--- a/clang/test/CodeGen/LoongArch/inline-asm-constraints-error.c ++++ /dev/null +@@ -1,23 +0,0 @@ +-// RUN: not %clang_cc1 -triple loongarch32 -O2 -emit-llvm %s 2>&1 -o - | FileCheck %s +-// RUN: not %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s 2>&1 -o - | FileCheck %s +- +-void test_l(void) { +-// CHECK: :[[#@LINE+1]]:27: error: value '32768' out of range for constraint 'l' +- asm volatile ("" :: "l"(32768)); +-// CHECK: :[[#@LINE+1]]:27: error: value '-32769' out of range for constraint 'l' +- asm volatile ("" :: "l"(-32769)); +-} +- +-void test_I(void) { +-// CHECK: :[[#@LINE+1]]:27: error: value '2048' out of range for constraint 'I' +- asm volatile ("" :: "I"(2048)); +-// CHECK: :[[#@LINE+1]]:27: error: value '-2049' out of range for constraint 'I' +- asm volatile ("" :: "I"(-2049)); +-} +- +-void test_K(void) { +-// CHECK: :[[#@LINE+1]]:27: error: value '4096' out of range for constraint 'K' +- asm volatile ("" :: "K"(4096)); +-// CHECK: :[[#@LINE+1]]:27: error: value '-1' out of range for constraint 'K' +- asm volatile ("" :: "K"(-1)); +-} +diff --git a/clang/test/CodeGen/LoongArch/inline-asm-constraints.c b/clang/test/CodeGen/LoongArch/inline-asm-constraints.c +deleted file mode 100644 +index b19494284..000000000 +--- a/clang/test/CodeGen/LoongArch/inline-asm-constraints.c ++++ /dev/null +@@ -1,70 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch32 -O2 -emit-llvm %s -o - | FileCheck %s +-// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - | FileCheck %s +- +-/// Test LoongArch specific inline assembly constraints. 
+- +-float f; +-double d; +-void test_f(void) { +-// CHECK-LABEL: define{{.*}} void @test_f() +-// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float, ptr @f +-// CHECK: call void asm sideeffect "", "f"(float [[FLT_ARG]]) +- asm volatile ("" :: "f"(f)); +-// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load double, ptr @d +-// CHECK: call void asm sideeffect "", "f"(double [[FLT_ARG]]) +- asm volatile ("" :: "f"(d)); +-} +- +-void test_k(int *p, int idx) { +-// CHECK-LABEL: define{{.*}} void @test_k(ptr noundef %p, i32 noundef{{.*}} %idx) +-// CHECK: call void asm sideeffect "", "*k"(ptr elementtype(i32) %{{.*}}) +- asm volatile("" :: "k"(*(p+idx))); +-} +- +-void test_l(void) { +-// CHECK-LABEL: define{{.*}} void @test_l() +-// CHECK: call void asm sideeffect "", "l"(i32 32767) +- asm volatile ("" :: "l"(32767)); +-// CHECK: call void asm sideeffect "", "l"(i32 -32768) +- asm volatile ("" :: "l"(-32768)); +-} +- +-void test_m(int *p) { +-// CHECK-LABEL: define{{.*}} void @test_m(ptr noundef %p) +-// CHECK: call void asm sideeffect "", "*m"(ptr nonnull elementtype(i32) %{{.*}}) +- asm volatile("" :: "m"(*(p+4))); +-} +- +-void test_I(void) { +-// CHECK-LABEL: define{{.*}} void @test_I() +-// CHECK: call void asm sideeffect "", "I"(i32 2047) +- asm volatile ("" :: "I"(2047)); +-// CHECK: call void asm sideeffect "", "I"(i32 -2048) +- asm volatile ("" :: "I"(-2048)); +-} +- +-void test_J(void) { +-// CHECK-LABEL: define{{.*}} void @test_J() +-// CHECK: call void asm sideeffect "", "J"(i32 0) +- asm volatile ("" :: "J"(0)); +-} +- +-void test_K(void) { +-// CHECK-LABEL: define{{.*}} void @test_K() +-// CHECK: call void asm sideeffect "", "K"(i32 4095) +- asm volatile ("" :: "K"(4095)); +-// CHECK: call void asm sideeffect "", "K"(i32 0) +- asm volatile ("" :: "K"(0)); +-} +- +-void test_ZB(int *p) { +-// CHECK-LABEL: define{{.*}} void @test_ZB(ptr noundef %p) +-// CHECK: call void asm sideeffect "", "*^ZB"(ptr elementtype(i32) %p) +- asm volatile ("" :: "ZB"(*p)); +-} +- +-void test_ZC(int *p) { +-// CHECK-LABEL: define{{.*}} void @test_ZC(ptr noundef %p) +-// CHECK: call void asm sideeffect "", "*^ZC"(ptr elementtype(i32) %p) +- asm volatile ("" :: "ZC"(*p)); +-} +diff --git a/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs-error.c b/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs-error.c +deleted file mode 100644 +index c5ecf0c92..000000000 +--- a/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs-error.c ++++ /dev/null +@@ -1,20 +0,0 @@ +-// RUN: not %clang_cc1 -triple loongarch32 -emit-llvm %s 2>&1 -o - | FileCheck %s +-// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm %s 2>&1 -o - | FileCheck %s +- +-void test(void) { +-// CHECK: :[[#@LINE+1]]:24: error: unknown register name '$r32' in asm +- register int a0 asm ("$r32"); +-// CHECK: :[[#@LINE+1]]:26: error: unknown register name '$f32' in asm +- register float a1 asm ("$f32"); +-// CHECK: :[[#@LINE+1]]:24: error: unknown register name '$foo' in asm +- register int a2 asm ("$foo"); +- +-/// Names not prefixed with '$' are invalid. 
+- +-// CHECK: :[[#@LINE+1]]:26: error: unknown register name 'f0' in asm +- register float a5 asm ("f0"); +-// CHECK: :[[#@LINE+1]]:26: error: unknown register name 'fa0' in asm +- register float a6 asm ("fa0"); +-// CHECK: :[[#@LINE+1]]:15: error: unknown register name 'fcc0' in asm +- asm ("" ::: "fcc0"); +-} +diff --git a/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c +deleted file mode 100644 +index e1015f6fc..000000000 +--- a/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c ++++ /dev/null +@@ -1,126 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch32 -emit-llvm -O2 %s -o - | FileCheck %s +-// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s +- +-/// Check GCC register names and alias can be used in register variable definition. +- +-// CHECK-LABEL: @test_r0 +-// CHECK: call void asm sideeffect "", "{$r0}"(i32 undef) +-void test_r0() { +- register int a asm ("$r0"); +- register int b asm ("r0"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_r12 +-// CHECK: call void asm sideeffect "", "{$r12}"(i32 undef) +-void test_r12() { +- register int a asm ("$r12"); +- register int b asm ("r12"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_r31 +-// CHECK: call void asm sideeffect "", "{$r31}"(i32 undef) +-void test_r31() { +- register int a asm ("$r31"); +- register int b asm ("r31"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_zero +-// CHECK: call void asm sideeffect "", "{$r0}"(i32 undef) +-void test_zero() { +- register int a asm ("$zero"); +- register int b asm ("zero"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_a0 +-// CHECK: call void asm sideeffect "", "{$r4}"(i32 undef) +-void test_a0() { +- register int a asm ("$a0"); +- register int b asm ("a0"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_t1 +-// CHECK: call void asm sideeffect "", "{$r13}"(i32 undef) +-void test_t1() { +- register int a asm ("$t1"); +- register int b asm ("t1"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_fp +-// CHECK: call void asm sideeffect "", "{$r22}"(i32 undef) +-void test_fp() { +- register int a asm ("$fp"); +- register int b asm ("fp"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_s2 +-// CHECK: call void asm sideeffect "", "{$r25}"(i32 undef) +-void test_s2() { +- register int a asm ("$s2"); +- register int b asm ("s2"); +- asm ("" :: "r" (a)); +- asm ("" :: "r" (b)); +-} +- +-// CHECK-LABEL: @test_f0 +-// CHECK: call void asm sideeffect "", "{$f0}"(float undef) +-void test_f0() { +- register float a asm ("$f0"); +- asm ("" :: "f" (a)); +-} +- +-// CHECK-LABEL: @test_f14 +-// CHECK: call void asm sideeffect "", "{$f14}"(float undef) +-void test_f14() { +- register float a asm ("$f14"); +- asm ("" :: "f" (a)); +-} +- +-// CHECK-LABEL: @test_f31 +-// CHECK: call void asm sideeffect "", "{$f31}"(float undef) +-void test_f31() { +- register float a asm ("$f31"); +- asm ("" :: "f" (a)); +-} +- +-// CHECK-LABEL: @test_fa0 +-// CHECK: call void asm sideeffect "", "{$f0}"(float undef) +-void test_fa0() { +- register float a asm ("$fa0"); +- asm ("" :: "f" (a)); +-} +- +-// CHECK-LABEL: @test_ft1 +-// CHECK: call void asm sideeffect "", "{$f9}"(float undef) +-void test_ft1() { +- register float a asm ("$ft1"); +- asm ("" :: "f" (a)); +-} +- +-// CHECK-LABEL: @test_fs2 +-// CHECK: call void asm 
sideeffect "", "{$f26}"(float undef) +-void test_fs2() { +- register float a asm ("$fs2"); +- asm ("" :: "f" (a)); +-} +- +-// CHECK-LABEL: @test_fcc +-// CHECK: call void asm sideeffect "", "~{$fcc0}"() +-// CHECK: call void asm sideeffect "", "~{$fcc7}"() +-void test_fcc() { +- asm ("" ::: "$fcc0"); +- asm ("" ::: "$fcc7"); +-} +diff --git a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c +deleted file mode 100644 +index b36fe7a7b..000000000 +--- a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c ++++ /dev/null +@@ -1,25 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch32 -O2 -emit-llvm %s -o - | FileCheck %s +-// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - | FileCheck %s +- +-/// Test LoongArch specific operand modifiers (i.e. operand codes). +- +-// CHECK-LABEL: @test_z_zero( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 0) #[[ATTR1:[0-9]+]], !srcloc !2 +-// CHECK-NEXT: ret void +-// +-void test_z_zero(int a) { +- int tmp; +- asm volatile ("add.w %0, %1, %z2" : "=r" (tmp) : "r" (a), "ri" (0)); +-} +- +-// CHECK-LABEL: @test_z_nonzero( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 1) #[[ATTR1]], !srcloc !3 +-// CHECK-NEXT: ret void +-// +-void test_z_nonzero(int a) { +- int tmp; +- asm volatile ("add.w %0, %1, %z2" : "=r" (tmp) : "r" (a), "ri" (1)); +-} +diff --git a/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +new file mode 100644 +index 000000000..e4a03d782 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +@@ -0,0 +1,49 @@ ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++ ++float f; ++double d; ++ ++// CHECK-LABEL: @reg_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) ++// CHECK: ret void ++void reg_float() { ++ float a = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f ++// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) ++// CHECK: ret void ++void r4_float() { ++ register float a asm("$r4") = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @reg_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) ++// CHECK: ret void ++void reg_double() { ++ double a = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d ++// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) ++// CHECK: ret void ++void r4_double() { ++ register double a asm("$r4") = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +deleted file mode 100644 +index 026a2db00..000000000 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c ++++ /dev/null +@@ -1,119 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null +-// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s -o /dev/null 2>&1 \ +-// RUN: | FileCheck %s +- 
+-#include +- +-#ifdef FEATURE_CHECK +-void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) { +-// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit +- __builtin_loongarch_cacop_d(1, v_ul[0], 1024); +- +-// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit +- v_i[0] = __builtin_loongarch_crc_w_b_w(c, v_i[0]); +-// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit +- v_i[1] = __builtin_loongarch_crc_w_h_w(c, v_i[0]); +-// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit +- v_i[2] = __builtin_loongarch_crc_w_w_w(c, v_i[0]); +-// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit +- v_i[3] = __builtin_loongarch_crc_w_d_w(c, v_i[0]); +- +-// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit +- v_i[4] = __builtin_loongarch_crcc_w_b_w(c, v_i[0]); +-// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit +- v_i[5] = __builtin_loongarch_crcc_w_h_w(s, v_i[0]); +-// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit +- v_i[6] = __builtin_loongarch_crcc_w_w_w(v_i[0], v_i[1]); +-// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit +- v_i[7] = __builtin_loongarch_crcc_w_d_w(v_l[0], v_i[0]); +- +-// CHECK: error: '__builtin_loongarch_csrrd_d' needs target feature 64bit +- v_ul[0] = __builtin_loongarch_csrrd_d(1); +-// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit +- v_ul[1] = __builtin_loongarch_csrwr_d(v_ul[0], 1); +-// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit +- v_ul[2] = __builtin_loongarch_csrxchg_d(v_ul[0], v_ul[1], 1); +- +- +-// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit +- v_ul[3] = __builtin_loongarch_iocsrrd_d(ui); +-// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit +- __builtin_loongarch_iocsrwr_d(v_ul[0], ui); +- +-// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit +- __builtin_loongarch_asrtle_d(v_l[0], v_l[1]); +-// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit +- __builtin_loongarch_asrtgt_d(v_l[0], v_l[1]); +- +-// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit +- v_ul[4] = __builtin_loongarch_lddir_d(v_l[0], 1); +-// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit +- __builtin_loongarch_ldpte_d(v_l[0], 1); +-} +-#endif +- +-void cacop_d(unsigned long int a) { +- __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}} +- __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}} +- __builtin_loongarch_cacop_w(1, a, 4096); // expected-error {{argument value 4096 is outside the valid range [-2048, 2047]}} +-} +- +-void dbar(int a) { +- __builtin_loongarch_dbar(32768); // expected-error {{argument value 32768 is outside the valid range [0, 32767]}} +- __builtin_loongarch_dbar(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} +- __builtin_loongarch_dbar(a); // expected-error {{argument to '__builtin_loongarch_dbar' must be a constant integer}} +-} +- +-void ibar(int a) { +- __builtin_loongarch_ibar(32769); // expected-error {{argument value 32769 is outside the 
valid range [0, 32767]}} +- __builtin_loongarch_ibar(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} +- __builtin_loongarch_ibar(a); // expected-error {{argument to '__builtin_loongarch_ibar' must be a constant integer}} +-} +- +-void loongarch_break(int a) { +- __builtin_loongarch_break(32769); // expected-error {{argument value 32769 is outside the valid range [0, 32767]}} +- __builtin_loongarch_break(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} +- __builtin_loongarch_break(a); // expected-error {{argument to '__builtin_loongarch_break' must be a constant integer}} +-} +- +-int movfcsr2gr_out_of_lo_range(int a) { +- int b = __builtin_loongarch_movfcsr2gr(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- int c = __builtin_loongarch_movfcsr2gr(32); // expected-error {{argument value 32 is outside the valid range [0, 3]}} +- int d = __builtin_loongarch_movfcsr2gr(a); // expected-error {{argument to '__builtin_loongarch_movfcsr2gr' must be a constant integer}} +- return 0; +-} +- +-void movgr2fcsr(int a, int b) { +- __builtin_loongarch_movgr2fcsr(-1, b); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- __builtin_loongarch_movgr2fcsr(32, b); // expected-error {{argument value 32 is outside the valid range [0, 3]}} +- __builtin_loongarch_movgr2fcsr(a, b); // expected-error {{argument to '__builtin_loongarch_movgr2fcsr' must be a constant integer}} +-} +- +-void syscall(int a) { +- __builtin_loongarch_syscall(32769); // expected-error {{argument value 32769 is outside the valid range [0, 32767]}} +- __builtin_loongarch_syscall(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} +- __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} +-} +- +-void csrrd_w(int a) { +- __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrrd_w(a); // expected-error {{argument to '__builtin_loongarch_csrrd_w' must be a constant integer}} +-} +- +-void csrwr_w(unsigned int a) { +- __builtin_loongarch_csrwr_w(a, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrwr_w(a, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrwr_w(a, a); // expected-error {{argument to '__builtin_loongarch_csrwr_w' must be a constant integer}} +-} +- +-void csrxchg_w(unsigned int a, unsigned int b) { +- __builtin_loongarch_csrxchg_w(a, b, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrxchg_w(a, b, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} +-} +- +-void rdtime_d() { +- __rdtime_d(); // expected-error {{call to undeclared function '__rdtime_d'}} +-} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +deleted file mode 100644 +index eb3f8cbe7..000000000 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ /dev/null +@@ -1,228 +0,0 
@@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch32 -target-feature +f -O2 -emit-llvm %s -o - \ +-// RUN: | FileCheck %s -check-prefix=LA32 +- +-#include +- +-// LA32-LABEL: @dbar( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +-// LA32-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +-// LA32-NEXT: ret void +-// +-void dbar() { +- __dbar(0); +- __builtin_loongarch_dbar(0); +-} +- +-// LA32-LABEL: @ibar( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +-// LA32-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +-// LA32-NEXT: ret void +-// +-void ibar() { +- __ibar(0); +- __builtin_loongarch_ibar(0); +-} +- +-// LA32-LABEL: @loongarch_break( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.break(i32 1) +-// LA32-NEXT: tail call void @llvm.loongarch.break(i32 1) +-// LA32-NEXT: ret void +-// +-void loongarch_break() { +- __break(1); +- __builtin_loongarch_break(1); +-} +- +-// LA32-LABEL: @syscall( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +-// LA32-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +-// LA32-NEXT: ret void +-// +-void syscall() { +- __syscall(1); +- __builtin_loongarch_syscall(1); +-} +- +-// LA32-LABEL: @csrrd_w( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +-// LA32-NEXT: ret i32 0 +-// +-unsigned int csrrd_w() { +- unsigned int a = __csrrd_w(1); +- unsigned int b = __builtin_loongarch_csrrd_w(1); +- return 0; +-} +- +-// LA32-LABEL: @csrwr_w( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A:%.*]], i32 1) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A]], i32 1) +-// LA32-NEXT: ret i32 0 +-// +-unsigned int csrwr_w(unsigned int a) { +- unsigned int b = __csrwr_w(a, 1); +- unsigned int c = __builtin_loongarch_csrwr_w(a, 1); +- return 0; +-} +- +-// LA32-LABEL: @csrxchg_w( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A:%.*]], i32 [[B:%.*]], i32 1) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A]], i32 [[B]], i32 1) +-// LA32-NEXT: ret i32 0 +-// +-unsigned int csrxchg_w(unsigned int a, unsigned int b) { +- unsigned int c = __csrxchg_w(a, b, 1); +- unsigned int d = __builtin_loongarch_csrxchg_w(a, b, 1); +- return 0; +-} +- +-// LA32-LABEL: @iocsrrd_b( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A:%.*]]) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A]]) +-// LA32-NEXT: ret i8 0 +-// +-unsigned char iocsrrd_b(unsigned int a) { +- unsigned char b = __iocsrrd_b(a); +- unsigned char c = __builtin_loongarch_iocsrrd_b(a); +- return 0; +-} +- +-// LA32-LABEL: @iocsrrd_h( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// LA32-NEXT: ret i16 0 +-// +-unsigned short iocsrrd_h(unsigned int a) { +- unsigned short b = __iocsrrd_h(a); +- unsigned short c = __builtin_loongarch_iocsrrd_h(a); +- return 0; +-} +- +-// LA32-LABEL: @iocsrrd_w( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A:%.*]]) +-// LA32-NEXT: 
[[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A]]) +-// LA32-NEXT: ret i32 0 +-// +-unsigned int iocsrrd_w(unsigned int a) { +- unsigned int b = __iocsrrd_w(a); +- unsigned int c = __builtin_loongarch_iocsrrd_w(a); +- return 0; +-} +- +-// LA32-LABEL: @iocsrwr_b( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[CONV_I:%.*]] = zext i8 [[A:%.*]] to i32 +-// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B:%.*]]) +-// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B]]) +-// LA32-NEXT: ret void +-// +-void iocsrwr_b(unsigned char a, unsigned int b) { +- __iocsrwr_b(a, b); +- __builtin_loongarch_iocsrwr_b(a, b); +-} +- +-// LA32-LABEL: @iocsrwr_h( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[CONV_I:%.*]] = zext i16 [[A:%.*]] to i32 +-// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B:%.*]]) +-// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B]]) +-// LA32-NEXT: ret void +-// +-void iocsrwr_h(unsigned short a, unsigned int b) { +- __iocsrwr_h(a, b); +- __builtin_loongarch_iocsrwr_h(a, b); +-} +- +-// LA32-LABEL: @iocsrwr_w( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A:%.*]], i32 [[B:%.*]]) +-// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A]], i32 [[B]]) +-// LA32-NEXT: ret void +-// +-void iocsrwr_w(unsigned int a, unsigned int b) { +- __iocsrwr_w(a, b); +- __builtin_loongarch_iocsrwr_w(a, b); +-} +- +-// LA32-LABEL: @cpucfg( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A:%.*]]) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A]]) +-// LA32-NEXT: ret i32 0 +-// +-unsigned int cpucfg(unsigned int a) { +- unsigned int b = __cpucfg(a); +- unsigned int c = __builtin_loongarch_cpucfg(a); +- return 0; +-} +- +-// LA32-LABEL: @rdtime( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +-// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] +-// LA32-NEXT: ret void +-// +-void rdtime() { +- __rdtimeh_w(); +- __rdtimel_w(); +-} +- +-// LA32-LABEL: @loongarch_movfcsr2gr( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +-// LA32-NEXT: ret i32 0 +-// +-int loongarch_movfcsr2gr() { +- int a = __movfcsr2gr(1); +- int b = __builtin_loongarch_movfcsr2gr(1); +- return 0; +-} +- +-// LA32-LABEL: @loongarch_movgr2fcsr( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A:%.*]]) +-// LA32-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A]]) +-// LA32-NEXT: ret void +-// +-void loongarch_movgr2fcsr(int a) { +- __movgr2fcsr(1, a); +- __builtin_loongarch_movgr2fcsr(1, a); +-} +- +-// LA32-LABEL: @cacop_w( +-// LA32-NEXT: entry: +-// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +-// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +-// LA32-NEXT: ret void +-// +-void cacop_w(unsigned long int a) { +- __cacop_w(1, a, 1024); +- __builtin_loongarch_cacop_w(1, a, 1024); +-} +- +-// LA32-LABEL: @iocsrrd_h_result( +-// LA32-NEXT: entry: +-// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) 
+-// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +-// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +-// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +-// LA32-NEXT: ret i16 [[CONV3]] +-// +-unsigned short iocsrrd_h_result(unsigned int a) { +- unsigned short b = __iocsrrd_h(a); +- unsigned short c = __builtin_loongarch_iocsrrd_h(a); +- return b+c; +-} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c +deleted file mode 100644 +index a3242dfd4..000000000 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c ++++ /dev/null +@@ -1,67 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -S -verify %s -o /dev/null +-// RUN: not %clang_cc1 -triple loongarch64 -DFEATURE_CHECK -emit-llvm %s -o /dev/null 2>&1 \ +-// RUN: | FileCheck %s +- +-#include +- +-#ifdef FEATURE_CHECK +-void test_feature(unsigned long *v_ul, int *v_i, float a, double b) { +-// CHECK: error: '__builtin_loongarch_cacop_w' needs target feature 32bit +- __builtin_loongarch_cacop_w(1, v_ul[0], 1024); +-// CHECK: error: '__builtin_loongarch_movfcsr2gr' needs target feature f +- v_i[0] = __builtin_loongarch_movfcsr2gr(1); +-// CHECK: error: '__builtin_loongarch_movgr2fcsr' needs target feature f +- __builtin_loongarch_movgr2fcsr(1, v_i[1]); +-// CHECK: error: '__builtin_loongarch_frecipe_s' needs target feature f,frecipe +- float f1 = __builtin_loongarch_frecipe_s(a); +-// CHECK: error: '__builtin_loongarch_frsqrte_s' needs target feature f,frecipe +- float f2 = __builtin_loongarch_frsqrte_s(a); +-// CHECK: error: '__builtin_loongarch_frecipe_d' needs target feature d,frecipe +- double d1 = __builtin_loongarch_frecipe_d(b); +-// CHECK: error: '__builtin_loongarch_frsqrte_d' needs target feature d,frecipe +- double d2 = __builtin_loongarch_frsqrte_d(b); +-} +-#endif +- +-void csrrd_d(int a) { +- __builtin_loongarch_csrrd_d(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrrd_d(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrrd_d(a); // expected-error {{argument to '__builtin_loongarch_csrrd_d' must be a constant integer}} +-} +- +-void csrwr_d(unsigned long int a) { +- __builtin_loongarch_csrwr_d(a, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrwr_d(a, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrwr_d(a, a); // expected-error {{argument to '__builtin_loongarch_csrwr_d' must be a constant integer}} +-} +- +-void csrxchg_d(unsigned long int a, unsigned long int b) { +- __builtin_loongarch_csrxchg_d(a, b, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrxchg_d(a, b, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} +- __builtin_loongarch_csrxchg_d(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_d' must be a constant integer}} +-} +- +-void lddir_d(long int a, int b) { +- __builtin_loongarch_lddir_d(a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- __builtin_loongarch_lddir_d(a, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 31]}} +- __builtin_loongarch_lddir_d(a, b); // 
expected-error {{argument to '__builtin_loongarch_lddir_d' must be a constant integer}} +-} +- +-void ldpte_d(long int a, int b) { +- __builtin_loongarch_ldpte_d(a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- __builtin_loongarch_ldpte_d(a, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 31]}} +- __builtin_loongarch_ldpte_d(a, b); // expected-error {{argument to '__builtin_loongarch_ldpte_d' must be a constant integer}} +-} +- +-int movfcsr2gr_out_of_lo_range(int a) { +- int b = __builtin_loongarch_movfcsr2gr(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- int c = __builtin_loongarch_movfcsr2gr(32); // expected-error {{argument value 32 is outside the valid range [0, 3]}} +- int d = __builtin_loongarch_movfcsr2gr(a); // expected-error {{argument to '__builtin_loongarch_movfcsr2gr' must be a constant integer}} +- return 0; +-} +- +-void movgr2fcsr(int a, int b) { +- __builtin_loongarch_movgr2fcsr(-1, b); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- __builtin_loongarch_movgr2fcsr(32, b); // expected-error {{argument value 32 is outside the valid range [0, 3]}} +- __builtin_loongarch_movgr2fcsr(a, b); // expected-error {{argument to '__builtin_loongarch_movgr2fcsr' must be a constant integer}} +-} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +deleted file mode 100644 +index 50ec358f5..000000000 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ /dev/null +@@ -1,444 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -O2 -emit-llvm %s -o - | FileCheck %s +- +-#include +- +-// CHECK-LABEL: @dbar( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +-// CHECK-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +-// CHECK-NEXT: ret void +-// +-void dbar() { +- __dbar(0); +- __builtin_loongarch_dbar(0); +-} +- +-// CHECK-LABEL: @ibar( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +-// CHECK-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +-// CHECK-NEXT: ret void +-// +-void ibar() { +- __ibar(0); +- __builtin_loongarch_ibar(0); +-} +- +-// CHECK-LABEL: @loongarch_break( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.break(i32 1) +-// CHECK-NEXT: tail call void @llvm.loongarch.break(i32 1) +-// CHECK-NEXT: ret void +-// +-void loongarch_break() { +- __break(1); +- __builtin_loongarch_break(1); +-} +- +-// CHECK-LABEL: @syscall( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +-// CHECK-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +-// CHECK-NEXT: ret void +-// +-void syscall() { +- __syscall(1); +- __builtin_loongarch_syscall(1); +-} +- +-// CHECK-LABEL: @csrrd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +-// CHECK-NEXT: ret i32 0 +-// +-unsigned int csrrd_w() { +- unsigned int a = __csrrd_w(1); +- unsigned int b = __builtin_loongarch_csrrd_w(1); +- return 0; +-} +- +-// CHECK-LABEL: @csrwr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A]], i32 1) +-// 
CHECK-NEXT: ret i32 0 +-// +-unsigned int csrwr_w(unsigned int a) { +- unsigned int b = __csrwr_w(a, 1); +- unsigned int c = __builtin_loongarch_csrwr_w(a, 1); +- return 0; +-} +- +-// CHECK-LABEL: @csrxchg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A:%.*]], i32 [[B:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A]], i32 [[B]], i32 1) +-// CHECK-NEXT: ret i32 0 +-// +-unsigned int csrxchg_w(unsigned int a, unsigned int b) { +- unsigned int c = __csrxchg_w(a, b, 1); +- unsigned int d = __builtin_loongarch_csrxchg_w(a, b, 1); +- return 0; +-} +- +-// CHECK-LABEL: @crc_w_b_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 24 +-// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 24 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[CONV_I]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crc_w_b_w(int a, int b) { +- int c = __crc_w_b_w(a, b); +- int d = __builtin_loongarch_crc_w_b_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @crc_w_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 16 +-// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 16 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[CONV_I]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crc_w_h_w(int a, int b) { +- int c = __crc_w_h_w(a, b); +- int d = __builtin_loongarch_crc_w_h_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @crc_w_w_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 [[A:%.*]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crc_w_w_w(int a, int b) { +- int c = __crc_w_w_w(a, b); +- int d = __builtin_loongarch_crc_w_w_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @cacop_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.d(i64 1, i64 [[A:%.*]], i64 1024) +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.d(i64 1, i64 [[A]], i64 1024) +-// CHECK-NEXT: ret void +-// +-void cacop_d(unsigned long int a) { +- __cacop_d(1, a, 1024); +- __builtin_loongarch_cacop_d(1, a, 1024); +-} +- +-// CHECK-LABEL: @crc_w_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 [[A:%.*]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crc_w_d_w(long int a, int b) { +- int c = __crc_w_d_w(a, b); +- int d = __builtin_loongarch_crc_w_d_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @crcc_w_b_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 24 +-// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 24 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[CONV_I]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crcc_w_b_w(int a, int b) { +- int c = __crcc_w_b_w(a, b); +- int d = __builtin_loongarch_crcc_w_b_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @crcc_w_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 
16 +-// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 16 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[CONV_I]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crcc_w_h_w(int a, int b) { +- int c = __crcc_w_h_w(a, b); +- int d = __builtin_loongarch_crcc_w_h_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @crcc_w_w_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 [[A:%.*]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crcc_w_w_w(int a, int b) { +- int c = __crcc_w_w_w(a, b); +- int d = __builtin_loongarch_crcc_w_w_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @crcc_w_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 [[A:%.*]], i32 [[B:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret i32 0 +-// +-int crcc_w_d_w(long int a, int b) { +- int c = __crcc_w_d_w(a, b); +- int d = __builtin_loongarch_crcc_w_d_w(a, b); +- return 0; +-} +- +-// CHECK-LABEL: @csrrd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrrd.d(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrrd.d(i32 1) +-// CHECK-NEXT: ret i64 0 +-// +-unsigned long int csrrd_d() { +- unsigned long int a = __csrrd_d(1); +- unsigned long int b = __builtin_loongarch_csrrd_d(1); +- return 0; +-} +- +-// CHECK-LABEL: @csrwr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 [[A:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 [[A]], i32 1) +-// CHECK-NEXT: ret i64 0 +-// +-unsigned long int csrwr_d(unsigned long int a) { +- unsigned long int b = __csrwr_d(a, 1); +- unsigned long int c = __builtin_loongarch_csrwr_d(a, 1); +- return 0; +-} +- +-// CHECK-LABEL: @csrxchg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 [[A:%.*]], i64 [[B:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 [[A]], i64 [[B]], i32 1) +-// CHECK-NEXT: ret i64 0 +-// +-unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { +- unsigned long int c = __csrxchg_d(a, b, 1); +- unsigned long int d = __builtin_loongarch_csrxchg_d(a, b, 1); +- return 0; +-} +- +-// CHECK-LABEL: @iocsrrd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A]]) +-// CHECK-NEXT: ret i8 0 +-// +-unsigned char iocsrrd_b(unsigned int a) { +- unsigned char b = __iocsrrd_b(a); +- unsigned char c = __builtin_loongarch_iocsrrd_b(a); +- return 0; +-} +- +-// CHECK-LABEL: @iocsrrd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// CHECK-NEXT: ret i16 0 +-// +-unsigned short iocsrrd_h(unsigned int a) { +- unsigned short b = __iocsrrd_h(a); +- unsigned short c = __builtin_loongarch_iocsrrd_h(a); +- return 0; +-} +- +-// CHECK-LABEL: @iocsrrd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 
[[A:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A]]) +-// CHECK-NEXT: ret i32 0 +-// +-unsigned int iocsrrd_w(unsigned int a) { +- unsigned int b = __iocsrrd_w(a); +- unsigned int c = __builtin_loongarch_iocsrrd_w(a); +- return 0; +-} +- +-// CHECK-LABEL: @iocsrwr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[CONV_I:%.*]] = zext i8 [[A:%.*]] to i32 +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B]]) +-// CHECK-NEXT: ret void +-// +-void iocsrwr_b(unsigned char a, unsigned int b) { +- __iocsrwr_b(a, b); +- __builtin_loongarch_iocsrwr_b(a, b); +-} +- +-// CHECK-LABEL: @iocsrwr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[CONV_I:%.*]] = zext i16 [[A:%.*]] to i32 +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B]]) +-// CHECK-NEXT: ret void +-// +-void iocsrwr_h(unsigned short a, unsigned int b) { +- __iocsrwr_h(a, b); +- __builtin_loongarch_iocsrwr_h(a, b); +-} +- +-// CHECK-LABEL: @iocsrwr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A:%.*]], i32 [[B:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret void +-// +-void iocsrwr_w(unsigned int a, unsigned int b) { +- __iocsrwr_w(a, b); +- __builtin_loongarch_iocsrwr_w(a, b); +-} +- +-// CHECK-LABEL: @iocsrrd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 [[A:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 [[A]]) +-// CHECK-NEXT: ret i64 0 +-// +-unsigned long int iocsrrd_d(unsigned int a) { +- unsigned long int b = __iocsrrd_d(a); +- unsigned long int c = __builtin_loongarch_iocsrrd_d(a); +- return 0; +-} +- +-// CHECK-LABEL: @iocsrwr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.d(i64 [[A:%.*]], i32 [[B:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.d(i64 [[A]], i32 [[B]]) +-// CHECK-NEXT: ret void +-// +-void iocsrwr_d(unsigned long int a, unsigned int b) { +- __iocsrwr_d(a, b); +- __builtin_loongarch_iocsrwr_d(a, b); +-} +- +-// CHECK-LABEL: @asrtle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.asrtle.d(i64 [[A:%.*]], i64 [[B:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.asrtle.d(i64 [[A]], i64 [[B]]) +-// CHECK-NEXT: ret void +-// +-void asrtle_d(long int a, long int b) { +- __asrtle_d(a, b); +- __builtin_loongarch_asrtle_d(a, b); +-} +- +-// CHECK-LABEL: @asrtgt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.asrtgt.d(i64 [[A:%.*]], i64 [[B:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.asrtgt.d(i64 [[A]], i64 [[B]]) +-// CHECK-NEXT: ret void +-// +-void asrtgt_d(long int a, long int b) { +- __asrtgt_d(a, b); +- __builtin_loongarch_asrtgt_d(a, b); +-} +- +-// CHECK-LABEL: @lddir_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 [[A:%.*]], i64 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 [[A]], i64 1) +-// CHECK-NEXT: ret i64 0 +-// +-long int lddir_d(long int a) { +- long int b = __lddir_d(a, 1); +- long int c = __builtin_loongarch_lddir_d(a, 1); +- return 0; +-} +- +-// CHECK-LABEL: @ldpte_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void 
@llvm.loongarch.ldpte.d(i64 [[A:%.*]], i64 1) +-// CHECK-NEXT: tail call void @llvm.loongarch.ldpte.d(i64 [[A]], i64 1) +-// CHECK-NEXT: ret void +-// +-void ldpte_d(long int a) { +- __ldpte_d(a, 1); +- __builtin_loongarch_ldpte_d(a, 1); +-} +- +-// CHECK-LABEL: @cpucfg( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A]]) +-// CHECK-NEXT: ret i32 0 +-// +-unsigned int cpucfg(unsigned int a) { +- unsigned int b = __cpucfg(a); +- unsigned int c = __builtin_loongarch_cpucfg(a); +- return 0; +-} +- +-// CHECK-LABEL: @rdtime_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +-// CHECK-NEXT: ret void +-// +-void rdtime_d() { +- __rdtime_d(); +-} +- +-// CHECK-LABEL: @rdtime( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] +-// CHECK-NEXT: ret void +-// +-void rdtime() { +- __rdtimeh_w(); +- __rdtimel_w(); +-} +- +-// CHECK-LABEL: @loongarch_movfcsr2gr( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +-// CHECK-NEXT: ret i32 0 +-// +-int loongarch_movfcsr2gr() { +- int a = __movfcsr2gr(1); +- int b = __builtin_loongarch_movfcsr2gr(1); +- return 0; +-} +- +-// CHECK-LABEL: @loongarch_movgr2fcsr( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A:%.*]]) +-// CHECK-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A]]) +-// CHECK-NEXT: ret void +-// +-void loongarch_movgr2fcsr(int a) { +- __movgr2fcsr(1, a); +- __builtin_loongarch_movgr2fcsr(1, a); +-} +- +-// CHECK-LABEL: @iocsrrd_h_result( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +-// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +-// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +-// CHECK-NEXT: ret i16 [[CONV3]] +-// +-unsigned short iocsrrd_h_result(unsigned int a) { +- unsigned short b = __iocsrrd_h(a); +- unsigned short c = __builtin_loongarch_iocsrrd_h(a); +- return b+c; +-} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +deleted file mode 100644 +index 2a3862bbe..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c ++++ /dev/null +@@ -1,1373 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s +- +-#include +- +-v32i8 xvslli_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslli_h(v16i16 _1, int var) { +- v16i16 res = 
__lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslli_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslli_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrai_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrai_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrai_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrai_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrari_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrari_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- 
res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrari_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrari_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrli_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrli_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrli_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrli_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrlri_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrlri_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrlri_w(v8i32 _1, int var) { +- v8i32 res = 
__lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrlri_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitclri_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} +- return res; +-} +- +-v16u16 xvbitclri_h(v16u16 _1, int var) { +- v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} +- return res; +-} +- +-v8u32 xvbitclri_w(v8u32 _1, int var) { +- v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} +- return res; +-} +- +-v4u64 xvbitclri_d(v4u64 _1, int var) { +- v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitseti_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} +- return res; +-} +- +-v16u16 xvbitseti_h(v16u16 _1, int var) { +- v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} +- return res; +-} +- +-v8u32 xvbitseti_w(v8u32 _1, int var) { +- v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= 
__lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} +- return res; +-} +- +-v4u64 xvbitseti_d(v4u64 _1, int var) { +- v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitrevi_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} +- return res; +-} +- +-v16u16 xvbitrevi_h(v16u16 _1, int var) { +- v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} +- return res; +-} +- +-v8u32 xvbitrevi_w(v8u32 _1, int var) { +- v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} +- return res; +-} +- +-v4u64 xvbitrevi_d(v4u64 _1, int var) { +- v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvaddi_bu(v32i8 _1, int var) { +- v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvaddi_hu(v16i16 _1, int var) { +- v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvaddi_wu(v8i32 _1, int var) { +- v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= 
__lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvaddi_du(v4i64 _1, int var) { +- v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsubi_bu(v32i8 _1, int var) { +- v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsubi_hu(v16i16 _1, int var) { +- v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsubi_wu(v8i32 _1, int var) { +- v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsubi_du(v4i64 _1, int var) { +- v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvmaxi_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvmaxi_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvmaxi_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvmaxi_d(v4i64 _1, int var) { +- v4i64 res = 
__lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvmaxi_bu(v32u8 _1, int var) { +- v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} +- return res; +-} +- +-v16u16 xvmaxi_hu(v16u16 _1, int var) { +- v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvmaxi_wu(v8u32 _1, int var) { +- v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvmaxi_du(v4u64 _1, int var) { +- v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvmini_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvmini_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}} +- return res; +-} +- +-v8i32 xvmini_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvmini_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range 
[-16, 15]}} +- res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvmini_bu(v32u8 _1, int var) { +- v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} +- return res; +-} +- +-v16u16 xvmini_hu(v16u16 _1, int var) { +- v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvmini_wu(v8u32 _1, int var) { +- v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvmini_du(v4u64 _1, int var) { +- v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvseqi_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvseqi_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvseqi_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvseqi_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslti_b(v32i8 _1, int var) { +- v32i8 res 
= __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslti_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslti_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslti_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslti_bu(v32u8 _1, int var) { +- v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslti_hu(v16u16 _1, int var) { +- v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslti_wu(v8u32 _1, int var) { +- v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslti_du(v4u64 _1, int var) { +- v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslei_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range 
[-16, 15]}} +- res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslei_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslei_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslei_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslei_bu(v32u8 _1, int var) { +- v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslei_hu(v16u16 _1, int var) { +- v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslei_wu(v8u32 _1, int var) { +- v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslei_du(v4u64 _1, int var) { +- v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsat_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsat_h(v16i16 _1, int var) { +- v16i16 res = 
__lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsat_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsat_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvsat_bu(v32u8 _1, int var) { +- v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} +- return res; +-} +- +-v16u16 xvsat_hu(v16u16 _1, int var) { +- v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvsat_wu(v8u32 _1, int var) { +- v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvsat_du(v4u64 _1, int var) { +- v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvrepl128vei_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvrepl128vei_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside 
the valid range [0, 7]}} +- res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvrepl128vei_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvrepl128vei_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvandi_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvori_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvnori_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvxori_b(v32u8 _1, int var) { +- v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { +- v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} +- return res; +-} +- +-v32i8 xvshuf4i_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvshuf4i_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvshuf4i_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvshuf4i_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} +- return res; +-} +- +-v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvpermi_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsllwil_h_b(v32i8 _1, int var) { +- v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsllwil_w_h(v16i16 _1, int var) { +- v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant 
integer}} +- return res; +-} +- +-v4i64 xvsllwil_d_w(v8i32 _1, int var) { +- v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} +- return res; +-} +- +-v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { +- v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { +- v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvsllwil_du_wu(v8u32 _1, int var) { +- v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} +- return res; +-} +- +-v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} +- return res; +-} +- +-v32i8 xvbsrl_v(v32i8 _1, int var) { +- v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} +- return res; +-} +- +-v32i8 xvbsll_v(v32i8 _1, int var) { +- v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} +- return res; +-} +- +-v32i8 
xvextrins_b(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvld(void *_1, int var) { +- v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} +- return res; +-} +- +-void xvst(v32i8 _1, void *_2, int var) { +- __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +-} +- +-void xvstelm_b(v32i8 _1, void * _2, int var) { +- __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} +- __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} +- __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +-} +- +-void xvstelm_h(v16i16 _1, void * _2, int var) { +- __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} +- __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} +- __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +-} +- +-void xvstelm_w(v8i32 _1, void * _2, int var) { +- __lasx_xvstelm_w(_1, _2, -516, 1); // 
expected-error {{argument value -516 is outside the valid range [-512, 508]}} +- __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} +- __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +-} +- +-void xvstelm_d(v4i64 _1, void * _2, int var) { +- __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} +- __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} +- __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +-} +- +-void xvstelm_b_idx(v32i8 _1, void * _2, int var) { +- __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +-} +- +-void xvstelm_h_idx(v16i16 _1, void * _2, int var) { +- __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +-} +- +-void xvstelm_w_idx(v8i32 _1, void * _2, int var) { +- __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +-} +- +-void xvstelm_d_idx(v4i64 _1, void * _2, int var) { +- __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +-} +- +-v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} +- return res; +-} +- +-v8i32 xvpickve_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid 
range [0, 7]}} +- res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvpickve_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvldi(int var) { +- v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} +- res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} +- res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} +- return res; +-} +- +-v8i32 xvinsgr2vr_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvinsgr2vr_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvldrepl_b(void *_1, int var) { +- v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvldrepl_h(void *_1, int var) { +- v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} +- res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} +- res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvldrepl_w(void *_1, int var) { +- v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} +- res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} +- res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvldrepl_d(void *_1, int var) { +- v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} +- res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} +- res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvldrepl_d' must be a constant integer}} +- return res; +-} +- +-int xvpickve2gr_w(v8i32 _1, int var) { +- int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} +- return res; +-} +- +-unsigned int xvpickve2gr_wu(v8i32 _1, int var) { +- unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} +- return res; +-} +- +-long xvpickve2gr_d(v4i64 _1, int var) { +- long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} +- return res; +-} +- +-unsigned long int xvpickve2gr_du(v4i64 _1, int var) { +- unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvrotri_b(v32i8 _1, int var) { +- v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvrotri_h(v16i16 _1, int var) { +- v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvrotri_w(v8i32 _1, int var) { +- v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvrotri_d(v4i64 _1, int var) { +- v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} +- return res; +-} +- 
+-v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrlrni_d_q(_1, _2, var); // 
expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= 
__lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 
xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvsrarni_w_d(_1, _2, var); // 
expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= 
__lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 
xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} +- return res; +-} +- +-v4f64 xvpickve_d_f(v4f64 _1, int var) { +- v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} +- return res; +-} +- +-v8f32 xvpickve_w_f(v8f32 _1, int var) { +- v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} +- return res; +-} +- +-v32i8 xvrepli_b(int var) { +- v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} +- return res; +-} +- +-v4i64 xvrepli_d(int var) { +- v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} +- return res; +-} +- +-v16i16 xvrepli_h(int var) { +- v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvrepli_w(int var) { +- v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} +- return res; +-} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +deleted file mode 
100644 +index 9a8ce224b..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c ++++ /dev/null +@@ -1,6386 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s +- +-#include <lasxintrin.h> +- +-// CHECK-LABEL: @xvsll_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } +-// CHECK-LABEL: @xvsll_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } +-// CHECK-LABEL: @xvsll_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } +-// CHECK-LABEL: @xvsll_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } +-// CHECK-LABEL: @xvslli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } +-// CHECK-LABEL: @xvslli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } +-// CHECK-LABEL: @xvslli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa
[[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } +-// CHECK-LABEL: @xvslli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } +-// CHECK-LABEL: @xvsra_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } +-// CHECK-LABEL: @xvsra_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } +-// CHECK-LABEL: @xvsra_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } +-// CHECK-LABEL: @xvsra_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } +-// CHECK-LABEL: @xvsrai_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } +-// CHECK-LABEL: @xvsrai_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } +-// CHECK-LABEL: @xvsrai_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } +-// CHECK-LABEL: @xvsrai_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } +-// CHECK-LABEL: @xvsrar_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } +-// CHECK-LABEL: @xvsrar_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } +-// CHECK-LABEL: @xvsrar_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } +-// CHECK-LABEL: @xvsrar_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } +-// CHECK-LABEL: @xvsrari_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = 
tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } +-// CHECK-LABEL: @xvsrari_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } +-// CHECK-LABEL: @xvsrari_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } +-// CHECK-LABEL: @xvsrari_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } +-// CHECK-LABEL: @xvsrl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } +-// CHECK-LABEL: @xvsrl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } +-// CHECK-LABEL: @xvsrl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } +-// CHECK-LABEL: @xvsrl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } +-// CHECK-LABEL: @xvsrli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } +-// CHECK-LABEL: @xvsrli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } +-// CHECK-LABEL: @xvsrli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } +-// CHECK-LABEL: @xvsrli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } +-// CHECK-LABEL: @xvsrlr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } +-// CHECK-LABEL: @xvsrlr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } +-// CHECK-LABEL: @xvsrlr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } +-// CHECK-LABEL: @xvsrlr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } +-// CHECK-LABEL: @xvsrlri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } +-// CHECK-LABEL: @xvsrlri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } +-// CHECK-LABEL: @xvsrlri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } +-// CHECK-LABEL: @xvsrlri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } +-// CHECK-LABEL: @xvbitclr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } +-// CHECK-LABEL: @xvbitclr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitclr_h(v16u16 _1, 
v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } +-// CHECK-LABEL: @xvbitclr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } +-// CHECK-LABEL: @xvbitclr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } +-// CHECK-LABEL: @xvbitclri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } +-// CHECK-LABEL: @xvbitclri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } +-// CHECK-LABEL: @xvbitclri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } +-// CHECK-LABEL: @xvbitclri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } +-// CHECK-LABEL: @xvbitset_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } +-// CHECK-LABEL: 
@xvbitset_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } +-// CHECK-LABEL: @xvbitset_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } +-// CHECK-LABEL: @xvbitset_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } +-// CHECK-LABEL: @xvbitseti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } +-// CHECK-LABEL: @xvbitseti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } +-// CHECK-LABEL: @xvbitseti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } +-// CHECK-LABEL: @xvbitseti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } +-// CHECK-LABEL: @xvbitrev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } +-// CHECK-LABEL: @xvbitrev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } +-// CHECK-LABEL: @xvbitrev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } +-// CHECK-LABEL: @xvbitrev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } +-// CHECK-LABEL: @xvbitrevi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } +-// CHECK-LABEL: @xvbitrevi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } +-// CHECK-LABEL: @xvbitrevi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } +-// CHECK-LABEL: 
@xvbitrevi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } +-// CHECK-LABEL: @xvadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } +-// CHECK-LABEL: @xvadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } +-// CHECK-LABEL: @xvadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } +-// CHECK-LABEL: @xvadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } +-// CHECK-LABEL: @xvaddi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } +-// CHECK-LABEL: @xvaddi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } +-// CHECK-LABEL: @xvaddi_wu( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } +-// CHECK-LABEL: @xvaddi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } +-// CHECK-LABEL: @xvsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } +-// CHECK-LABEL: @xvsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } +-// CHECK-LABEL: @xvsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } +-// CHECK-LABEL: @xvsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } +-// CHECK-LABEL: @xvsubi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } +-// CHECK-LABEL: @xvsubi_hu( +-// CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } +-// CHECK-LABEL: @xvsubi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } +-// CHECK-LABEL: @xvsubi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } +-// CHECK-LABEL: @xvmax_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } +-// CHECK-LABEL: @xvmax_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } +-// CHECK-LABEL: @xvmax_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } +-// CHECK-LABEL: @xvmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } +-// CHECK-LABEL: @xvmaxi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } +-// CHECK-LABEL: @xvmaxi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } +-// CHECK-LABEL: @xvmaxi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } +-// CHECK-LABEL: @xvmaxi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } +-// CHECK-LABEL: @xvmax_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } +-// CHECK-LABEL: @xvmax_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } +-// CHECK-LABEL: @xvmax_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } +-// CHECK-LABEL: @xvmax_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } +-// CHECK-LABEL: @xvmaxi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } +-// CHECK-LABEL: @xvmaxi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } +-// CHECK-LABEL: @xvmaxi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } +-// CHECK-LABEL: @xvmaxi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } +-// CHECK-LABEL: @xvmin_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } +-// CHECK-LABEL: @xvmin_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } +-// CHECK-LABEL: @xvmin_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 
x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } +-// CHECK-LABEL: @xvmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } +-// CHECK-LABEL: @xvmini_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } +-// CHECK-LABEL: @xvmini_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } +-// CHECK-LABEL: @xvmini_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } +-// CHECK-LABEL: @xvmini_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } +-// CHECK-LABEL: @xvmin_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } +-// CHECK-LABEL: @xvmin_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: ret void +-// +-v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } +-// CHECK-LABEL: @xvmin_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } +-// CHECK-LABEL: @xvmin_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } +-// CHECK-LABEL: @xvmini_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } +-// CHECK-LABEL: @xvmini_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } +-// CHECK-LABEL: @xvmini_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } +-// CHECK-LABEL: @xvmini_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } +-// CHECK-LABEL: @xvseq_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } +-// CHECK-LABEL: @xvseq_h( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } +-// CHECK-LABEL: @xvseq_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } +-// CHECK-LABEL: @xvseq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } +-// CHECK-LABEL: @xvseqi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } +-// CHECK-LABEL: @xvseqi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } +-// CHECK-LABEL: @xvseqi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } +-// CHECK-LABEL: @xvseqi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } +-// CHECK-LABEL: @xvslt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = 
load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } +-// CHECK-LABEL: @xvslt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } +-// CHECK-LABEL: @xvslt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } +-// CHECK-LABEL: @xvslt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } +-// CHECK-LABEL: @xvslti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } +-// CHECK-LABEL: @xvslti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } +-// CHECK-LABEL: @xvslti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } +-// CHECK-LABEL: @xvslti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } +-// CHECK-LABEL: @xvslt_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } +-// CHECK-LABEL: @xvslt_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } +-// CHECK-LABEL: @xvslt_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } +-// CHECK-LABEL: @xvslt_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } +-// CHECK-LABEL: @xvslti_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } +-// CHECK-LABEL: @xvslti_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } +-// CHECK-LABEL: @xvslti_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } +-// CHECK-LABEL: @xvslti_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } +-// CHECK-LABEL: @xvsle_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } +-// CHECK-LABEL: @xvsle_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } +-// CHECK-LABEL: @xvsle_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } +-// CHECK-LABEL: @xvsle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } +-// CHECK-LABEL: @xvslei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } +-// CHECK-LABEL: @xvslei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } +-// CHECK-LABEL: @xvslei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } +-// CHECK-LABEL: @xvslei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } +-// CHECK-LABEL: @xvsle_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } +-// CHECK-LABEL: @xvsle_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } +-// CHECK-LABEL: @xvsle_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } +-// CHECK-LABEL: @xvsle_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } +-// CHECK-LABEL: @xvslei_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } +-// CHECK-LABEL: @xvslei_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } +-// CHECK-LABEL: @xvslei_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } +-// CHECK-LABEL: @xvslei_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } +-// CHECK-LABEL: @xvsat_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } +-// CHECK-LABEL: @xvsat_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } +-// CHECK-LABEL: @xvsat_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } +-// CHECK-LABEL: @xvsat_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } +-// CHECK-LABEL: @xvsat_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) 
+-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } +-// CHECK-LABEL: @xvsat_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } +-// CHECK-LABEL: @xvsat_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } +-// CHECK-LABEL: @xvsat_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } +-// CHECK-LABEL: @xvadda_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } +-// CHECK-LABEL: @xvadda_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } +-// CHECK-LABEL: @xvadda_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } +-// CHECK-LABEL: @xvadda_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 
x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } +-// CHECK-LABEL: @xvsadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } +-// CHECK-LABEL: @xvsadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } +-// CHECK-LABEL: @xvsadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } +-// CHECK-LABEL: @xvsadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } +-// CHECK-LABEL: @xvsadd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } +-// CHECK-LABEL: @xvsadd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } +-// CHECK-LABEL: @xvsadd_wu( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } +-// CHECK-LABEL: @xvsadd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } +-// CHECK-LABEL: @xvavg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } +-// CHECK-LABEL: @xvavg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } +-// CHECK-LABEL: @xvavg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } +-// CHECK-LABEL: @xvavg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } +-// CHECK-LABEL: @xvavg_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } +-// CHECK-LABEL: @xvavg_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } +-// CHECK-LABEL: @xvavg_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } +-// CHECK-LABEL: @xvavg_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } +-// CHECK-LABEL: @xvavgr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } +-// CHECK-LABEL: @xvavgr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } +-// CHECK-LABEL: @xvavgr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: ret void +-// +-v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } +-// CHECK-LABEL: @xvavgr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } +-// CHECK-LABEL: @xvavgr_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } +-// CHECK-LABEL: @xvavgr_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } +-// CHECK-LABEL: @xvavgr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } +-// CHECK-LABEL: @xvavgr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } +-// CHECK-LABEL: @xvssub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } +-// CHECK-LABEL: @xvssub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } +-// CHECK-LABEL: @xvssub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } +-// CHECK-LABEL: @xvssub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } +-// CHECK-LABEL: @xvssub_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } +-// CHECK-LABEL: @xvssub_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } +-// CHECK-LABEL: @xvssub_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } +-// CHECK-LABEL: @xvssub_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } +-// CHECK-LABEL: @xvabsd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } +-// CHECK-LABEL: @xvabsd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } +-// CHECK-LABEL: @xvabsd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } +-// CHECK-LABEL: @xvabsd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } +-// CHECK-LABEL: @xvabsd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } +-// CHECK-LABEL: @xvabsd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 
xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } +-// CHECK-LABEL: @xvabsd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } +-// CHECK-LABEL: @xvabsd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } +-// CHECK-LABEL: @xvmul_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } +-// CHECK-LABEL: @xvmul_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } +-// CHECK-LABEL: @xvmul_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } +-// CHECK-LABEL: @xvmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } +-// CHECK-LABEL: @xvmadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } +-// CHECK-LABEL: @xvdiv_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } +-// CHECK-LABEL: @xvdiv_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } +-// CHECK-LABEL: @xvdiv_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } +-// CHECK-LABEL: @xvdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } +-// CHECK-LABEL: @xvdiv_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } +-// CHECK-LABEL: @xvdiv_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } +-// CHECK-LABEL: @xvdiv_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } +-// CHECK-LABEL: @xvdiv_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } +-// CHECK-LABEL: @xvhaddw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } +-// CHECK-LABEL: @xvhaddw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> 
[[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } +-// CHECK-LABEL: @xvhaddw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } +-// CHECK-LABEL: @xvhaddw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } +-// CHECK-LABEL: @xvhaddw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } +-// CHECK-LABEL: @xvhaddw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } +-// CHECK-LABEL: @xvhsubw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } +-// CHECK-LABEL: @xvhsubw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+-// CHECK-NEXT: ret void +-// +-v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } +-// CHECK-LABEL: @xvhsubw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } +-// CHECK-LABEL: @xvhsubw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } +-// CHECK-LABEL: @xvhsubw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } +-// CHECK-LABEL: @xvhsubw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } +-// CHECK-LABEL: @xvmod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } +-// CHECK-LABEL: @xvmod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } +-// CHECK-LABEL: 
@xvmod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } +-// CHECK-LABEL: @xvmod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } +-// CHECK-LABEL: @xvmod_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } +-// CHECK-LABEL: @xvmod_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } +-// CHECK-LABEL: @xvmod_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } +-// CHECK-LABEL: @xvmod_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } +-// CHECK-LABEL: @xvrepl128vei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> 
[[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } +-// CHECK-LABEL: @xvrepl128vei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } +-// CHECK-LABEL: @xvrepl128vei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } +-// CHECK-LABEL: @xvrepl128vei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } +-// CHECK-LABEL: @xvpickev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } +-// CHECK-LABEL: @xvpickev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } +-// CHECK-LABEL: @xvpickev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } +-// CHECK-LABEL: @xvpickev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } +-// CHECK-LABEL: @xvpickod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } +-// CHECK-LABEL: @xvpickod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } +-// CHECK-LABEL: @xvpickod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } +-// CHECK-LABEL: @xvpickod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } +-// CHECK-LABEL: @xvilvh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } +-// CHECK-LABEL: @xvilvh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 
32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } +-// CHECK-LABEL: @xvilvh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } +-// CHECK-LABEL: @xvilvh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } +-// CHECK-LABEL: @xvilvl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } +-// CHECK-LABEL: @xvilvl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } +-// CHECK-LABEL: @xvilvl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } +-// CHECK-LABEL: @xvilvl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } +-// CHECK-LABEL: @xvpackev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x 
i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } +-// CHECK-LABEL: @xvpackev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } +-// CHECK-LABEL: @xvpackev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } +-// CHECK-LABEL: @xvpackev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } +-// CHECK-LABEL: @xvpackod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } +-// CHECK-LABEL: @xvpackod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } +-// CHECK-LABEL: @xvpackod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } +-// CHECK-LABEL: @xvpackod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } +-// CHECK-LABEL: @xvshuf_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { 
return __lasx_xvshuf_d(_1, _2, _3); } +-// CHECK-LABEL: @xvand_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } +-// CHECK-LABEL: @xvandi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } +-// CHECK-LABEL: @xvor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } +-// CHECK-LABEL: @xvori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } +-// CHECK-LABEL: @xvnor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } +-// CHECK-LABEL: @xvnori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } +-// CHECK-LABEL: @xvxor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } +-// 
CHECK-LABEL: @xvxori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } +-// CHECK-LABEL: @xvbitsel_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } +-// CHECK-LABEL: @xvbitseli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } +-// CHECK-LABEL: @xvshuf4i_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } +-// CHECK-LABEL: @xvshuf4i_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } +-// CHECK-LABEL: @xvshuf4i_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } +-// CHECK-LABEL: @xvreplgr2vr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } +-// CHECK-LABEL: @xvreplgr2vr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +-// 
CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } +-// CHECK-LABEL: @xvreplgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } +-// CHECK-LABEL: @xvreplgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } +-// CHECK-LABEL: @xvpcnt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } +-// CHECK-LABEL: @xvpcnt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } +-// CHECK-LABEL: @xvpcnt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } +-// CHECK-LABEL: @xvpcnt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } +-// CHECK-LABEL: @xvclo_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } +-// CHECK-LABEL: @xvclo_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } +-// CHECK-LABEL: 
@xvclo_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } +-// CHECK-LABEL: @xvclo_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } +-// CHECK-LABEL: @xvclz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } +-// CHECK-LABEL: @xvclz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } +-// CHECK-LABEL: @xvclz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } +-// CHECK-LABEL: @xvclz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } +-// CHECK-LABEL: @xvfadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } +-// CHECK-LABEL: @xvfadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } +-// CHECK-LABEL: @xvfsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } +-// CHECK-LABEL: @xvfsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } +-// CHECK-LABEL: @xvfmul_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } +-// CHECK-LABEL: @xvfmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } +-// CHECK-LABEL: @xvfdiv_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } +-// CHECK-LABEL: @xvfdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } +-// CHECK-LABEL: @xvfcvt_h_s( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } +-// CHECK-LABEL: @xvfcvt_s_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } +-// CHECK-LABEL: @xvfmin_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } +-// CHECK-LABEL: @xvfmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } +-// CHECK-LABEL: @xvfmina_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } +-// CHECK-LABEL: @xvfmina_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } +-// CHECK-LABEL: @xvfmax_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } +-// CHECK-LABEL: @xvfmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } +-// CHECK-LABEL: @xvfmaxa_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } +-// CHECK-LABEL: @xvfmaxa_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } +-// CHECK-LABEL: @xvfclass_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } +-// CHECK-LABEL: @xvfclass_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } +-// CHECK-LABEL: @xvfsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } +-// CHECK-LABEL: @xvfsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call 
<4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } +-// CHECK-LABEL: @xvfrecip_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } +-// CHECK-LABEL: @xvfrecip_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } +-// CHECK-LABEL: @xvfrint_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } +-// CHECK-LABEL: @xvfrint_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } +-// CHECK-LABEL: @xvfrsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } +-// CHECK-LABEL: @xvfrsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } +-// CHECK-LABEL: @xvflogb_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } +-// CHECK-LABEL: @xvflogb_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } +-// CHECK-LABEL: @xvfcvth_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } +-// CHECK-LABEL: @xvfcvth_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } +-// CHECK-LABEL: @xvfcvtl_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } +-// CHECK-LABEL: @xvfcvtl_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } +-// CHECK-LABEL: @xvftint_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } +-// CHECK-LABEL: @xvftint_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } +-// CHECK-LABEL: @xvftint_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } +-// CHECK-LABEL: @xvftint_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail 
call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } +-// CHECK-LABEL: @xvftintrz_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } +-// CHECK-LABEL: @xvftintrz_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } +-// CHECK-LABEL: @xvftintrz_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } +-// CHECK-LABEL: @xvftintrz_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } +-// CHECK-LABEL: @xvffint_s_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } +-// CHECK-LABEL: @xvffint_d_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } +-// CHECK-LABEL: @xvffint_s_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } +-// CHECK-LABEL: @xvffint_d_lu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } +-// CHECK-LABEL: @xvreplve_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } +-// CHECK-LABEL: @xvreplve_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } +-// CHECK-LABEL: @xvreplve_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } +-// CHECK-LABEL: @xvreplve_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } +-// CHECK-LABEL: @xvpermi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } +-// CHECK-LABEL: @xvandn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } +-// CHECK-LABEL: @xvneg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]]) +-// 
CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } +-// CHECK-LABEL: @xvneg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } +-// CHECK-LABEL: @xvneg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } +-// CHECK-LABEL: @xvneg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } +-// CHECK-LABEL: @xvmuh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } +-// CHECK-LABEL: @xvmuh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } +-// CHECK-LABEL: @xvmuh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } +-// CHECK-LABEL: @xvmuh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } +-// CHECK-LABEL: @xvmuh_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } +-// CHECK-LABEL: @xvmuh_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } +-// CHECK-LABEL: @xvmuh_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } +-// CHECK-LABEL: @xvmuh_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } +-// CHECK-LABEL: @xvsllwil_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } +-// CHECK-LABEL: @xvsllwil_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } +-// CHECK-LABEL: @xvsllwil_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } +-// CHECK-LABEL: @xvsllwil_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } +-// CHECK-LABEL: @xvsllwil_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } +-// CHECK-LABEL: @xvsllwil_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } +-// CHECK-LABEL: @xvsran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } +-// CHECK-LABEL: @xvsran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } +-// CHECK-LABEL: @xvsran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } +-// CHECK-LABEL: @xvssran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } +-// CHECK-LABEL: @xvssran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } +-// CHECK-LABEL: @xvssran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } +-// CHECK-LABEL: @xvssran_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssran_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssran_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } +-// CHECK-LABEL: @xvsrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } +-// CHECK-LABEL: @xvsrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } +-// CHECK-LABEL: @xvsrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } +-// CHECK-LABEL: @xvssrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } +-// CHECK-LABEL: @xvssrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrarn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { 
return __lasx_xvssrarn_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssrarn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssrarn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } +-// CHECK-LABEL: @xvsrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } +-// CHECK-LABEL: @xvsrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } +-// CHECK-LABEL: @xvsrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrln_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssrln_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 
x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssrln_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } +-// CHECK-LABEL: @xvsrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } +-// CHECK-LABEL: @xvsrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } +-// CHECK-LABEL: @xvsrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } +-// CHECK-LABEL: @xvfrstpi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } +-// CHECK-LABEL: @xvfrstpi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } +-// CHECK-LABEL: @xvfrstp_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } +-// CHECK-LABEL: @xvfrstp_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf4i_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } +-// CHECK-LABEL: @xvbsrl_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } +-// CHECK-LABEL: @xvbsll_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } +-// CHECK-LABEL: @xvextrins_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } +-// CHECK-LABEL: @xvextrins_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } +-// CHECK-LABEL: @xvextrins_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } +-// CHECK-LABEL: @xvextrins_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 
xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } +-// CHECK-LABEL: @xvmskltz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } +-// CHECK-LABEL: @xvmskltz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } +-// CHECK-LABEL: @xvmskltz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } +-// CHECK-LABEL: @xvmskltz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } +-// CHECK-LABEL: @xvsigncov_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } +-// CHECK-LABEL: @xvsigncov_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } +-// CHECK-LABEL: @xvsigncov_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } +-// 
CHECK-LABEL: @xvsigncov_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } +-// CHECK-LABEL: @xvfmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } +-// CHECK-LABEL: @xvfmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } +-// CHECK-LABEL: @xvftintrne_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } +-// CHECK-LABEL: @xvftintrne_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } +-// CHECK-LABEL: @xvftintrp_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } +-// CHECK-LABEL: @xvftintrp_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } +-// CHECK-LABEL: @xvftintrm_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } +-// CHECK-LABEL: @xvftintrm_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } +-// CHECK-LABEL: @xvftint_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } +-// CHECK-LABEL: @xvffint_s_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } +-// CHECK-LABEL: @xvftintrz_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } +-// CHECK-LABEL: @xvftintrp_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } +-// CHECK-LABEL: @xvftintrm_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } +-// CHECK-LABEL: @xvftintrne_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } +-// CHECK-LABEL: @xvftinth_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } +-// CHECK-LABEL: @xvftintl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } +-// CHECK-LABEL: @xvffinth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } +-// CHECK-LABEL: @xvffintl_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } +-// CHECK-LABEL: @xvftintrzh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } +-// CHECK-LABEL: @xvftintrzl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } +-// CHECK-LABEL: @xvftintrph_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } +-// CHECK-LABEL: @xvftintrpl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } +-// CHECK-LABEL: @xvftintrmh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } +-// CHECK-LABEL: @xvftintrml_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } +-// CHECK-LABEL: @xvftintrneh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } +-// CHECK-LABEL: @xvftintrnel_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } +-// CHECK-LABEL: 
@xvfrintrne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } +-// CHECK-LABEL: @xvfrintrne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } +-// CHECK-LABEL: @xvfrintrz_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } +-// CHECK-LABEL: @xvfrintrz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } +-// CHECK-LABEL: @xvfrintrp_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } +-// CHECK-LABEL: @xvfrintrp_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } +-// CHECK-LABEL: @xvfrintrm_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } +-// CHECK-LABEL: @xvfrintrm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrm_d(v4f64 _1) { return 
__lasx_xvfrintrm_d(_1); } +-// CHECK-LABEL: @xvld( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } +-// CHECK-LABEL: @xvst( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret void +-// +-void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } +-// CHECK-LABEL: @xvstelm_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } +-// CHECK-LABEL: @xvstelm_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } +-// CHECK-LABEL: @xvstelm_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } +-// CHECK-LABEL: @xvstelm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } +-// CHECK-LABEL: @xvinsve0_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } +-// CHECK-LABEL: @xvinsve0_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } +-// CHECK-LABEL: @xvpickve_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) +-// 
CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } +-// CHECK-LABEL: @xvpickve_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } +-// CHECK-LABEL: @xvssrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } +-// CHECK-LABEL: @xvssrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } +-// CHECK-LABEL: @xvssrln_w_d( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } +-// CHECK-LABEL: @xvorn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } +-// CHECK-LABEL: @xvldi( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvldi() { return __lasx_xvldi(1); } +-// CHECK-LABEL: @xvldx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]] +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } +-// CHECK-LABEL: @xvstx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1) +-// CHECK-NEXT: ret void +-// +-void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } +-// CHECK-LABEL: @xvextl_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } +-// CHECK-LABEL: @xvinsgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } +-// CHECK-LABEL: @xvinsgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } +-// CHECK-LABEL: @xvreplve0_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 
32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } +-// CHECK-LABEL: @xvreplve0_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } +-// CHECK-LABEL: @xvreplve0_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } +-// CHECK-LABEL: @xvreplve0_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } +-// CHECK-LABEL: @xvreplve0_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } +-// CHECK-LABEL: @vext2xv_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } +-// CHECK-LABEL: @vext2xv_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } +-// CHECK-LABEL: @vext2xv_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } +-// CHECK-LABEL: @vext2xv_w_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } +-// CHECK-LABEL: @vext2xv_d_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } +-// CHECK-LABEL: @vext2xv_d_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } +-// CHECK-LABEL: @vext2xv_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } +-// CHECK-LABEL: @vext2xv_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } +-// CHECK-LABEL: @vext2xv_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } +-// CHECK-LABEL: @vext2xv_wu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } +-// CHECK-LABEL: @vext2xv_du_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } +-// CHECK-LABEL: @vext2xv_du_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x 
i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } +-// CHECK-LABEL: @xvpermi_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } +-// CHECK-LABEL: @xvpermi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } +-// CHECK-LABEL: @xvperm_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } +-// CHECK-LABEL: @xvldrepl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } +-// CHECK-LABEL: @xvldrepl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } +-// CHECK-LABEL: @xvldrepl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } +-// CHECK-LABEL: @xvldrepl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } +-// CHECK-LABEL: @xvpickve2gr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 
x i32> [[_1]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } +-// CHECK-LABEL: @xvpickve2gr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } +-// CHECK-LABEL: @xvpickve2gr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } +-// CHECK-LABEL: @xvpickve2gr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } +-// CHECK-LABEL: @xvaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } +-// CHECK-LABEL: @xvaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } +-// CHECK-LABEL: @xvaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } +-// CHECK-LABEL: @xvaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } +-// 
CHECK-LABEL: @xvaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } +-// CHECK-LABEL: @xvaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } +-// CHECK-LABEL: @xvaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } +-// CHECK-LABEL: @xvaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } +-// CHECK-LABEL: @xvsubwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } +-// CHECK-LABEL: @xvsubwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } +-// CHECK-LABEL: @xvsubwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } +-// CHECK-LABEL: @xvsubwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } +-// CHECK-LABEL: @xvsubwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } +-// CHECK-LABEL: @xvsubwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } +-// CHECK-LABEL: @xvsubwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } +-// CHECK-LABEL: @xvsubwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } +-// CHECK-LABEL: @xvmulwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } +-// CHECK-LABEL: @xvmulwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } +-// CHECK-LABEL: @xvmulwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } +-// CHECK-LABEL: @xvmulwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } +-// CHECK-LABEL: @xvmulwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } +-// CHECK-LABEL: @xvmulwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } +-// CHECK-LABEL: @xvmulwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } +-// CHECK-LABEL: @xvmulwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } +-// CHECK-LABEL: @xvaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } +-// CHECK-LABEL: @xvaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } +-// CHECK-LABEL: @xvaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } +-// CHECK-LABEL: @xvaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } +-// CHECK-LABEL: @xvaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } +-// CHECK-LABEL: @xvaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } +-// CHECK-LABEL: @xvaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } +-// CHECK-LABEL: @xvaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } +-// CHECK-LABEL: @xvsubwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } +-// CHECK-LABEL: @xvsubwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } +-// CHECK-LABEL: @xvsubwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 
xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } +-// CHECK-LABEL: @xvsubwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } +-// CHECK-LABEL: @xvsubwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } +-// CHECK-LABEL: @xvsubwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } +-// CHECK-LABEL: @xvsubwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } +-// CHECK-LABEL: @xvsubwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } +-// CHECK-LABEL: @xvmulwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } +-// CHECK-LABEL: @xvmulwod_d_w( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } +-// CHECK-LABEL: @xvmulwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } +-// CHECK-LABEL: @xvmulwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } +-// CHECK-LABEL: @xvmulwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } +-// CHECK-LABEL: @xvmulwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } +-// CHECK-LABEL: @xvmulwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } +-// CHECK-LABEL: @xvmulwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } +-// CHECK-LABEL: @xvaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvmulwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvmulwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvmulwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvmulwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvmulwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvmulwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, 
ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvhaddw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } +-// CHECK-LABEL: @xvhaddw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } +-// CHECK-LABEL: @xvhsubw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } +-// CHECK-LABEL: @xvhsubw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } +-// CHECK-LABEL: @xvmaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 
32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr 
[[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } +-// CHECK-LABEL: @xvrotr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } +-// CHECK-LABEL: @xvrotr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } +-// CHECK-LABEL: @xvrotr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } +-// CHECK-LABEL: @xvrotr_d( +-// CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } +-// CHECK-LABEL: @xvadd_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } +-// CHECK-LABEL: @xvsub_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } +-// CHECK-LABEL: @xvaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvmulwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvmulwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvmskgez_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } +-// CHECK-LABEL: @xvmsknz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } +-// CHECK-LABEL: @xvexth_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } +-// CHECK-LABEL: @xvexth_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } +-// CHECK-LABEL: @xvexth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } +-// CHECK-LABEL: @xvexth_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } +-// CHECK-LABEL: @xvexth_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } +-// CHECK-LABEL: @xvexth_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } +-// CHECK-LABEL: @xvexth_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } +-// CHECK-LABEL: @xvexth_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } +-// CHECK-LABEL: @xvrotri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } +-// CHECK-LABEL: @xvrotri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } +-// CHECK-LABEL: @xvrotri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } +-// CHECK-LABEL: @xvrotri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } +-// CHECK-LABEL: @xvextl_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } +-// CHECK-LABEL: @xvsrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> 
[[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
ret void +-// +-v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } +-// 
CHECK-LABEL: @xvsrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x 
i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xbnz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } +-// CHECK-LABEL: @xbnz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } +-// CHECK-LABEL: @xbnz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } +-// CHECK-LABEL: @xbnz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } +-// CHECK-LABEL: @xbnz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } +-// CHECK-LABEL: @xbz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } +-// CHECK-LABEL: @xbz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } +-// CHECK-LABEL: @xbz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } +-// CHECK-LABEL: @xbz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } +-// CHECK-LABEL: @xbz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } +-// CHECK-LABEL: @xvfcmp_caf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_caf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { 
return __lasx_xvfcmp_caf_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_ceq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_ceq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_clt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_clt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cne_d( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] 
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_saf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_saf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_seq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_seq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 
x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_slt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_slt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return 
__lasx_xvfcmp_sult_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } +-// CHECK-LABEL: @xvpickve_d_f( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } +-// CHECK-LABEL: @xvpickve_w_f( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] 
= tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } +-// CHECK-LABEL: @xvrepli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } +-// CHECK-LABEL: @xvrepli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } +-// CHECK-LABEL: @xvrepli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } +-// CHECK-LABEL: @xvrepli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c +deleted file mode 100644 +index b79f93940..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c ++++ /dev/null +@@ -1,37 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s +- +-#include <lasxintrin.h> +- +-// CHECK-LABEL: @xvfrecipe_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); } +-// CHECK-LABEL: @xvfrecipe_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); } +-// CHECK-LABEL: @xvfrsqrte_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); } +-// CHECK-LABEL: @xvfrsqrte_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, 
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c +index 63e9ba639..24b57527b 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c +@@ -1,38 +1,30 @@ + // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s + + typedef float v8f32 __attribute__((vector_size(32), aligned(32))); + typedef double v4f64 __attribute__((vector_size(32), aligned(32))); + +-// CHECK-LABEL: @xvfrecipe_s ++// CHECK-LABEL: @xvfrecipe_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] + // + v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); } +-// CHECK-LABEL: @xvfrecipe_d ++// CHECK-LABEL: @xvfrecipe_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] + // + v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); } +-// CHECK-LABEL: @xvfrsqrte_s ++// CHECK-LABEL: @xvfrsqrte_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <8 x float> [[TMP0]] + // + v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); } +-// CHECK-LABEL: @xvfrsqrte_d ++// CHECK-LABEL: @xvfrsqrte_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x double> [[TMP0]] + // + v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +deleted file mode 100644 +index 724484465..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c ++++ /dev/null +@@ -1,1392 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s +- +-typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +-typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +-typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +-typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +-typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +-typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +-typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +-typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +-typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +-typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +-typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +-typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +-typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +-typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +-typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +-typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); +- +-v32i8 xvslli_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslli_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslli_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslli_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvslli_d(_1, 64); // 
expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrai_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrai_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrai_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrai_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrari_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrari_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrari_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrari_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error 
{{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrli_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrli_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrli_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrli_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrlri_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrlri_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrlri_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrlri_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 
64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitclri_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} +- return res; +-} +- +-v16u16 xvbitclri_h(v16u16 _1, int var) { +- v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} +- return res; +-} +- +-v8u32 xvbitclri_w(v8u32 _1, int var) { +- v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} +- return res; +-} +- +-v4u64 xvbitclri_d(v4u64 _1, int var) { +- v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitseti_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} +- return res; +-} +- +-v16u16 xvbitseti_h(v16u16 _1, int var) { +- v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} +- return res; +-} +- +-v8u32 xvbitseti_w(v8u32 _1, int var) { +- v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} +- return res; +-} +- +-v4u64 xvbitseti_d(v4u64 _1, int var) { +- v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} 
+- res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitrevi_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} +- return res; +-} +- +-v16u16 xvbitrevi_h(v16u16 _1, int var) { +- v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} +- return res; +-} +- +-v8u32 xvbitrevi_w(v8u32 _1, int var) { +- v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} +- return res; +-} +- +-v4u64 xvbitrevi_d(v4u64 _1, int var) { +- v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvaddi_bu(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvaddi_hu(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvaddi_wu(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvaddi_du(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument 
value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsubi_bu(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsubi_hu(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsubi_wu(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsubi_du(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvmaxi_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvmaxi_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvmaxi_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvmaxi_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 
is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvmaxi_bu(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} +- return res; +-} +- +-v16u16 xvmaxi_hu(v16u16 _1, int var) { +- v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvmaxi_wu(v8u32 _1, int var) { +- v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvmaxi_du(v4u64 _1, int var) { +- v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvmini_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvmini_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}} +- return res; +-} +- +-v8i32 xvmini_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvmini_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the 
valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvmini_bu(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} +- return res; +-} +- +-v16u16 xvmini_hu(v16u16 _1, int var) { +- v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvmini_wu(v8u32 _1, int var) { +- v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvmini_du(v4u64 _1, int var) { +- v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvseqi_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvseqi_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvseqi_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvseqi_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range 
[-16, 15]}} +- res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslti_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslti_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslti_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslti_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslti_bu(v32u8 _1, int var) { +- v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslti_hu(v16u16 _1, int var) { +- v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslti_wu(v8u32 _1, int var) { +- v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslti_du(v4u64 _1, int var) { +- v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- 
res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslei_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslei_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslei_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslei_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvslei_bu(v32u8 _1, int var) { +- v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} +- return res; +-} +- +-v16i16 xvslei_hu(v16u16 _1, int var) { +- v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} +- return res; +-} +- +-v8i32 xvslei_wu(v8u32 _1, int var) { +- v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} +- return res; +-} +- +-v4i64 xvslei_du(v4u64 _1, int var) { +- v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= 
__builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsat_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsat_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsat_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsat_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvsat_bu(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} +- return res; +-} +- +-v16u16 xvsat_hu(v16u16 _1, int var) { +- v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvsat_wu(v8u32 _1, int var) { +- v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvsat_du(v4u64 _1, int var) { +- v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error 
{{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvrepl128vei_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvrepl128vei_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvrepl128vei_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvrepl128vei_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} +- return res; +-} +- +-v32u8 xvandi_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvori_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvnori_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvxori_b(v32u8 _1, int var) { +- v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 255]}} +- res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} +- return res; +-} +- +-v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { +- v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} +- return res; +-} +- +-v32i8 xvshuf4i_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvshuf4i_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvshuf4i_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} +- return res; +-} +- +-v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvpermi_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} +- return res; +-} +- +-v32i8 
xvpermi_q(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsllwil_h_b(v32i8 _1, int var) { +- v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsllwil_w_h(v16i16 _1, int var) { +- v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsllwil_d_w(v8i32 _1, int var) { +- v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} +- return res; +-} +- +-v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { +- v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} +- return res; +-} +- +-v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { +- v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} +- return res; +-} +- +-v4u64 xvsllwil_du_wu(v8u32 _1, int var) { +- v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} +- return res; +-} +- +-v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 
[0, 31]}} +- res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} +- return res; +-} +- +-v32i8 xvbsrl_v(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} +- return res; +-} +- +-v32i8 xvbsll_v(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} +- return res; +-} +- +-v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvld(void *_1, int var) { +- v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error 
{{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} +- return res; +-} +- +-void xvst(v32i8 _1, void *_2, int var) { +- __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +-} +- +-void xvstelm_b(v32i8 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} +- __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} +- __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +-} +- +-void xvstelm_h(v16i16 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} +- __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} +- __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +-} +- +-void xvstelm_w(v8i32 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} +- __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} +- __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +-} +- +-void xvstelm_d(v4i64 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} +- __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} +- __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +-} +- +-void xvstelm_b_idx(v32i8 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +-} +- +-void xvstelm_h_idx(v16i16 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +-} +- +-void xvstelm_w_idx(v8i32 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside 
the valid range [0, 7]}} +- __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +-} +- +-void xvstelm_d_idx(v4i64 _1, void * _2, int var) { +- __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +-} +- +-v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} +- return res; +-} +- +-v8i32 xvpickve_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvpickve_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvldi(int var) { +- v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} +- res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} +- res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} +- return res; +-} +- +-v8i32 xvinsgr2vr_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvinsgr2vr_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument 
value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvldrepl_b(void *_1, int var) { +- v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvldrepl_h(void *_1, int var) { +- v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} +- res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} +- res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvldrepl_w(void *_1, int var) { +- v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} +- res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} +- res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvldrepl_d(void *_1, int var) { +- v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} +- res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} +- res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} +- return res; +-} +- +-int xvpickve2gr_w(v8i32 _1, int var) { +- int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} +- return res; +-} +- +-unsigned int xvpickve2gr_wu(v8i32 _1, int var) { +- unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} +- return res; +-} +- +-long xvpickve2gr_d(v4i64 _1, int var) { +- long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a 
constant integer}} +- return res; +-} +- +-unsigned long int xvpickve2gr_du(v4i64 _1, int var) { +- unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} +- return res; +-} +- +-v32i8 xvrotri_b(v32i8 _1, int var) { +- v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} +- return res; +-} +- +-v16i16 xvrotri_h(v16i16 _1, int var) { +- v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvrotri_w(v8i32 _1, int var) { +- v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} +- return res; +-} +- +-v4i64 xvrotri_d(v4i64 _1, int var) { +- v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is 
outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant 
integer}} +- return res; +-} +- +-v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, 
-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= 
__builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // 
expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, 
int var) { +- v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} +- return res; +-} +- +-v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { +- v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { +- v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { +- v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { +- v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { +- v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} +- return res; +-} +- +-v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { +- v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} +- return res; +-} +- +-v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { +- v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} +- return res; +-} +- +-v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { +- v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} +- return res; +-} +- +-v4f64 xvpickve_d_f(v4f64 _1, int var) { +- v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} +- return res; +-} +- +-v8f32 xvpickve_w_f(v8f32 _1, int var) { +- v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} +- return res; +-} +- +-v32i8 xvrepli_b(int var) { +- v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} +- return res; +-} +- +-v4i64 xvrepli_d(int var) { +- v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} +- return res; +-} +- +-v16i16 xvrepli_h(int var) { +- v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} +- return res; +-} +- +-v8i32 xvrepli_w(int var) { +- v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} +- return res; +-} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c 
b/clang/test/CodeGen/LoongArch/lasx/builtin.c +deleted file mode 100644 +index f52a23a5f..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin.c ++++ /dev/null +@@ -1,6408 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s +- +-typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +-typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +-typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +-typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +-typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +-typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +-typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +-typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +-typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +-typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +-typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +-typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +-typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +-typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +-typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +-typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +-typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); +- +-typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +-typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); +- +-// CHECK-LABEL: @xvsll_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } +-// CHECK-LABEL: @xvsll_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } +-// CHECK-LABEL: @xvsll_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } +-// CHECK-LABEL: @xvsll_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } +-// CHECK-LABEL: @xvslli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } +-// CHECK-LABEL: @xvslli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } +-// CHECK-LABEL: @xvslli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } +-// CHECK-LABEL: @xvslli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } +-// CHECK-LABEL: @xvsra_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } +-// CHECK-LABEL: @xvsra_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return 
__builtin_lasx_xvsra_h(_1, _2); } +-// CHECK-LABEL: @xvsra_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } +-// CHECK-LABEL: @xvsra_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } +-// CHECK-LABEL: @xvsrai_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } +-// CHECK-LABEL: @xvsrai_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } +-// CHECK-LABEL: @xvsrai_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } +-// CHECK-LABEL: @xvsrai_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } +-// CHECK-LABEL: @xvsrar_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } +-// CHECK-LABEL: @xvsrar_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } +-// CHECK-LABEL: @xvsrar_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } +-// CHECK-LABEL: @xvsrar_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } +-// CHECK-LABEL: @xvsrari_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } +-// CHECK-LABEL: @xvsrari_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } +-// CHECK-LABEL: @xvsrari_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } +-// CHECK-LABEL: @xvsrari_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } +-// CHECK-LABEL: @xvsrl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } +-// CHECK-LABEL: @xvsrl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } +-// CHECK-LABEL: @xvsrl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } +-// CHECK-LABEL: @xvsrl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } +-// CHECK-LABEL: @xvsrli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } +-// CHECK-LABEL: @xvsrli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } +-// CHECK-LABEL: @xvsrli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } +-// CHECK-LABEL: @xvsrli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } +-// CHECK-LABEL: @xvsrlr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } +-// CHECK-LABEL: @xvsrlr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } +-// CHECK-LABEL: @xvsrlr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } +-// CHECK-LABEL: @xvsrlr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } +-// CHECK-LABEL: @xvsrlri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } +-// CHECK-LABEL: @xvsrlri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } +-// CHECK-LABEL: @xvsrlri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } +-// CHECK-LABEL: @xvsrlri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } +-// CHECK-LABEL: @xvbitclr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } +-// CHECK-LABEL: @xvbitclr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } +-// CHECK-LABEL: @xvbitclr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } +-// CHECK-LABEL: @xvbitclr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } +-// CHECK-LABEL: @xvbitclri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } +-// CHECK-LABEL: @xvbitclri_h( +-// CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } +-// CHECK-LABEL: @xvbitclri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } +-// CHECK-LABEL: @xvbitclri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } +-// CHECK-LABEL: @xvbitset_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } +-// CHECK-LABEL: @xvbitset_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } +-// CHECK-LABEL: @xvbitset_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } +-// CHECK-LABEL: @xvbitset_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return 
__builtin_lasx_xvbitset_d(_1, _2); } +-// CHECK-LABEL: @xvbitseti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } +-// CHECK-LABEL: @xvbitseti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } +-// CHECK-LABEL: @xvbitseti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } +-// CHECK-LABEL: @xvbitseti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } +-// CHECK-LABEL: @xvbitrev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } +-// CHECK-LABEL: @xvbitrev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } +-// CHECK-LABEL: @xvbitrev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return 
__builtin_lasx_xvbitrev_w(_1, _2); } +-// CHECK-LABEL: @xvbitrev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } +-// CHECK-LABEL: @xvbitrevi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } +-// CHECK-LABEL: @xvbitrevi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } +-// CHECK-LABEL: @xvbitrevi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } +-// CHECK-LABEL: @xvbitrevi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } +-// CHECK-LABEL: @xvadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } +-// CHECK-LABEL: @xvadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } +-// 
CHECK-LABEL: @xvadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } +-// CHECK-LABEL: @xvadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } +-// CHECK-LABEL: @xvaddi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } +-// CHECK-LABEL: @xvaddi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } +-// CHECK-LABEL: @xvaddi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } +-// CHECK-LABEL: @xvaddi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } +-// CHECK-LABEL: @xvsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } +-// CHECK-LABEL: @xvsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } +-// CHECK-LABEL: @xvsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } +-// CHECK-LABEL: @xvsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } +-// CHECK-LABEL: @xvsubi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } +-// CHECK-LABEL: @xvsubi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } +-// CHECK-LABEL: @xvsubi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } +-// CHECK-LABEL: @xvsubi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } +-// CHECK-LABEL: @xvmax_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } +-// CHECK-LABEL: @xvmax_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } +-// CHECK-LABEL: @xvmax_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } +-// CHECK-LABEL: @xvmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } +-// CHECK-LABEL: @xvmaxi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } +-// CHECK-LABEL: @xvmaxi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } +-// CHECK-LABEL: @xvmaxi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } +-// CHECK-LABEL: @xvmaxi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x 
i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } +-// CHECK-LABEL: @xvmax_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } +-// CHECK-LABEL: @xvmax_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } +-// CHECK-LABEL: @xvmax_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } +-// CHECK-LABEL: @xvmax_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } +-// CHECK-LABEL: @xvmaxi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } +-// CHECK-LABEL: @xvmaxi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } +-// CHECK-LABEL: @xvmaxi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } +-// CHECK-LABEL: @xvmaxi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } +-// CHECK-LABEL: @xvmin_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } +-// CHECK-LABEL: @xvmin_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } +-// CHECK-LABEL: @xvmin_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } +-// CHECK-LABEL: @xvmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } +-// CHECK-LABEL: @xvmini_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } +-// CHECK-LABEL: @xvmini_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } +-// CHECK-LABEL: @xvmini_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } +-// CHECK-LABEL: @xvmini_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } +-// CHECK-LABEL: @xvmin_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } +-// CHECK-LABEL: @xvmin_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } +-// CHECK-LABEL: @xvmin_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } +-// CHECK-LABEL: @xvmin_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } +-// CHECK-LABEL: @xvmini_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } +-// CHECK-LABEL: @xvmini_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } +-// CHECK-LABEL: @xvmini_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } +-// CHECK-LABEL: @xvmini_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } +-// CHECK-LABEL: @xvseq_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } +-// CHECK-LABEL: @xvseq_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } +-// CHECK-LABEL: @xvseq_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } +-// CHECK-LABEL: @xvseq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } +-// CHECK-LABEL: @xvseqi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } +-// CHECK-LABEL: @xvseqi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } +-// CHECK-LABEL: @xvseqi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } +-// CHECK-LABEL: @xvseqi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } +-// CHECK-LABEL: @xvslt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } +-// CHECK-LABEL: @xvslt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } +-// CHECK-LABEL: @xvslt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> 
[[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } +-// CHECK-LABEL: @xvslt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } +-// CHECK-LABEL: @xvslti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } +-// CHECK-LABEL: @xvslti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } +-// CHECK-LABEL: @xvslti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } +-// CHECK-LABEL: @xvslti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } +-// CHECK-LABEL: @xvslt_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } +-// CHECK-LABEL: @xvslt_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } +-// CHECK-LABEL: @xvslt_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } +-// CHECK-LABEL: @xvslt_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } +-// CHECK-LABEL: @xvslti_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } +-// CHECK-LABEL: @xvslti_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } +-// CHECK-LABEL: @xvslti_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } +-// CHECK-LABEL: @xvslti_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } +-// CHECK-LABEL: @xvsle_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsle_b(v32i8 _1, v32i8 _2) { 
return __builtin_lasx_xvsle_b(_1, _2); } +-// CHECK-LABEL: @xvsle_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } +-// CHECK-LABEL: @xvsle_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } +-// CHECK-LABEL: @xvsle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } +-// CHECK-LABEL: @xvslei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } +-// CHECK-LABEL: @xvslei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } +-// CHECK-LABEL: @xvslei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } +-// CHECK-LABEL: @xvslei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } +-// CHECK-LABEL: @xvsle_bu( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } +-// CHECK-LABEL: @xvsle_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } +-// CHECK-LABEL: @xvsle_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } +-// CHECK-LABEL: @xvsle_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } +-// CHECK-LABEL: @xvslei_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } +-// CHECK-LABEL: @xvslei_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } +-// CHECK-LABEL: @xvslei_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } +-// CHECK-LABEL: @xvslei_du( 
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
+-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); }
+-// CHECK-LABEL: @xvsat_b(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
+-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); }
+-// CHECK-LABEL: @xvsat_h(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
+-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); }
+-// CHECK-LABEL: @xvsat_w(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
+-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); }
+-// CHECK-LABEL: @xvsat_d(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
+-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); }
+-// CHECK-LABEL: @xvsat_bu(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
+-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); }
+-// CHECK-LABEL: @xvsat_hu(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
+-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); }
+-// CHECK-LABEL: @xvsat_wu(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
+-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); }
+-// CHECK-LABEL: @xvsat_du(
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } +-// CHECK-LABEL: @xvadda_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } +-// CHECK-LABEL: @xvadda_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } +-// CHECK-LABEL: @xvadda_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } +-// CHECK-LABEL: @xvadda_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } +-// CHECK-LABEL: @xvsadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } +-// CHECK-LABEL: @xvsadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> 
[[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } +-// CHECK-LABEL: @xvsadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } +-// CHECK-LABEL: @xvsadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } +-// CHECK-LABEL: @xvsadd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } +-// CHECK-LABEL: @xvsadd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } +-// CHECK-LABEL: @xvsadd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } +-// CHECK-LABEL: @xvsadd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return 
__builtin_lasx_xvsadd_du(_1, _2); } +-// CHECK-LABEL: @xvavg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } +-// CHECK-LABEL: @xvavg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } +-// CHECK-LABEL: @xvavg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } +-// CHECK-LABEL: @xvavg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } +-// CHECK-LABEL: @xvavg_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } +-// CHECK-LABEL: @xvavg_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } +-// CHECK-LABEL: @xvavg_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 
x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } +-// CHECK-LABEL: @xvavg_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } +-// CHECK-LABEL: @xvavgr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } +-// CHECK-LABEL: @xvavgr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } +-// CHECK-LABEL: @xvavgr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } +-// CHECK-LABEL: @xvavgr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } +-// CHECK-LABEL: @xvavgr_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } +-// CHECK-LABEL: @xvavgr_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } +-// CHECK-LABEL: @xvavgr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } +-// CHECK-LABEL: @xvavgr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } +-// CHECK-LABEL: @xvssub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } +-// CHECK-LABEL: @xvssub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } +-// CHECK-LABEL: @xvssub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } +-// 
CHECK-LABEL: @xvssub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } +-// CHECK-LABEL: @xvssub_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } +-// CHECK-LABEL: @xvssub_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } +-// CHECK-LABEL: @xvssub_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } +-// CHECK-LABEL: @xvssub_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } +-// CHECK-LABEL: @xvabsd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } +-// CHECK-LABEL: @xvabsd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } +-// CHECK-LABEL: @xvabsd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } +-// CHECK-LABEL: @xvabsd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } +-// CHECK-LABEL: @xvabsd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } +-// CHECK-LABEL: @xvabsd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } +-// CHECK-LABEL: @xvabsd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } +-// CHECK-LABEL: @xvabsd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x 
i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } +-// CHECK-LABEL: @xvmul_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } +-// CHECK-LABEL: @xvmul_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } +-// CHECK-LABEL: @xvmul_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } +-// CHECK-LABEL: @xvmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } +-// CHECK-LABEL: @xvmadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// 
CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmsub_w(v8i32 _1, 
v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } +-// CHECK-LABEL: @xvdiv_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } +-// CHECK-LABEL: @xvdiv_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } +-// CHECK-LABEL: @xvdiv_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } +-// CHECK-LABEL: @xvdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } +-// CHECK-LABEL: @xvdiv_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } +-// CHECK-LABEL: @xvdiv_hu( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } +-// CHECK-LABEL: @xvdiv_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } +-// CHECK-LABEL: @xvdiv_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } +-// CHECK-LABEL: @xvhaddw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } +-// CHECK-LABEL: @xvhaddw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } +-// CHECK-LABEL: @xvhaddw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } +-// CHECK-LABEL: @xvhaddw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } +-// CHECK-LABEL: @xvhaddw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } +-// CHECK-LABEL: @xvhaddw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } +-// CHECK-LABEL: @xvhsubw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } +-// CHECK-LABEL: @xvhsubw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } +-// CHECK-LABEL: @xvhsubw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } +-// CHECK-LABEL: @xvhsubw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } +-// CHECK-LABEL: @xvhsubw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } +-// CHECK-LABEL: @xvhsubw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } +-// CHECK-LABEL: @xvmod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } +-// CHECK-LABEL: @xvmod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } +-// CHECK-LABEL: @xvmod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } +-// CHECK-LABEL: @xvmod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+-// CHECK-NEXT: ret void
+-//
+-v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); }
+-// CHECK-LABEL: @xvmod_bu(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); }
+-// CHECK-LABEL: @xvmod_hu(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); }
+-// CHECK-LABEL: @xvmod_wu(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); }
+-// CHECK-LABEL: @xvmod_du(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); }
+-// CHECK-LABEL: @xvrepl128vei_b(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
+-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); }
+-// CHECK-LABEL: @xvrepl128vei_h(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
+-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); }
+-// CHECK-LABEL: @xvrepl128vei_w(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
+-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); }
+-// CHECK-LABEL: @xvrepl128vei_d(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
+-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); }
+-// CHECK-LABEL: @xvpickev_b(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); }
+-// CHECK-LABEL: @xvpickev_h(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); }
+-// CHECK-LABEL: @xvpickev_w(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); }
+-// CHECK-LABEL: @xvpickev_d(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); }
+-// CHECK-LABEL: @xvpickod_b(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+-// CHECK-NEXT: ret void
+-//
+-v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); }
+-// CHECK-LABEL: @xvpickod_h(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } +-// CHECK-LABEL: @xvpickod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } +-// CHECK-LABEL: @xvpickod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } +-// CHECK-LABEL: @xvilvh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } +-// CHECK-LABEL: @xvilvh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } +-// CHECK-LABEL: @xvilvh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } +-// CHECK-LABEL: @xvilvh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } +-// CHECK-LABEL: @xvilvl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } +-// CHECK-LABEL: @xvilvl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } +-// CHECK-LABEL: @xvilvl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } +-// CHECK-LABEL: @xvilvl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } +-// CHECK-LABEL: @xvpackev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } +-// CHECK-LABEL: @xvpackev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } +-// CHECK-LABEL: @xvpackev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } +-// CHECK-LABEL: @xvpackev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } +-// CHECK-LABEL: @xvpackod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } +-// CHECK-LABEL: @xvpackod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } +-// CHECK-LABEL: @xvpackod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } +-// CHECK-LABEL: @xvpackod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, 
_2); } +-// CHECK-LABEL: @xvshuf_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } +-// CHECK-LABEL: @xvand_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } +-// CHECK-LABEL: @xvandi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: 
store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } +-// CHECK-LABEL: @xvor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } +-// CHECK-LABEL: @xvori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } +-// CHECK-LABEL: @xvnor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } +-// CHECK-LABEL: @xvnori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } +-// CHECK-LABEL: @xvxor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } +-// CHECK-LABEL: @xvxori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } +-// CHECK-LABEL: @xvbitsel_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } +-// CHECK-LABEL: @xvbitseli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } +-// CHECK-LABEL: @xvshuf4i_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } +-// CHECK-LABEL: @xvshuf4i_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } +-// CHECK-LABEL: @xvshuf4i_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } +-// CHECK-LABEL: @xvreplgr2vr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } +-// CHECK-LABEL: @xvreplgr2vr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } +-// CHECK-LABEL: @xvreplgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } +-// CHECK-LABEL: @xvreplgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +-// 
CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } +-// CHECK-LABEL: @xvpcnt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } +-// CHECK-LABEL: @xvpcnt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } +-// CHECK-LABEL: @xvpcnt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } +-// CHECK-LABEL: @xvpcnt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } +-// CHECK-LABEL: @xvclo_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } +-// CHECK-LABEL: @xvclo_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } +-// CHECK-LABEL: @xvclo_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } +-// CHECK-LABEL: @xvclo_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } +-// CHECK-LABEL: @xvclz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } +-// CHECK-LABEL: @xvclz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } +-// CHECK-LABEL: @xvclz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } +-// CHECK-LABEL: @xvclz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } +-// CHECK-LABEL: @xvfadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } +-// CHECK-LABEL: @xvfadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } +-// CHECK-LABEL: @xvfsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } +-// 
CHECK-LABEL: @xvfsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } +-// CHECK-LABEL: @xvfmul_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } +-// CHECK-LABEL: @xvfmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } +-// CHECK-LABEL: @xvfdiv_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } +-// CHECK-LABEL: @xvfdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } +-// CHECK-LABEL: @xvfcvt_h_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } +-// CHECK-LABEL: @xvfcvt_s_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } +-// CHECK-LABEL: @xvfmin_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } +-// CHECK-LABEL: @xvfmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } +-// CHECK-LABEL: @xvfmina_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } +-// CHECK-LABEL: @xvfmina_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } +-// CHECK-LABEL: @xvfmax_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } +-// CHECK-LABEL: @xvfmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } +-// CHECK-LABEL: @xvfmaxa_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } +-// CHECK-LABEL: @xvfmaxa_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } +-// CHECK-LABEL: @xvfclass_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } +-// CHECK-LABEL: @xvfclass_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } +-// CHECK-LABEL: @xvfsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } +-// CHECK-LABEL: @xvfsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } +-// CHECK-LABEL: @xvfrecip_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) +-// CHECK-NEXT: 
store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } +-// CHECK-LABEL: @xvfrecip_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } +-// CHECK-LABEL: @xvfrint_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } +-// CHECK-LABEL: @xvfrint_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } +-// CHECK-LABEL: @xvfrsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } +-// CHECK-LABEL: @xvfrsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } +-// CHECK-LABEL: @xvflogb_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } +-// CHECK-LABEL: @xvflogb_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } +-// CHECK-LABEL: @xvfcvth_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> 
@llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } +-// CHECK-LABEL: @xvfcvth_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } +-// CHECK-LABEL: @xvfcvtl_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } +-// CHECK-LABEL: @xvfcvtl_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } +-// CHECK-LABEL: @xvftint_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } +-// CHECK-LABEL: @xvftint_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } +-// CHECK-LABEL: @xvftint_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } +-// CHECK-LABEL: @xvftint_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } +-// CHECK-LABEL: @xvftintrz_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } +-// CHECK-LABEL: @xvftintrz_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } +-// CHECK-LABEL: @xvftintrz_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } +-// CHECK-LABEL: @xvftintrz_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } +-// CHECK-LABEL: @xvffint_s_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } +-// CHECK-LABEL: @xvffint_d_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } +-// CHECK-LABEL: @xvffint_s_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } +-// CHECK-LABEL: @xvffint_d_lu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } +-// CHECK-LABEL: 
@xvreplve_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } +-// CHECK-LABEL: @xvreplve_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } +-// CHECK-LABEL: @xvreplve_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } +-// CHECK-LABEL: @xvreplve_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } +-// CHECK-LABEL: @xvpermi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } +-// CHECK-LABEL: @xvandn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } +-// CHECK-LABEL: @xvneg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } +-// CHECK-LABEL: @xvneg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } +-// CHECK-LABEL: @xvneg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } +-// CHECK-LABEL: @xvneg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } +-// CHECK-LABEL: @xvmuh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } +-// CHECK-LABEL: @xvmuh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } +-// CHECK-LABEL: @xvmuh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } +-// CHECK-LABEL: @xvmuh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } +-// CHECK-LABEL: @xvmuh_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } +-// CHECK-LABEL: @xvmuh_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } +-// CHECK-LABEL: @xvmuh_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } +-// CHECK-LABEL: @xvmuh_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } +-// CHECK-LABEL: @xvsllwil_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } +-// CHECK-LABEL: @xvsllwil_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } +-// CHECK-LABEL: @xvsllwil_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } +-// CHECK-LABEL: @xvsllwil_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load 
<32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } +-// CHECK-LABEL: @xvsllwil_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } +-// CHECK-LABEL: @xvsllwil_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } +-// CHECK-LABEL: @xvsran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } +-// CHECK-LABEL: @xvsran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } +-// CHECK-LABEL: @xvsran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } +-// CHECK-LABEL: @xvssran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return 
__builtin_lasx_xvssran_b_h(_1, _2); } +-// CHECK-LABEL: @xvssran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } +-// CHECK-LABEL: @xvssran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } +-// CHECK-LABEL: @xvssran_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssran_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssran_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } +-// CHECK-LABEL: @xvsrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } +-// CHECK-LABEL: @xvsrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } +-// CHECK-LABEL: @xvsrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } +-// CHECK-LABEL: @xvssrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } +-// CHECK-LABEL: @xvssrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrarn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssrarn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = 
load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssrarn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } +-// CHECK-LABEL: @xvsrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } +-// CHECK-LABEL: @xvsrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } +-// CHECK-LABEL: @xvsrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrln_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssrln_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssrln_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } +-// CHECK-LABEL: @xvsrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } +-// CHECK-LABEL: @xvsrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } +-// CHECK-LABEL: @xvsrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } +-// CHECK-LABEL: @xvfrstpi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } +-// CHECK-LABEL: @xvfrstpi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } +-// CHECK-LABEL: @xvfrstp_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } +-// CHECK-LABEL: @xvfrstp_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } +-// CHECK-LABEL: @xvshuf4i_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } +-// CHECK-LABEL: @xvbsrl_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } +-// CHECK-LABEL: @xvbsll_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } +-// CHECK-LABEL: @xvextrins_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } +-// CHECK-LABEL: @xvextrins_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } +-// CHECK-LABEL: @xvextrins_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } +-// CHECK-LABEL: @xvextrins_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } +-// CHECK-LABEL: 
@xvmskltz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } +-// CHECK-LABEL: @xvmskltz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } +-// CHECK-LABEL: @xvmskltz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } +-// CHECK-LABEL: @xvmskltz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } +-// CHECK-LABEL: @xvsigncov_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } +-// CHECK-LABEL: @xvsigncov_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } +-// CHECK-LABEL: @xvsigncov_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } +-// CHECK-LABEL: @xvsigncov_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = 
load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } +-// CHECK-LABEL: @xvfmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } +-// CHECK-LABEL: @xvfmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +-// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } +-// CHECK-LABEL: @xvfnmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +-// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } +-// CHECK-LABEL: @xvftintrne_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } +-// CHECK-LABEL: @xvftintrne_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } +-// CHECK-LABEL: @xvftintrp_w_s( +-// CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } +-// CHECK-LABEL: @xvftintrp_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } +-// CHECK-LABEL: @xvftintrm_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } +-// CHECK-LABEL: @xvftintrm_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } +-// CHECK-LABEL: @xvftint_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } +-// CHECK-LABEL: @xvffint_s_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } +-// CHECK-LABEL: @xvftintrz_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } +-// CHECK-LABEL: @xvftintrp_w_d( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } +-// CHECK-LABEL: @xvftintrm_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } +-// CHECK-LABEL: @xvftintrne_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } +-// CHECK-LABEL: @xvftinth_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } +-// CHECK-LABEL: @xvftintl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } +-// CHECK-LABEL: @xvffinth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } +-// CHECK-LABEL: @xvffintl_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } 
+-// CHECK-LABEL: @xvftintrzh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } +-// CHECK-LABEL: @xvftintrzl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } +-// CHECK-LABEL: @xvftintrph_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } +-// CHECK-LABEL: @xvftintrpl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } +-// CHECK-LABEL: @xvftintrmh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } +-// CHECK-LABEL: @xvftintrml_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } +-// CHECK-LABEL: @xvftintrneh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } +-// CHECK-LABEL: @xvftintrnel_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } +-// CHECK-LABEL: @xvfrintrne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } +-// CHECK-LABEL: @xvfrintrne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } +-// CHECK-LABEL: @xvfrintrz_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } +-// CHECK-LABEL: @xvfrintrz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } +-// CHECK-LABEL: @xvfrintrp_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } +-// CHECK-LABEL: @xvfrintrp_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } +-// CHECK-LABEL: @xvfrintrm_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } +-// CHECK-LABEL: @xvfrintrm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x 
double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } +-// CHECK-LABEL: @xvld( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } +-// CHECK-LABEL: @xvst( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret void +-// +-void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } +-// CHECK-LABEL: @xvstelm_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } +-// CHECK-LABEL: @xvstelm_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } +-// CHECK-LABEL: @xvstelm_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } +-// CHECK-LABEL: @xvstelm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) +-// CHECK-NEXT: ret void +-// +-void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } +-// CHECK-LABEL: @xvinsve0_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } +-// CHECK-LABEL: @xvinsve0_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return 
__builtin_lasx_xvinsve0_d(_1, _2, 1); } +-// CHECK-LABEL: @xvpickve_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } +-// CHECK-LABEL: @xvpickve_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } +-// CHECK-LABEL: @xvssrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } +-// CHECK-LABEL: @xvssrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } +-// CHECK-LABEL: @xvssrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } +-// CHECK-LABEL: @xvssrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } +-// CHECK-LABEL: @xvssrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } +-// CHECK-LABEL: @xvorn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } +-// CHECK-LABEL: @xvldi( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvldi() { return __builtin_lasx_xvldi(1); } +-// CHECK-LABEL: @xvldx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } +-// CHECK-LABEL: @xvstx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1) +-// CHECK-NEXT: ret void +-// +-void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } +-// CHECK-LABEL: @xvextl_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } +-// CHECK-LABEL: @xvinsgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } +-// CHECK-LABEL: @xvinsgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } +-// CHECK-LABEL: @xvreplve0_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } +-// CHECK-LABEL: @xvreplve0_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } +-// CHECK-LABEL: @xvreplve0_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } +-// CHECK-LABEL: @xvreplve0_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } +-// CHECK-LABEL: @xvreplve0_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } +-// CHECK-LABEL: @vext2xv_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } +-// CHECK-LABEL: @vext2xv_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } +-// CHECK-LABEL: @vext2xv_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } +-// CHECK-LABEL: @vext2xv_w_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } +-// CHECK-LABEL: @vext2xv_d_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } +-// CHECK-LABEL: @vext2xv_d_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } +-// CHECK-LABEL: @vext2xv_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } +-// CHECK-LABEL: @vext2xv_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } +-// CHECK-LABEL: @vext2xv_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } +-// CHECK-LABEL: @vext2xv_wu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } +-// CHECK-LABEL: @vext2xv_du_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = 
load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } +-// CHECK-LABEL: @vext2xv_du_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } +-// CHECK-LABEL: @xvpermi_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } +-// CHECK-LABEL: @xvpermi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } +-// CHECK-LABEL: @xvperm_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } +-// CHECK-LABEL: @xvldrepl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } +-// CHECK-LABEL: @xvldrepl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } +-// CHECK-LABEL: @xvldrepl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } +-// CHECK-LABEL: @xvldrepl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } +-// CHECK-LABEL: @xvpickve2gr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } +-// CHECK-LABEL: @xvpickve2gr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } +-// CHECK-LABEL: @xvpickve2gr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } +-// CHECK-LABEL: @xvpickve2gr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } +-// CHECK-LABEL: @xvaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } +-// CHECK-LABEL: @xvaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } +-// CHECK-LABEL: @xvaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } +-// CHECK-LABEL: 
@xvaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } +-// CHECK-LABEL: @xvaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } +-// CHECK-LABEL: @xvaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } +-// CHECK-LABEL: @xvaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } +-// CHECK-LABEL: @xvaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } +-// CHECK-LABEL: @xvsubwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } +-// CHECK-LABEL: @xvsubwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } +-// CHECK-LABEL: @xvsubwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } +-// CHECK-LABEL: @xvsubwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } +-// CHECK-LABEL: @xvsubwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } +-// CHECK-LABEL: @xvsubwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } +-// CHECK-LABEL: @xvsubwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } +-// CHECK-LABEL: @xvsubwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } +-// CHECK-LABEL: @xvmulwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } +-// CHECK-LABEL: @xvmulwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } +-// CHECK-LABEL: @xvmulwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } +-// CHECK-LABEL: @xvmulwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } +-// CHECK-LABEL: @xvmulwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } +-// CHECK-LABEL: @xvmulwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } +-// CHECK-LABEL: @xvmulwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } +-// CHECK-LABEL: @xvmulwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } +-// CHECK-LABEL: @xvaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } +-// CHECK-LABEL: @xvaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } +-// CHECK-LABEL: @xvaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } +-// CHECK-LABEL: @xvaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store 
<16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } +-// CHECK-LABEL: @xvaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } +-// CHECK-LABEL: @xvaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } +-// CHECK-LABEL: @xvaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } +-// CHECK-LABEL: @xvaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } +-// CHECK-LABEL: @xvsubwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } +-// CHECK-LABEL: @xvsubwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void 
+-// +-v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } +-// CHECK-LABEL: @xvsubwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } +-// CHECK-LABEL: @xvsubwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } +-// CHECK-LABEL: @xvsubwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } +-// CHECK-LABEL: @xvsubwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } +-// CHECK-LABEL: @xvsubwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } +-// CHECK-LABEL: @xvsubwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, 
_2); } +-// CHECK-LABEL: @xvmulwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } +-// CHECK-LABEL: @xvmulwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } +-// CHECK-LABEL: @xvmulwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } +-// CHECK-LABEL: @xvmulwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } +-// CHECK-LABEL: @xvmulwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } +-// CHECK-LABEL: @xvmulwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } +-// CHECK-LABEL: @xvmulwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x 
i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } +-// CHECK-LABEL: @xvmulwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } +-// CHECK-LABEL: @xvaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvmulwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvmulwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvmulwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvmulwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } +-// CHECK-LABEL: @xvmulwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } +-// CHECK-LABEL: @xvmulwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } +-// CHECK-LABEL: @xvhaddw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } +-// CHECK-LABEL: @xvhaddw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } +-// CHECK-LABEL: @xvhsubw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } +-// CHECK-LABEL: @xvhsubw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } +-// CHECK-LABEL: @xvmaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail 
call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] 
= tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +-// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +-// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } +-// CHECK-LABEL: @xvmaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +-// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } +-// CHECK-LABEL: @xvrotr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } +-// CHECK-LABEL: @xvrotr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrotr_h(v16i16 
_1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } +-// CHECK-LABEL: @xvrotr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } +-// CHECK-LABEL: @xvrotr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } +-// CHECK-LABEL: @xvadd_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } +-// CHECK-LABEL: @xvsub_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } +-// CHECK-LABEL: @xvaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvmulwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvmulwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } +-// CHECK-LABEL: @xvmskgez_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } +-// CHECK-LABEL: @xvmsknz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } +-// CHECK-LABEL: @xvexth_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } +-// CHECK-LABEL: @xvexth_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } +-// CHECK-LABEL: @xvexth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } +-// CHECK-LABEL: @xvexth_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> 
[[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } +-// CHECK-LABEL: @xvexth_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } +-// CHECK-LABEL: @xvexth_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } +-// CHECK-LABEL: @xvexth_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } +-// CHECK-LABEL: @xvexth_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } +-// CHECK-LABEL: @xvrotri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } +-// CHECK-LABEL: @xvrotri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } +-// CHECK-LABEL: @xvrotri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } +-// CHECK-LABEL: @xvrotri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> 
[[_1]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } +-// CHECK-LABEL: @xvextl_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } +-// CHECK-LABEL: @xvsrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); 
} +-// CHECK-LABEL: @xvsrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_d_q( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_h_w( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrlrni_du_q( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvsrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: 
[[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrani_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, 
ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @xvssrarni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @xbnz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } +-// CHECK-LABEL: @xbnz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } +-// CHECK-LABEL: @xbnz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } +-// CHECK-LABEL: @xbnz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } +-// CHECK-LABEL: @xbnz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } +-// CHECK-LABEL: @xbz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } +-// CHECK-LABEL: @xbz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } +-// CHECK-LABEL: @xbz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } +-// CHECK-LABEL: @xbz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } +-// CHECK-LABEL: @xbz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail 
call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } +-// CHECK-LABEL: @xvfcmp_caf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_caf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_ceq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_ceq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// 
+-v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_clt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_clt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_cor_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } 
+-// CHECK-LABEL: @xvfcmp_cun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_cun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_saf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_saf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_seq_d( +-// CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_seq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_slt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_slt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// 
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +-// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x 
float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } +-// CHECK-LABEL: @xvfcmp_sun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +-// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } +-// CHECK-LABEL: @xvpickve_d_f( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) +-// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } +-// CHECK-LABEL: @xvpickve_w_f( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) +-// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } +-// CHECK-LABEL: @xvrepli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } +-// CHECK-LABEL: @xvrepli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } +-// CHECK-LABEL: @xvrepli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } +-// CHECK-LABEL: @xvrepli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +-// CHECK-NEXT: ret void +-// +-v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs-error.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs-error.c +deleted file mode 100644 +index 1fd602574..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs-error.c ++++ /dev/null +@@ -1,10 +0,0 @@ +-// RUN: not %clang_cc1 
-triple loongarch64 -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s +- +-typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +- +-void test() { +-// CHECK: :[[#@LINE+1]]:28: error: unknown register name 'xr0' in asm +- register v32i8 p0 asm ("xr0"); +-// CHECK: :[[#@LINE+1]]:29: error: unknown register name '$xr32' in asm +- register v32i8 p32 asm ("$xr32"); +-} +diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c +deleted file mode 100644 +index ed1a9660a..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c ++++ /dev/null +@@ -1,36 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "^define |tail call" +-// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s +- +-typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +- +-// CHECK-LABEL: @test_xr0( +-// CHECK: tail call void asm sideeffect "", "{$xr0}"(<32 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2 +-// +-void test_xr0() { +- register v32i8 a asm ("$xr0"); +- asm ("" :: "f"(a)); +-} +- +-// CHECK-LABEL: @test_xr7( +-// CHECK: tail call void asm sideeffect "", "{$xr7}"(<32 x i8> undef) #[[ATTR1]], !srcloc !3 +-// +-void test_xr7() { +- register v32i8 a asm ("$xr7"); +- asm ("" :: "f"(a)); +-} +- +-// CHECK-LABEL: @test_xr15( +-// CHECK: tail call void asm sideeffect "", "{$xr15}"(<32 x i8> undef) #[[ATTR1]], !srcloc !4 +-// +-void test_xr15() { +- register v32i8 a asm ("$xr15"); +- asm ("" :: "f"(a)); +-} +- +-// CHECK-LABEL: @test_xr31( +-// CHECK: tail call void asm sideeffect "", "{$xr31}"(<32 x i8> undef) #[[ATTR1]], !srcloc !5 +-// +-void test_xr31() { +- register v32i8 a asm ("$xr31"); +- asm ("" :: "f"(a)); +-} +diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c +deleted file mode 100644 +index a5cc8798f..000000000 +--- a/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c ++++ /dev/null +@@ -1,15 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +-// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s +- +-typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); +- +-// CHECK-LABEL: define dso_local void @test_u +-// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2 +-// CHECK-NEXT: ret void +-// +-void test_u() { +- v4i64 v4i64_r; +- asm volatile ("xvldi %u0, 1" : "=f" (v4i64_r)); +-} +diff --git a/clang/test/CodeGen/LoongArch/lasx/xvrepli-builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/xvrepli-builtin-alias-error.c +new file mode 100644 +index 000000000..7b298b21d +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/xvrepli-builtin-alias-error.c +@@ -0,0 +1,31 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++#include <lasxintrin.h> ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64
xvrepli_d(int var) { ++ v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lasx/xvrepli-builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/xvrepli-builtin-error.c +new file mode 100644 +index 000000000..56c650ec6 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/xvrepli-builtin-error.c +@@ -0,0 +1,50 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepli_d(int var) { ++ v4i64 res = 
__builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c +deleted file mode 100644 +index 69cf2254f..000000000 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c ++++ /dev/null +@@ -1,1359 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s +- +-#include <lsxintrin.h> +- +-v16i8 vslli_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vslli_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vslli_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vslli_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrai_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsrai_b(_1, var); // expected-error
{{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrai_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrai_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrai_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrari_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrari_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrari_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrari_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrli_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrli_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrli_h(_1, 16); // 
expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrli_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrli_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrlri_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrlri_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrlri_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrlri_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitclri_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} +- return res; +-} +- +-v8u16 vbitclri_h(v8u16 _1, int var) { +- v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} +- return res; +-} +- +-v4u32 vbitclri_w(v4u32 _1, int var) { +- 
v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} +- return res; +-} +- +-v2u64 vbitclri_d(v2u64 _1, int var) { +- v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitseti_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} +- return res; +-} +- +-v8u16 vbitseti_h(v8u16 _1, int var) { +- v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} +- return res; +-} +- +-v4u32 vbitseti_w(v4u32 _1, int var) { +- v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} +- return res; +-} +- +-v2u64 vbitseti_d(v2u64 _1, int var) { +- v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitrevi_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} +- return res; +-} +- +-v8u16 vbitrevi_h(v8u16 _1, int var) { +- v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} +- return res; +-} +- +-v4u32 vbitrevi_w(v4u32 _1, int var) { +- v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument 
value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} +- return res; +-} +- +-v2u64 vbitrevi_d(v2u64 _1, int var) { +- v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vaddi_bu(v16i8 _1, int var) { +- v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vaddi_hu(v8i16 _1, int var) { +- v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vaddi_wu(v4i32 _1, int var) { +- v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vaddi_du(v2i64 _1, int var) { +- v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vsubi_bu(v16i8 _1, int var) { +- v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vsubi_hu(v8i16 _1, int var) { +- v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vsubi_wu(v4i32 _1, int var) { +- v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vsubi_du(v2i64 _1, int var) { +- v2i64 res = 
__lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vmaxi_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vmaxi_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vmaxi_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vmaxi_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vmaxi_bu(v16u8 _1, int var) { +- v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} +- return res; +-} +- +-v8u16 vmaxi_hu(v8u16 _1, int var) { +- v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} +- return res; +-} +- +-v4u32 vmaxi_wu(v4u32 _1, int var) { +- v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} +- return res; +-} +- +-v2u64 vmaxi_du(v2u64 _1, int var) { +- v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to 
'__builtin_lsx_vmaxi_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vmini_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vmini_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}}} +- return res; +-} +- +-v4i32 vmini_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vmini_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vmini_bu(v16u8 _1, int var) { +- v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} +- return res; +-} +- +-v8u16 vmini_hu(v8u16 _1, int var) { +- v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} +- return res; +-} +- +-v4u32 vmini_wu(v4u32 _1, int var) { +- v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} +- return res; +-} +- +-v2u64 vmini_du(v2u64 _1, int var) { +- v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vseqi_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_b(_1, 16); // expected-error 
{{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vseqi_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vseqi_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vseqi_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vslti_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vslti_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vslti_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vslti_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vslti_bu(v16u8 _1, int var) { +- v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vslti_hu(v8u16 _1, int var) { +- v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument 
value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vslti_wu(v4u32 _1, int var) { +- v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vslti_du(v2u64 _1, int var) { +- v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vslei_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vslei_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vslei_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vslei_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vslei_bu(v16u8 _1, int var) { +- v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vslei_hu(v8u16 _1, int var) { +- v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} +- 
return res; +-} +- +-v4i32 vslei_wu(v4u32 _1, int var) { +- v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vslei_du(v2u64 _1, int var) { +- v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vsat_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsat_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsat_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsat_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vsat_bu(v16u8 _1, int var) { +- v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} +- return res; +-} +- +-v8u16 vsat_hu(v8u16 _1, int var) { +- v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} +- return res; +-} +- +-v4u32 vsat_wu(v4u32 _1, int var) { +- v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsat_wu(_1, 
var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} +- return res; +-} +- +-v2u64 vsat_du(v2u64 _1, int var) { +- v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vreplvei_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vreplvei_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vreplvei_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vreplvei_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vandi_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vori_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vnori_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vxori_b(v16u8 _1, int var) { +- v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 255]}} +- res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { +- v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} +- return res; +-} +- +-v16i8 vshuf4i_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vshuf4i_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vshuf4i_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} +- return res; +-} +- +-int vpickve2gr_b(v16i8 _1, int var) { +- int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} +- return res; +-} +- +-int vpickve2gr_h(v8i16 _1, int var) { +- int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} +- return res; +-} +- +-int vpickve2gr_w(v4i32 _1, int var) { +- int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} +- return res; +-} +- +-long vpickve2gr_d(v2i64 _1, int var) { +- long res = __lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- 
res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} +- return res; +-} +- +-unsigned int vpickve2gr_bu(v16i8 _1, int var) { +- unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} +- return res; +-} +- +-unsigned int vpickve2gr_hu(v8i16 _1, int var) { +- unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} +- return res; +-} +- +-unsigned int vpickve2gr_wu(v4i32 _1, int var) { +- unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} +- return res; +-} +- +-unsigned long int vpickve2gr_du(v2i64 _1, int var) { +- unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vinsgr2vr_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vinsgr2vr_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vinsgr2vr_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vinsgr2vr_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= 
__lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} +- return res; +-} +- +-v8i16 vsllwil_h_b(v16i8 _1, int var) { +- v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} +- return res; +-} +- +-v4i32 vsllwil_w_h(v8i16 _1, int var) { +- v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} +- return res; +-} +- +-v2i64 vsllwil_d_w(v4i32 _1, int var) { +- v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} +- return res; +-} +- +-v8u16 vsllwil_hu_bu(v16u8 _1, int var) { +- v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} +- return res; +-} +- +-v4u32 vsllwil_wu_hu(v8u16 _1, int var) { +- v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} +- return res; +-} +- +-v2u64 vsllwil_du_wu(v4u32 _1, int var) { +- v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} +- return res; +-} +- +-v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' 
must be a constant integer}} +- return res; +-} +- +-v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vbsrl_v(v16i8 _1, int var) { +- v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} +- return res; +-} +- +-v16i8 vbsll_v(v16i8 _1, int var) { +- v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} +- return res; +-} +- +-v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} +- return res; +-} +- +-void vstelm_b_idx(v16i8 _1, void *_2, int var) { +- __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +-} +- +-void vstelm_h_idx(v8i16 _1, void *_2, int var) 
{ +- __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +-} +- +-void vstelm_w_idx(v4i32 _1, void *_2, int var) { +- __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +-} +- +-void vstelm_d_idx(v2i64 _1, void *_2, int var) { +- __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +-} +- +-void vstelm_b(v16i8 _1, void *_2, int var) { +- __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} +- __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} +- __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +-} +- +-void vstelm_h(v8i16 _1, void *_2, int var) { +- __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} +- __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} +- __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +-} +- +-void vstelm_w(v4i32 _1, void *_2, int var) { +- __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} +- __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} +- __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +-} +- +-void vstelm_d(v2i64 _1, void *_2, int var) { +- __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} +- __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} +- __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +-} +- +-v16i8 vldrepl_b(void *_1, int var) { +- v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vldrepl_h(void *_1, int var) { +- v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} +- res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} +- res |= __lsx_vldrepl_h(_1, var); // 
expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vldrepl_w(void *_1, int var) { +- v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} +- res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} +- res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vldrepl_d(void *_1, int var) { +- v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} +- res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} +- res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vrotri_b(v16i8 _1, int var) { +- v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vrotri_h(v8i16 _1, int var) { +- v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vrotri_w(v4i32 _1, int var) { +- v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vrotri_d(v2i64 _1, int var) { +- v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { +- 
v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 
vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to 
'__builtin_lsx_vssrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid 
range [0, 31]}} +- res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error 
{{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid 
range [0, 31]}} +- res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} +- return res; +-} +- +-v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} +- return res; +-} +- +-v16i8 vld(void *_1, int var) { +- v16i8 res = __lsx_vld(_1, -2049); // 
expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} +- return res; +-} +- +-void vst(v16i8 _1, void *_2, int var) { +- __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} +-} +- +-v2i64 vldi(int var) { +- v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} +- res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} +- res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} +- return res; +-} +- +-v16i8 vrepli_b(int var) { +- v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} +- return res; +-} +- +-v2i64 vrepli_d(int var) { +- v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} +- return res; +-} +- +-v8i16 vrepli_h(int var) { +- v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vrepli_w(int var) { +- v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} +- return res; +-} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +deleted file mode 100644 +index 7a84e0ae2..000000000 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c ++++ /dev/null +@@ -1,6359 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s +- +-#include +- +-// CHECK-LABEL: @vsll_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] 
+-// +-v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } +-// CHECK-LABEL: @vsll_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } +-// CHECK-LABEL: @vsll_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } +-// CHECK-LABEL: @vsll_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } +-// CHECK-LABEL: @vslli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } +-// CHECK-LABEL: @vslli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } +-// CHECK-LABEL: @vslli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } +-// CHECK-LABEL: @vslli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } +-// CHECK-LABEL: @vsra_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to 
i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } +-// CHECK-LABEL: @vsra_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } +-// CHECK-LABEL: @vsra_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } +-// CHECK-LABEL: @vsra_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } +-// CHECK-LABEL: @vsrai_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } +-// CHECK-LABEL: @vsrai_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } +-// CHECK-LABEL: @vsrai_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } +-// CHECK-LABEL: @vsrai_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } +-// CHECK-LABEL: @vsrar_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } +-// CHECK-LABEL: @vsrar_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } +-// CHECK-LABEL: @vsrar_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } +-// CHECK-LABEL: @vsrar_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } +-// CHECK-LABEL: @vsrari_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } +-// CHECK-LABEL: @vsrari_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } +-// CHECK-LABEL: @vsrari_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } +-// CHECK-LABEL: @vsrari_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } +-// CHECK-LABEL: @vsrl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } +-// CHECK-LABEL: @vsrl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } +-// CHECK-LABEL: @vsrl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } +-// CHECK-LABEL: @vsrl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } +-// CHECK-LABEL: @vsrli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } +-// CHECK-LABEL: @vsrli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } +-// CHECK-LABEL: @vsrli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } +-// CHECK-LABEL: @vsrli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } +-// CHECK-LABEL: @vsrlr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } +-// CHECK-LABEL: @vsrlr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } +-// CHECK-LABEL: @vsrlr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } +-// CHECK-LABEL: @vsrlr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } +-// CHECK-LABEL: @vsrlri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } +-// CHECK-LABEL: @vsrlri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } +-// CHECK-LABEL: @vsrlri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } +-// CHECK-LABEL: @vsrlri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } +-// CHECK-LABEL: @vbitclr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } +-// CHECK-LABEL: @vbitclr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } +-// CHECK-LABEL: @vbitclr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } +-// CHECK-LABEL: @vbitclr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } +-// CHECK-LABEL: @vbitclri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } +-// CHECK-LABEL: @vbitclri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } +-// CHECK-LABEL: @vbitclri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } +-// CHECK-LABEL: @vbitclri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } 
+-// CHECK-LABEL: @vbitset_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } +-// CHECK-LABEL: @vbitset_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } +-// CHECK-LABEL: @vbitset_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } +-// CHECK-LABEL: @vbitset_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } +-// CHECK-LABEL: @vbitseti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } +-// CHECK-LABEL: @vbitseti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } +-// CHECK-LABEL: @vbitseti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } +-// CHECK-LABEL: @vbitseti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = 
bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } +-// CHECK-LABEL: @vbitrev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } +-// CHECK-LABEL: @vbitrev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } +-// CHECK-LABEL: @vbitrev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } +-// CHECK-LABEL: @vbitrev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } +-// CHECK-LABEL: @vbitrevi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } +-// CHECK-LABEL: @vbitrevi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } +-// CHECK-LABEL: @vbitrevi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } +-// CHECK-LABEL: @vbitrevi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } +-// CHECK-LABEL: @vadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } +-// CHECK-LABEL: @vadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } +-// CHECK-LABEL: @vadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } +-// CHECK-LABEL: @vadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } +-// CHECK-LABEL: @vaddi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } +-// CHECK-LABEL: @vaddi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } +-// CHECK-LABEL: @vaddi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } +-// CHECK-LABEL: @vaddi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } +-// CHECK-LABEL: @vsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } +-// CHECK-LABEL: @vsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } +-// CHECK-LABEL: @vsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } +-// CHECK-LABEL: @vsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } +-// CHECK-LABEL: @vsubi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } +-// CHECK-LABEL: @vsubi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } +-// CHECK-LABEL: @vsubi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } +-// 
CHECK-LABEL: @vsubi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } +-// CHECK-LABEL: @vmax_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } +-// CHECK-LABEL: @vmax_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } +-// CHECK-LABEL: @vmax_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } +-// CHECK-LABEL: @vmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } +-// CHECK-LABEL: @vmaxi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } +-// CHECK-LABEL: @vmaxi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } +-// CHECK-LABEL: @vmaxi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vmaxi_w(v4i32 
_1) { return __lsx_vmaxi_w(_1, 1); } +-// CHECK-LABEL: @vmaxi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } +-// CHECK-LABEL: @vmax_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } +-// CHECK-LABEL: @vmax_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } +-// CHECK-LABEL: @vmax_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } +-// CHECK-LABEL: @vmax_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } +-// CHECK-LABEL: @vmaxi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } +-// CHECK-LABEL: @vmaxi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } +-// CHECK-LABEL: @vmaxi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to 
i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } +-// CHECK-LABEL: @vmaxi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } +-// CHECK-LABEL: @vmin_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } +-// CHECK-LABEL: @vmin_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } +-// CHECK-LABEL: @vmin_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } +-// CHECK-LABEL: @vmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } +-// CHECK-LABEL: @vmini_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } +-// CHECK-LABEL: @vmini_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } +-// CHECK-LABEL: @vmini_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: 
[[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } +-// CHECK-LABEL: @vmini_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } +-// CHECK-LABEL: @vmin_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } +-// CHECK-LABEL: @vmin_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } +-// CHECK-LABEL: @vmin_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } +-// CHECK-LABEL: @vmin_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } +-// CHECK-LABEL: @vmini_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } +-// CHECK-LABEL: @vmini_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } +-// CHECK-LABEL: @vmini_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } +-// CHECK-LABEL: @vmini_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } +-// CHECK-LABEL: @vseq_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } +-// CHECK-LABEL: @vseq_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } +-// CHECK-LABEL: @vseq_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } +-// CHECK-LABEL: @vseq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } +-// CHECK-LABEL: @vseqi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } +-// CHECK-LABEL: @vseqi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } +-// CHECK-LABEL: @vseqi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } +-// CHECK-LABEL: @vseqi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } +-// CHECK-LABEL: @vslti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } +-// CHECK-LABEL: @vslt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } +-// CHECK-LABEL: @vslt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } +-// CHECK-LABEL: @vslt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } +-// CHECK-LABEL: @vslt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } +-// CHECK-LABEL: @vslti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } +-// CHECK-LABEL: @vslti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } +-// CHECK-LABEL: @vslti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } +-// CHECK-LABEL: @vslt_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } +-// CHECK-LABEL: @vslt_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } +-// CHECK-LABEL: @vslt_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } +-// CHECK-LABEL: @vslt_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } +-// CHECK-LABEL: @vslti_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } +-// CHECK-LABEL: @vslti_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } +-// CHECK-LABEL: @vslti_wu( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } +-// CHECK-LABEL: @vslti_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } +-// CHECK-LABEL: @vsle_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } +-// CHECK-LABEL: @vsle_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } +-// CHECK-LABEL: @vsle_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } +-// CHECK-LABEL: @vsle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } +-// CHECK-LABEL: @vslei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } +-// CHECK-LABEL: @vslei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslei_h(v8i16 _1) { return 
__lsx_vslei_h(_1, 1); } +-// CHECK-LABEL: @vslei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } +-// CHECK-LABEL: @vslei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } +-// CHECK-LABEL: @vsle_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } +-// CHECK-LABEL: @vsle_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } +-// CHECK-LABEL: @vsle_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } +-// CHECK-LABEL: @vsle_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } +-// CHECK-LABEL: @vslei_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } +-// CHECK-LABEL: @vslei_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// 
CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } +-// CHECK-LABEL: @vslei_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } +-// CHECK-LABEL: @vslei_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } +-// CHECK-LABEL: @vsat_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } +-// CHECK-LABEL: @vsat_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } +-// CHECK-LABEL: @vsat_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } +-// CHECK-LABEL: @vsat_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } +-// CHECK-LABEL: @vsat_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } +-// CHECK-LABEL: @vsat_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } +-// CHECK-LABEL: @vsat_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: 
[[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } +-// CHECK-LABEL: @vsat_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } +-// CHECK-LABEL: @vadda_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } +-// CHECK-LABEL: @vadda_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } +-// CHECK-LABEL: @vadda_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } +-// CHECK-LABEL: @vadda_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } +-// CHECK-LABEL: @vsadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } +-// CHECK-LABEL: @vsadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); 
} +-// CHECK-LABEL: @vsadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } +-// CHECK-LABEL: @vsadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } +-// CHECK-LABEL: @vsadd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } +-// CHECK-LABEL: @vsadd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } +-// CHECK-LABEL: @vsadd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } +-// CHECK-LABEL: @vsadd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } +-// CHECK-LABEL: @vavg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } +-// CHECK-LABEL: @vavg_h( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } +-// CHECK-LABEL: @vavg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } +-// CHECK-LABEL: @vavg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } +-// CHECK-LABEL: @vavg_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } +-// CHECK-LABEL: @vavg_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } +-// CHECK-LABEL: @vavg_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } +-// CHECK-LABEL: @vavg_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } +-// CHECK-LABEL: @vavgr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } +-// CHECK-LABEL: @vavgr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } +-// CHECK-LABEL: @vavgr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } +-// CHECK-LABEL: @vavgr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } +-// CHECK-LABEL: @vavgr_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } +-// CHECK-LABEL: @vavgr_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } +-// CHECK-LABEL: @vavgr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } +-// CHECK-LABEL: @vavgr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x 
i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } +-// CHECK-LABEL: @vssub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } +-// CHECK-LABEL: @vssub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } +-// CHECK-LABEL: @vssub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } +-// CHECK-LABEL: @vssub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } +-// CHECK-LABEL: @vssub_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } +-// CHECK-LABEL: @vssub_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } +-// CHECK-LABEL: @vssub_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] 
= bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } +-// CHECK-LABEL: @vssub_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } +-// CHECK-LABEL: @vabsd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } +-// CHECK-LABEL: @vabsd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } +-// CHECK-LABEL: @vabsd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } +-// CHECK-LABEL: @vabsd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } +-// CHECK-LABEL: @vabsd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } +-// CHECK-LABEL: @vabsd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to 
<8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } +-// CHECK-LABEL: @vabsd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } +-// CHECK-LABEL: @vabsd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } +-// CHECK-LABEL: @vmul_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } +-// CHECK-LABEL: @vmul_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } +-// CHECK-LABEL: @vmul_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } +-// CHECK-LABEL: @vmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } +-// CHECK-LABEL: @vmadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast 
i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __lsx_vmadd_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __lsx_vmadd_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { +- return __lsx_vmadd_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __lsx_vmadd_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __lsx_vmsub_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __lsx_vmsub_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { +- return __lsx_vmsub_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __lsx_vmsub_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vdiv_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } +-// CHECK-LABEL: @vdiv_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } +-// CHECK-LABEL: @vdiv_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } +-// CHECK-LABEL: @vdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } +-// CHECK-LABEL: @vdiv_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// 
+-v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } +-// CHECK-LABEL: @vdiv_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } +-// CHECK-LABEL: @vdiv_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } +-// CHECK-LABEL: @vdiv_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } +-// CHECK-LABEL: @vhaddw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } +-// CHECK-LABEL: @vhaddw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } +-// CHECK-LABEL: @vhaddw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } +-// CHECK-LABEL: @vhaddw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// 
+-v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } +-// CHECK-LABEL: @vhaddw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } +-// CHECK-LABEL: @vhaddw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } +-// CHECK-LABEL: @vhsubw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } +-// CHECK-LABEL: @vhsubw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } +-// CHECK-LABEL: @vhsubw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } +-// CHECK-LABEL: @vhsubw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } +-// CHECK-LABEL: @vhsubw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } +-// CHECK-LABEL: @vhsubw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } +-// CHECK-LABEL: @vmod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } +-// CHECK-LABEL: @vmod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } +-// CHECK-LABEL: @vmod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } +-// CHECK-LABEL: @vmod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } +-// CHECK-LABEL: @vmod_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } +-// CHECK-LABEL: @vmod_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to 
i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } +-// CHECK-LABEL: @vmod_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } +-// CHECK-LABEL: @vmod_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } +-// CHECK-LABEL: @vreplve_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } +-// CHECK-LABEL: @vreplve_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } +-// CHECK-LABEL: @vreplve_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } +-// CHECK-LABEL: @vreplve_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } +-// CHECK-LABEL: @vreplvei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } +-// CHECK-LABEL: @vreplvei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// 
CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } +-// CHECK-LABEL: @vreplvei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } +-// CHECK-LABEL: @vreplvei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } +-// CHECK-LABEL: @vpickev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } +-// CHECK-LABEL: @vpickev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } +-// CHECK-LABEL: @vpickev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } +-// CHECK-LABEL: @vpickev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } +-// CHECK-LABEL: @vpickod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } +-// CHECK-LABEL: @vpickod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] 
= bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } +-// CHECK-LABEL: @vpickod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } +-// CHECK-LABEL: @vpickod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } +-// CHECK-LABEL: @vilvh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } +-// CHECK-LABEL: @vilvh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } +-// CHECK-LABEL: @vilvh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } +-// CHECK-LABEL: @vilvh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } +-// CHECK-LABEL: @vilvl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } +-// CHECK-LABEL: @vilvl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } +-// CHECK-LABEL: @vilvl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } +-// CHECK-LABEL: @vilvl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } +-// CHECK-LABEL: @vpackev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } +-// CHECK-LABEL: @vpackev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } +-// CHECK-LABEL: @vpackev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } +-// CHECK-LABEL: @vpackev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to 
<2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } +-// CHECK-LABEL: @vpackod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } +-// CHECK-LABEL: @vpackod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } +-// CHECK-LABEL: @vpackod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } +-// CHECK-LABEL: @vpackod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } +-// CHECK-LABEL: @vshuf_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __lsx_vshuf_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vshuf_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP4]] +-// +-v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { +- return __lsx_vshuf_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vshuf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __lsx_vshuf_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vand_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } +-// CHECK-LABEL: @vandi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } +-// CHECK-LABEL: @vor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } +-// CHECK-LABEL: @vori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } +-// CHECK-LABEL: @vnor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } +-// CHECK-LABEL: @vnori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } +-// CHECK-LABEL: @vxor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } +-// CHECK-LABEL: @vxori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } +-// CHECK-LABEL: @vbitsel_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { +- return __lsx_vbitsel_v(_1, _2, _3); +-} +-// CHECK-LABEL: @vbitseli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } +-// CHECK-LABEL: @vshuf4i_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } +-// CHECK-LABEL: @vshuf4i_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } +-// CHECK-LABEL: @vshuf4i_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } +-// CHECK-LABEL: @vreplgr2vr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } +-// CHECK-LABEL: @vreplgr2vr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail 
call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } +-// CHECK-LABEL: @vreplgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } +-// CHECK-LABEL: @vreplgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } +-// CHECK-LABEL: @vpcnt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } +-// CHECK-LABEL: @vpcnt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } +-// CHECK-LABEL: @vpcnt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); } +-// CHECK-LABEL: @vpcnt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } +-// CHECK-LABEL: @vclo_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } +-// CHECK-LABEL: @vclo_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } +-// CHECK-LABEL: @vclo_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP2]] +-// +-v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } +-// CHECK-LABEL: @vclo_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } +-// CHECK-LABEL: @vclz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } +-// CHECK-LABEL: @vclz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } +-// CHECK-LABEL: @vclz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } +-// CHECK-LABEL: @vclz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } +-// CHECK-LABEL: @vpickve2gr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail 
call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } +-// CHECK-LABEL: @vinsgr2vr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } +-// CHECK-LABEL: @vinsgr2vr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } +-// CHECK-LABEL: @vinsgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } +-// CHECK-LABEL: @vinsgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } +-// CHECK-LABEL: @vfadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } +-// CHECK-LABEL: @vfadd_d( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } +-// CHECK-LABEL: @vfsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } +-// CHECK-LABEL: @vfsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } +-// CHECK-LABEL: @vfmul_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } +-// CHECK-LABEL: @vfmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } +-// CHECK-LABEL: @vfdiv_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } +-// CHECK-LABEL: @vfdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, 
_2); } +-// CHECK-LABEL: @vfcvt_h_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } +-// CHECK-LABEL: @vfcvt_s_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } +-// CHECK-LABEL: @vfmin_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } +-// CHECK-LABEL: @vfmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } +-// CHECK-LABEL: @vfmina_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } +-// CHECK-LABEL: @vfmina_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } +-// CHECK-LABEL: @vfmax_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP3]] +-// +-v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } +-// CHECK-LABEL: @vfmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } +-// CHECK-LABEL: @vfmaxa_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } +-// CHECK-LABEL: @vfmaxa_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } +-// CHECK-LABEL: @vfclass_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } +-// CHECK-LABEL: @vfclass_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } +-// CHECK-LABEL: @vfsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } +-// CHECK-LABEL: @vfsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } +-// CHECK-LABEL: @vfrecip_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> 
[[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } +-// CHECK-LABEL: @vfrecip_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } +-// CHECK-LABEL: @vfrint_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } +-// CHECK-LABEL: @vfrint_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } +-// CHECK-LABEL: @vfrsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } +-// CHECK-LABEL: @vfrsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } +-// CHECK-LABEL: @vflogb_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } +-// CHECK-LABEL: @vflogb_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } +-// CHECK-LABEL: @vfcvth_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } +-// CHECK-LABEL: @vfcvth_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x 
double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } +-// CHECK-LABEL: @vfcvtl_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } +-// CHECK-LABEL: @vfcvtl_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } +-// CHECK-LABEL: @vftint_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } +-// CHECK-LABEL: @vftint_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } +-// CHECK-LABEL: @vftint_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } +-// CHECK-LABEL: @vftint_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } +-// CHECK-LABEL: @vftintrz_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } +-// CHECK-LABEL: @vftintrz_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } +-// CHECK-LABEL: 
@vftintrz_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } +-// CHECK-LABEL: @vftintrz_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } +-// CHECK-LABEL: @vffint_s_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } +-// CHECK-LABEL: @vffint_d_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } +-// CHECK-LABEL: @vffint_s_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } +-// CHECK-LABEL: @vffint_d_lu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } +-// CHECK-LABEL: @vandn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } +-// CHECK-LABEL: @vneg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } +-// CHECK-LABEL: @vneg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } +-// CHECK-LABEL: @vneg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } +-// CHECK-LABEL: @vneg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } +-// CHECK-LABEL: @vmuh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } +-// CHECK-LABEL: @vmuh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } +-// CHECK-LABEL: @vmuh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } +-// CHECK-LABEL: @vmuh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } +-// CHECK-LABEL: @vmuh_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } +-// CHECK-LABEL: @vmuh_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } +-// CHECK-LABEL: @vmuh_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } +-// CHECK-LABEL: @vmuh_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } +-// CHECK-LABEL: @vsllwil_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } +-// CHECK-LABEL: @vsllwil_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } +-// CHECK-LABEL: @vsllwil_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } +-// CHECK-LABEL: @vsllwil_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } +-// CHECK-LABEL: @vsllwil_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } +-// CHECK-LABEL: @vsllwil_du_wu( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } +-// CHECK-LABEL: @vsran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } +-// CHECK-LABEL: @vsran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } +-// CHECK-LABEL: @vsran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } +-// CHECK-LABEL: @vssran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } +-// CHECK-LABEL: @vssran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } +-// CHECK-LABEL: @vssran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } +-// CHECK-LABEL: @vssran_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } +-// CHECK-LABEL: @vssran_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } +-// CHECK-LABEL: @vssran_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } +-// CHECK-LABEL: @vsrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } +-// CHECK-LABEL: @vsrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } +-// CHECK-LABEL: @vsrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } +-// CHECK-LABEL: @vssrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } +-// CHECK-LABEL: @vssrarn_h_w( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } +-// CHECK-LABEL: @vssrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } +-// CHECK-LABEL: @vssrarn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } +-// CHECK-LABEL: @vssrarn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } +-// CHECK-LABEL: @vssrarn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } +-// CHECK-LABEL: @vsrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } +-// CHECK-LABEL: @vsrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return 
__lsx_vsrln_h_w(_1, _2); } +-// CHECK-LABEL: @vsrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } +-// CHECK-LABEL: @vssrln_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } +-// CHECK-LABEL: @vssrln_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } +-// CHECK-LABEL: @vssrln_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } +-// CHECK-LABEL: @vsrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } +-// CHECK-LABEL: @vsrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } +-// CHECK-LABEL: @vsrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// 
+-v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } +-// CHECK-LABEL: @vssrlrn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } +-// CHECK-LABEL: @vssrlrn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } +-// CHECK-LABEL: @vssrlrn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } +-// CHECK-LABEL: @vfrstpi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } +-// CHECK-LABEL: @vfrstpi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } +-// CHECK-LABEL: @vfrstp_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __lsx_vfrstp_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vfrstp_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 
[[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __lsx_vfrstp_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vshuf4i_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } +-// CHECK-LABEL: @vbsrl_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } +-// CHECK-LABEL: @vbsll_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } +-// CHECK-LABEL: @vextrins_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } +-// CHECK-LABEL: @vextrins_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } +-// CHECK-LABEL: @vextrins_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } +-// CHECK-LABEL: @vextrins_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x 
i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } +-// CHECK-LABEL: @vmskltz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } +-// CHECK-LABEL: @vmskltz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } +-// CHECK-LABEL: @vmskltz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } +-// CHECK-LABEL: @vmskltz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } +-// CHECK-LABEL: @vsigncov_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } +-// CHECK-LABEL: @vsigncov_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } +-// CHECK-LABEL: @vsigncov_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } +-// CHECK-LABEL: @vsigncov_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } +-// CHECK-LABEL: @vfmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __lsx_vfmadd_s(_1, _2, _3); +-} +-// CHECK-LABEL: @vfmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __lsx_vfmadd_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vfmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __lsx_vfmsub_s(_1, _2, _3); +-} +-// CHECK-LABEL: @vfmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __lsx_vfmsub_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __lsx_vfnmadd_s(_1, _2, _3); +-} +-// CHECK-LABEL: 
@vfnmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __lsx_vfnmadd_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __lsx_vfnmsub_s(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __lsx_vfnmsub_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vftintrne_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } +-// CHECK-LABEL: @vftintrne_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } +-// CHECK-LABEL: @vftintrp_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } +-// CHECK-LABEL: @vftintrp_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrp_l_d(v2f64 _1) 
{ return __lsx_vftintrp_l_d(_1); } +-// CHECK-LABEL: @vftintrm_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } +-// CHECK-LABEL: @vftintrm_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } +-// CHECK-LABEL: @vftint_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } +-// CHECK-LABEL: @vffint_s_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } +-// CHECK-LABEL: @vftintrz_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } +-// CHECK-LABEL: @vftintrp_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } +-// CHECK-LABEL: @vftintrm_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } +-// CHECK-LABEL: @vftintrne_w_d( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } +-// CHECK-LABEL: @vftintl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } +-// CHECK-LABEL: @vftinth_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } +-// CHECK-LABEL: @vffinth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } +-// CHECK-LABEL: @vffintl_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } +-// CHECK-LABEL: @vftintrzl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } +-// CHECK-LABEL: @vftintrzh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } +-// CHECK-LABEL: @vftintrpl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } +-// CHECK-LABEL: @vftintrph_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } +-// CHECK-LABEL: @vftintrml_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } +-// CHECK-LABEL: @vftintrmh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } +-// CHECK-LABEL: @vftintrnel_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } +-// CHECK-LABEL: @vftintrneh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } +-// CHECK-LABEL: @vfrintrne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } +-// CHECK-LABEL: @vfrintrne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } +-// CHECK-LABEL: @vfrintrz_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } +-// CHECK-LABEL: @vfrintrz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] 
+-// +-v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } +-// CHECK-LABEL: @vfrintrp_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } +-// CHECK-LABEL: @vfrintrp_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } +-// CHECK-LABEL: @vfrintrm_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } +-// CHECK-LABEL: @vfrintrm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } +-// CHECK-LABEL: @vstelm_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } +-// CHECK-LABEL: @vstelm_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } +-// CHECK-LABEL: @vstelm_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } +-// CHECK-LABEL: @vstelm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } +-// CHECK-LABEL: @vaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// 
+-v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } +-// CHECK-LABEL: @vaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } +-// CHECK-LABEL: @vaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } +-// CHECK-LABEL: @vaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } +-// CHECK-LABEL: @vaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } +-// CHECK-LABEL: @vaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } +-// CHECK-LABEL: @vaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } +-// CHECK-LABEL: @vaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } +-// CHECK-LABEL: @vaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } +-// CHECK-LABEL: @vaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } +-// CHECK-LABEL: @vaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } +-// CHECK-LABEL: @vaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } +-// CHECK-LABEL: @vaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { +- return __lsx_vaddwev_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { +- return __lsx_vaddwev_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { +- return __lsx_vaddwev_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { +- return __lsx_vaddwod_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { +- return __lsx_vaddwod_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { +- return __lsx_vaddwod_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } +-// CHECK-LABEL: @vsubwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } +-// CHECK-LABEL: @vsubwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } +-// CHECK-LABEL: @vsubwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } +-// CHECK-LABEL: @vsubwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } +-// CHECK-LABEL: @vsubwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } +-// CHECK-LABEL: @vsubwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } +-// CHECK-LABEL: @vsubwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } +-// CHECK-LABEL: @vsubwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } +-// CHECK-LABEL: @vsubwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } +-// CHECK-LABEL: 
@vsubwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } +-// CHECK-LABEL: @vsubwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } +-// CHECK-LABEL: @vaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } +-// CHECK-LABEL: @vaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } +-// CHECK-LABEL: @vaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } +-// CHECK-LABEL: @vaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } +-// CHECK-LABEL: @vsubwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 
vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } +-// CHECK-LABEL: @vsubwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } +-// CHECK-LABEL: @vsubwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } +-// CHECK-LABEL: @vsubwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } +-// CHECK-LABEL: @vaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { +- return __lsx_vaddwev_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { +- return __lsx_vaddwod_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } +-// CHECK-LABEL: @vmulwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } +-// CHECK-LABEL: @vmulwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } +-// CHECK-LABEL: @vmulwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } +-// CHECK-LABEL: @vmulwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } +-// CHECK-LABEL: @vmulwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } +-// CHECK-LABEL: @vmulwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } +-// CHECK-LABEL: @vmulwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } +-// CHECK-LABEL: @vmulwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } +-// CHECK-LABEL: @vmulwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } +-// CHECK-LABEL: @vmulwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } +-// CHECK-LABEL: @vmulwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } +-// CHECK-LABEL: @vmulwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { +- return __lsx_vmulwev_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { +- return __lsx_vmulwev_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { +- return __lsx_vmulwev_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x 
i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { +- return __lsx_vmulwod_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { +- return __lsx_vmulwod_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { +- return __lsx_vmulwod_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } +-// CHECK-LABEL: @vmulwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } +-// CHECK-LABEL: @vmulwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } +-// CHECK-LABEL: @vmulwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } +-// 
CHECK-LABEL: @vmulwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { +- return __lsx_vmulwev_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { +- return __lsx_vmulwod_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } +-// CHECK-LABEL: @vhaddw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } +-// CHECK-LABEL: @vhsubw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } +-// CHECK-LABEL: @vhsubw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } +-// CHECK-LABEL: @vmaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> 
[[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { +- return __lsx_vmaddwev_d_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { +- return __lsx_vmaddwev_w_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { +- return __lsx_vmaddwev_h_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { +- return __lsx_vmaddwev_d_wu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { +- return __lsx_vmaddwev_w_hu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { +- return __lsx_vmaddwev_h_bu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { +- return __lsx_vmaddwod_d_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { +- return __lsx_vmaddwod_w_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { +- return __lsx_vmaddwod_h_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { +- return __lsx_vmaddwod_d_wu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { +- return __lsx_vmaddwod_w_hu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// 
CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { +- return __lsx_vmaddwod_h_bu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { +- return __lsx_vmaddwev_d_wu_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { +- return __lsx_vmaddwev_w_hu_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { +- return __lsx_vmaddwev_h_bu_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { +- return __lsx_vmaddwod_d_wu_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { +- return __lsx_vmaddwod_w_hu_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast 
i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { +- return __lsx_vmaddwod_h_bu_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __lsx_vmaddwev_q_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __lsx_vmaddwod_q_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { +- return __lsx_vmaddwev_q_du(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { +- return __lsx_vmaddwod_q_du(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP4]] +-// +-v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { +- return __lsx_vmaddwev_q_du_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { +- return __lsx_vmaddwod_q_du_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vrotr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } +-// CHECK-LABEL: @vrotr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } +-// CHECK-LABEL: @vrotr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } +-// CHECK-LABEL: @vrotr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } +-// CHECK-LABEL: @vadd_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } +-// CHECK-LABEL: @vsub_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x 
i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } +-// CHECK-LABEL: @vldrepl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } +-// CHECK-LABEL: @vldrepl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } +-// CHECK-LABEL: @vldrepl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } +-// CHECK-LABEL: @vldrepl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } +-// CHECK-LABEL: @vmskgez_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } +-// CHECK-LABEL: @vmsknz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } +-// CHECK-LABEL: @vexth_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } +-// CHECK-LABEL: @vexth_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } +-// CHECK-LABEL: @vexth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } +-// CHECK-LABEL: @vexth_q_d( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } +-// CHECK-LABEL: @vexth_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } +-// CHECK-LABEL: @vexth_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } +-// CHECK-LABEL: @vexth_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } +-// CHECK-LABEL: @vexth_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } +-// CHECK-LABEL: @vrotri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } +-// CHECK-LABEL: @vrotri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } +-// CHECK-LABEL: @vrotri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } +-// CHECK-LABEL: @vrotri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// 
+-v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } +-// CHECK-LABEL: @vextl_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } +-// CHECK-LABEL: @vsrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vsrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vsrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vsrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vsrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vsrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlrni_h_w(v8i16 _1, 
v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vsrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vsrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) 
+-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @vssrlni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { +- return __lsx_vssrlrni_bu_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { +- return __lsx_vssrlrni_hu_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { +- return __lsx_vssrlrni_wu_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { +- return __lsx_vssrlrni_du_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vsrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 
1); } +-// CHECK-LABEL: @vsrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vsrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vsrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vsrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vsrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vsrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> 
[[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } +-// CHECK-LABEL: @vssrani_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } +-// CHECK-LABEL: @vssrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } +-// CHECK-LABEL: @vssrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } +-// CHECK-LABEL: @vssrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } +-// CHECK-LABEL: @vssrarni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { +- return __lsx_vssrarni_bu_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { +- return __lsx_vssrarni_hu_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_wu_d( +-// CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { +- return __lsx_vssrarni_wu_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { +- return __lsx_vssrarni_du_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vpermi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } +-// CHECK-LABEL: @vld( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } +-// CHECK-LABEL: @vst( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret void +-// +-void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } +-// CHECK-LABEL: @vssrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } +-// CHECK-LABEL: @vssrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } +-// CHECK-LABEL: @vssrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } +-// CHECK-LABEL: @vssrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } +-// CHECK-LABEL: @vssrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } +-// CHECK-LABEL: @vorn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } +-// CHECK-LABEL: @vldi( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vldi() { return __lsx_vldi(1); } +-// CHECK-LABEL: @vshuf_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __lsx_vshuf_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vldx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } +-// CHECK-LABEL: @vstx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: 
tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) +-// CHECK-NEXT: ret void +-// +-void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } +-// CHECK-LABEL: @vextl_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } +-// CHECK-LABEL: @bnz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } +-// CHECK-LABEL: @bnz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } +-// CHECK-LABEL: @bnz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } +-// CHECK-LABEL: @bnz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } +-// CHECK-LABEL: @bnz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } +-// CHECK-LABEL: @bz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } +-// CHECK-LABEL: @bz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } +-// CHECK-LABEL: @bz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } +-// CHECK-LABEL: @bz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } +-// CHECK-LABEL: @bz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) +-// 
CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } +-// CHECK-LABEL: @vfcmp_caf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_caf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_ceq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_ceq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_clt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> 
[[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_clt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = 
bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_cune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } +-// 
CHECK-LABEL: @vfcmp_cune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_cun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_saf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_saf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_seq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_seq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> 
[[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_slt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_slt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 
x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } +-// CHECK-LABEL: @vfcmp_sune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } +-// CHECK-LABEL: @vfcmp_sun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } +-// CHECK-LABEL: @vrepli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vrepli_b() { return __lsx_vrepli_b(1); } +-// CHECK-LABEL: @vrepli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vrepli_d() { return __lsx_vrepli_d(1); } +-// CHECK-LABEL: @vrepli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v8i16 vrepli_h() { return __lsx_vrepli_h(1); } +-// CHECK-LABEL: @vrepli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v4i32 vrepli_w() { return __lsx_vrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c +deleted file mode 100644 +index f26f032c8..000000000 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c ++++ /dev/null +@@ -1,37 +0,0 @@ +-// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s +- +-#include <lsxintrin.h> +- +-// CHECK-LABEL: @vfrecipe_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrecipe_s(v4f32 _1) { return __lsx_vfrecipe_s(_1); } +-// CHECK-LABEL: @vfrecipe_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrecipe_d(v2f64 _1) { return __lsx_vfrecipe_d(_1); } +-// CHECK-LABEL: @vfrsqrte_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrsqrte_s(v4f32 _1) { return __lsx_vfrsqrte_s(_1); } +-// CHECK-LABEL: @vfrsqrte_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrsqrte_d(v2f64 _1) { return __lsx_vfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c +index 39fa1663d..171680e6d 100644 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c +@@ -1,38 +1,30 @@ + // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s + +-typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +-typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); + +-// CHECK-LABEL: @vfrecipe_s ++// CHECK-LABEL: @vfrecipe_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] + // +-v4f32 vfrecipe_s (v4f32 _1) { return __builtin_lsx_vfrecipe_s (_1); } +-// CHECK-LABEL: @vfrecipe_d ++v4f32 vfrecipe_s(v4f32 _1) { return __builtin_lsx_vfrecipe_s(_1); } ++// CHECK-LABEL: @vfrecipe_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] + // +-v2f64 vfrecipe_d (v2f64 _1) { return __builtin_lsx_vfrecipe_d (_1); } +-// CHECK-LABEL: @vfrsqrte_s ++v2f64 vfrecipe_d(v2f64 _1) { return __builtin_lsx_vfrecipe_d(_1); } ++// CHECK-LABEL: @vfrsqrte_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[_1:%.*]]) ++// CHECK-NEXT: ret <4 x float> [[TMP0]] + // +-v4f32 vfrsqrte_s (v4f32 _1) { return __builtin_lsx_vfrsqrte_s (_1); } +-// CHECK-LABEL: @vfrsqrte_d ++v4f32 vfrsqrte_s(v4f32 _1) { return __builtin_lsx_vfrsqrte_s(_1); } ++// CHECK-LABEL: @vfrsqrte_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] ++// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[_1:%.*]]) ++// CHECK-NEXT: ret <2 x double> [[TMP0]] + // +-v2f64 vfrsqrte_d (v2f64 _1) { return __builtin_lsx_vfrsqrte_d (_1); } ++v2f64 vfrsqrte_d(v2f64 _1) { return __builtin_lsx_vfrsqrte_d(_1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +deleted file mode 100644 +index 3fc5f73f1..000000000 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c ++++ /dev/null +@@ -1,1382 +0,0 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s +- +-typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +-typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +-typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +-typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +-typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +-typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +-typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +-typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +-typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +-typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +-typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +-typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +-typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +-typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +-typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +-typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +-typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +-typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +-typedef 
double v2f64 __attribute__((vector_size(16), aligned(16))); +-typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); +- +-typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +-typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +-typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); +- +-v16i8 vslli_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vslli_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vslli_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vslli_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrai_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrai_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrai_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrai_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res 
|= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrari_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrari_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrari_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrari_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrli_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrli_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrli_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrli_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid 
range [0, 63]}} +- res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrlri_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrlri_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrlri_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrlri_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitclri_b(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} +- return res; +-} +- +-v8u16 vbitclri_h(v8u16 _1, int var) { +- v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} +- return res; +-} +- +-v4u32 vbitclri_w(v4u32 _1, int var) { +- v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} +- return res; +-} +- +-v2u64 vbitclri_d(v2u64 _1, int var) { +- v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= 
__builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitseti_b(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} +- return res; +-} +- +-v8u16 vbitseti_h(v8u16 _1, int var) { +- v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} +- return res; +-} +- +-v4u32 vbitseti_w(v4u32 _1, int var) { +- v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} +- return res; +-} +- +-v2u64 vbitseti_d(v2u64 _1, int var) { +- v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitrevi_b(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} +- return res; +-} +- +-v8u16 vbitrevi_h(v8u16 _1, int var) { +- v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} +- return res; +-} +- +-v4u32 vbitrevi_w(v4u32 _1, int var) { +- v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} +- return res; +-} +- +-v2u64 vbitrevi_d(v2u64 _1, int var) { +- v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- 
res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vaddi_bu(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vaddi_hu(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vaddi_wu(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vaddi_du(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vsubi_bu(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vsubi_hu(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vsubi_wu(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vsubi_du(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsubi_du(_1, var); // expected-error 
{{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vmaxi_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vmaxi_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vmaxi_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vmaxi_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vmaxi_bu(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} +- return res; +-} +- +-v8u16 vmaxi_hu(v8u16 _1, int var) { +- v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} +- return res; +-} +- +-v4u32 vmaxi_wu(v4u32 _1, int var) { +- v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} +- return res; +-} +- +-v2u64 vmaxi_du(v2u64 _1, int var) { +- v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} +- return res; +-} +- 
+-v16i8 vmini_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vmini_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vmini_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vmini_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vmini_bu(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} +- return res; +-} +- +-v8u16 vmini_hu(v8u16 _1, int var) { +- v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} +- return res; +-} +- +-v4u32 vmini_wu(v4u32 _1, int var) { +- v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} +- return res; +-} +- +-v2u64 vmini_du(v2u64 _1, int var) { +- v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vseqi_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // 
expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vseqi_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vseqi_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vseqi_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vslti_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vslti_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vslti_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vslti_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vslti_bu(v16u8 _1, int var) { +- v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_bu(_1, 32); 
// expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vslti_hu(v8u16 _1, int var) { +- v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vslti_wu(v4u32 _1, int var) { +- v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vslti_du(v2u64 _1, int var) { +- v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vslei_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vslei_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vslei_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vslei_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} +- res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vslei_bu(v16u8 _1, int var) { +- v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= 
__builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} +- return res; +-} +- +-v8i16 vslei_hu(v8u16 _1, int var) { +- v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} +- return res; +-} +- +-v4i32 vslei_wu(v4u32 _1, int var) { +- v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} +- return res; +-} +- +-v2i64 vslei_du(v2u64 _1, int var) { +- v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vsat_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vsat_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vsat_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vsat_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vsat_bu(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} 
+- return res; +-} +- +-v8u16 vsat_hu(v8u16 _1, int var) { +- v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} +- return res; +-} +- +-v4u32 vsat_wu(v4u32 _1, int var) { +- v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} +- return res; +-} +- +-v2u64 vsat_du(v2u64 _1, int var) { +- v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vreplvei_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vreplvei_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vreplvei_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vreplvei_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} +- return res; +-} +- +-v16u8 vandi_b(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vori_b(v16u8 _1, int 
var) { +- v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vnori_b(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vxori_b(v16u8 _1, int var) { +- v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} +- return res; +-} +- +-v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { +- v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} +- return res; +-} +- +-v16i8 vshuf4i_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vshuf4i_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vshuf4i_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} +- return res; +-} +- +-int vpickve2gr_b(v16i8 _1, int var) { +- int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} +- return res; +-} +- +-int vpickve2gr_h(v8i16 
_1, int var) { +- int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} +- return res; +-} +- +-int vpickve2gr_w(v4i32 _1, int var) { +- int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} +- return res; +-} +- +-long vpickve2gr_d(v2i64 _1, int var) { +- long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} +- return res; +-} +- +-unsigned int vpickve2gr_bu(v16i8 _1, int var) { +- unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} +- return res; +-} +- +-unsigned int vpickve2gr_hu(v8i16 _1, int var) { +- unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} +- return res; +-} +- +-unsigned int vpickve2gr_wu(v4i32 _1, int var) { +- unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} +- return res; +-} +- +-unsigned long int vpickve2gr_du(v2i64 _1, int var) { +- unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} +- return res; +-} +- +-v16i8 vinsgr2vr_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= 
__builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vinsgr2vr_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vinsgr2vr_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vinsgr2vr_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} +- return res; +-} +- +-v8i16 vsllwil_h_b(v16i8 _1, int var) { +- v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} +- return res; +-} +- +-v4i32 vsllwil_w_h(v8i16 _1, int var) { +- v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} +- return res; +-} +- +-v2i64 vsllwil_d_w(v4i32 _1, int var) { +- v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} +- return res; +-} +- +-v8u16 vsllwil_hu_bu(v16u8 _1, int var) { +- v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} +- return res; +-} +- +-v4u32 vsllwil_wu_hu(v8u16 _1, int var) { +- v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // 
expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} +- return res; +-} +- +-v2u64 vsllwil_du_wu(v4u32 _1, int var) { +- v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} +- return res; +-} +- +-v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} +- return res; +-} +- +-v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vbsrl_v(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} +- return res; +-} +- +-v16i8 vbsll_v(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} +- return res; +-} +- +-v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument 
value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} +- return res; +-} +- +-void vstelm_b_idx(v16i8 _1, void *_2, int var) { +- __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +-} +- +-void vstelm_h_idx(v8i16 _1, void *_2, int var) { +- __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +-} +- +-void vstelm_w_idx(v4i32 _1, void *_2, int var) { +- __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} +- __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} +- __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +-} +- +-void vstelm_d_idx(v2i64 _1, void *_2, int var) { +- __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} +- __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} +- __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +-} +- +-void vstelm_b(v16i8 _1, void *_2, int var) { +- __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} +- __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} +- __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +-} +- +-void vstelm_h(v8i16 _1, void *_2, int var) { +- __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is 
outside the valid range [-256, 254]}} +- __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} +- __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +-} +- +-void vstelm_w(v4i32 _1, void *_2, int var) { +- __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} +- __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} +- __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +-} +- +-void vstelm_d(v2i64 _1, void *_2, int var) { +- __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} +- __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} +- __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +-} +- +-v16i8 vldrepl_b(void *_1, int var) { +- v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vldrepl_h(void *_1, int var) { +- v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} +- res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} +- res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vldrepl_w(void *_1, int var) { +- v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} +- res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} +- res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vldrepl_d(void *_1, int var) { +- v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} +- res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} +- res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vrotri_b(v16i8 _1, int var) { +- v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +- res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} +- return res; +-} +- +-v8i16 vrotri_h(v8i16 _1, int var) { +- v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 15]}} +- res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vrotri_w(v4i32 _1, int var) { +- v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} +- return res; +-} +- +-v2i64 vrotri_d(v2i64 _1, int var) { +- v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrlrni_h_w(v8i16 _1, 
v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error 
{{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} +- 
return res; +-} +- +-v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res 
|= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a 
constant integer}} +- return res; +-} +- +-v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} +- return res; +-} +- +-v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { +- v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} +- return res; +-} +- +-v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { +- v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} +- return res; +-} +- +-v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside 
the valid range [0, 63]}} +- res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} +- return res; +-} +- +-v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { +- v2i64 res = __builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} +- return res; +-} +- +-v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { +- v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} +- res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} +- return res; +-} +- +-v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { +- v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} +- res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} +- return res; +-} +- +-v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { +- v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} +- res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} +- return res; +-} +- +-v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { +- v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +- res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} +- return res; +-} +- +-v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { +- v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} +- res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} +- return res; +-} +- +-v16i8 vld(void *_1, int var) { +- v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to 
'__builtin_lsx_vld' must be a constant integer}} +- return res; +-} +- +-void vst(v16i8 _1, void *_2, int var) { +- __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} +- __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} +- __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} +-} +- +-v2i64 vldi(int var) { +- v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} +- res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} +- res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} +- return res; +-} +- +-v16i8 vrepli_b(int var) { +- v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} +- return res; +-} +- +-v2i64 vrepli_d(int var) { +- v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} +- return res; +-} +- +-v8i16 vrepli_h(int var) { +- v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} +- return res; +-} +- +-v4i32 vrepli_w(int var) { +- v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} +- res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} +- return res; +-} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c +deleted file mode 100644 +index 05a3d13a7..000000000 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin.c ++++ /dev/null +@@ -1,7101 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s +- +-typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); +-typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); +-typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); +-typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); +-typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); +-typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); +-typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); +-typedef unsigned short v8u16_h 
__attribute__ ((vector_size(16), aligned(2))); +-typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); +-typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); +-typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); +-typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); +-typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +-typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8))); +-typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); +-typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); +-typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +-typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); +-typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); +-typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); +- +-typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +-typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +-typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); +- +- +-// CHECK-LABEL: @vsll_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } +-// CHECK-LABEL: @vsll_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } +-// CHECK-LABEL: @vsll_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } +-// CHECK-LABEL: @vsll_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } +-// CHECK-LABEL: @vslli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// 
+-v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } +-// CHECK-LABEL: @vslli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } +-// CHECK-LABEL: @vslli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } +-// CHECK-LABEL: @vslli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } +-// CHECK-LABEL: @vsra_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } +-// CHECK-LABEL: @vsra_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } +-// CHECK-LABEL: @vsra_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } +-// CHECK-LABEL: @vsra_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } +-// CHECK-LABEL: @vsrai_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) +-// 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } +-// CHECK-LABEL: @vsrai_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } +-// CHECK-LABEL: @vsrai_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } +-// CHECK-LABEL: @vsrai_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } +-// CHECK-LABEL: @vsrar_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrar_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsrar_b(_1, _2); +-} +-// CHECK-LABEL: @vsrar_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrar_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrar_h(_1, _2); +-} +-// CHECK-LABEL: @vsrar_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrar_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrar_w(_1, _2); +-} +-// CHECK-LABEL: @vsrar_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrar_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrar_d(_1, _2); +-} +-// CHECK-LABEL: @vsrari_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } +-// CHECK-LABEL: @vsrari_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } +-// CHECK-LABEL: @vsrari_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } +-// CHECK-LABEL: @vsrari_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } +-// CHECK-LABEL: @vsrl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } +-// CHECK-LABEL: @vsrl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } +-// CHECK-LABEL: @vsrl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } +-// CHECK-LABEL: @vsrl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return 
__builtin_lsx_vsrl_d(_1, _2); } +-// CHECK-LABEL: @vsrli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } +-// CHECK-LABEL: @vsrli_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } +-// CHECK-LABEL: @vsrli_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } +-// CHECK-LABEL: @vsrli_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } +-// CHECK-LABEL: @vsrlr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsrlr_b(_1, _2); +-} +-// CHECK-LABEL: @vsrlr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrlr_h(_1, _2); +-} +-// CHECK-LABEL: @vsrlr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrlr_w(_1, _2); +-} +-// CHECK-LABEL: @vsrlr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrlr_d(_1, _2); +-} +-// CHECK-LABEL: @vsrlri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } +-// CHECK-LABEL: @vsrlri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } +-// CHECK-LABEL: @vsrlri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } +-// CHECK-LABEL: @vsrlri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } +-// CHECK-LABEL: @vbitclr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vbitclr_b(_1, _2); +-} +-// CHECK-LABEL: @vbitclr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vbitclr_h(_1, _2); +-} +-// CHECK-LABEL: @vbitclr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vbitclr_w(_1, _2); +-} +-// CHECK-LABEL: @vbitclr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = 
bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vbitclr_d(_1, _2); +-} +-// CHECK-LABEL: @vbitclri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } +-// CHECK-LABEL: @vbitclri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } +-// CHECK-LABEL: @vbitclri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } +-// CHECK-LABEL: @vbitclri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } +-// CHECK-LABEL: @vbitset_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitset_b(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vbitset_b(_1, _2); +-} +-// CHECK-LABEL: @vbitset_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vbitset_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vbitset_h(_1, _2); +-} +-// CHECK-LABEL: @vbitset_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vbitset_w(v4u32 _1, 
v4u32 _2) { +- return __builtin_lsx_vbitset_w(_1, _2); +-} +-// CHECK-LABEL: @vbitset_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vbitset_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vbitset_d(_1, _2); +-} +-// CHECK-LABEL: @vbitseti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } +-// CHECK-LABEL: @vbitseti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } +-// CHECK-LABEL: @vbitseti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } +-// CHECK-LABEL: @vbitseti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } +-// CHECK-LABEL: @vbitrev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vbitrev_b(_1, _2); +-} +-// CHECK-LABEL: @vbitrev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vbitrev_h(_1, _2); +-} +-// CHECK-LABEL: @vbitrev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vbitrev_w(_1, _2); +-} +-// CHECK-LABEL: @vbitrev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vbitrev_d(_1, _2); +-} +-// CHECK-LABEL: @vbitrevi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } +-// CHECK-LABEL: @vbitrevi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } +-// CHECK-LABEL: @vbitrevi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } +-// CHECK-LABEL: @vbitrevi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } +-// CHECK-LABEL: @vadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } +-// CHECK-LABEL: @vadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } +-// CHECK-LABEL: @vadd_w( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } +-// CHECK-LABEL: @vadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } +-// CHECK-LABEL: @vaddi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } +-// CHECK-LABEL: @vaddi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } +-// CHECK-LABEL: @vaddi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } +-// CHECK-LABEL: @vaddi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } +-// CHECK-LABEL: @vsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } +-// CHECK-LABEL: @vsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 
vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } +-// CHECK-LABEL: @vsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } +-// CHECK-LABEL: @vsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } +-// CHECK-LABEL: @vsubi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } +-// CHECK-LABEL: @vsubi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } +-// CHECK-LABEL: @vsubi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } +-// CHECK-LABEL: @vsubi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } +-// CHECK-LABEL: @vmax_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } +-// CHECK-LABEL: @vmax_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> 
[[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } +-// CHECK-LABEL: @vmax_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } +-// CHECK-LABEL: @vmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } +-// CHECK-LABEL: @vmaxi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } +-// CHECK-LABEL: @vmaxi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } +-// CHECK-LABEL: @vmaxi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } +-// CHECK-LABEL: @vmaxi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } +-// CHECK-LABEL: @vmax_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmax_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vmax_bu(_1, _2); +-} +-// CHECK-LABEL: @vmax_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x 
i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmax_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vmax_hu(_1, _2); +-} +-// CHECK-LABEL: @vmax_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmax_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vmax_wu(_1, _2); +-} +-// CHECK-LABEL: @vmax_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmax_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vmax_du(_1, _2); +-} +-// CHECK-LABEL: @vmaxi_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } +-// CHECK-LABEL: @vmaxi_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } +-// CHECK-LABEL: @vmaxi_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } +-// CHECK-LABEL: @vmaxi_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } +-// CHECK-LABEL: @vmin_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } +-// CHECK-LABEL: @vmin_h( +-// CHECK-NEXT: entry: 
+-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } +-// CHECK-LABEL: @vmin_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } +-// CHECK-LABEL: @vmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } +-// CHECK-LABEL: @vmini_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } +-// CHECK-LABEL: @vmini_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } +-// CHECK-LABEL: @vmini_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } +-// CHECK-LABEL: @vmini_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } +-// CHECK-LABEL: @vmin_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmin_bu(v16u8 
_1, v16u8 _2) { +- return __builtin_lsx_vmin_bu(_1, _2); +-} +-// CHECK-LABEL: @vmin_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmin_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vmin_hu(_1, _2); +-} +-// CHECK-LABEL: @vmin_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmin_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vmin_wu(_1, _2); +-} +-// CHECK-LABEL: @vmin_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmin_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vmin_du(_1, _2); +-} +-// CHECK-LABEL: @vmini_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } +-// CHECK-LABEL: @vmini_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } +-// CHECK-LABEL: @vmini_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } +-// CHECK-LABEL: @vmini_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } +-// CHECK-LABEL: @vseq_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> 
[[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } +-// CHECK-LABEL: @vseq_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } +-// CHECK-LABEL: @vseq_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } +-// CHECK-LABEL: @vseq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } +-// CHECK-LABEL: @vseqi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } +-// CHECK-LABEL: @vseqi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } +-// CHECK-LABEL: @vseqi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } +-// CHECK-LABEL: @vseqi_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } +-// CHECK-LABEL: @vslti_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } +-// CHECK-LABEL: @vslt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } +-// CHECK-LABEL: @vslt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } +-// CHECK-LABEL: @vslt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } +-// CHECK-LABEL: @vslt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } +-// CHECK-LABEL: @vslti_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } +-// CHECK-LABEL: @vslti_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } +-// CHECK-LABEL: @vslti_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } +-// CHECK-LABEL: @vslt_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vslt_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vslt_bu(_1, _2); +-} +-// CHECK-LABEL: @vslt_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vslt_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vslt_hu(_1, _2); +-} +-// CHECK-LABEL: @vslt_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vslt_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vslt_wu(_1, _2); +-} +-// CHECK-LABEL: @vslt_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vslt_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vslt_du(_1, _2); +-} +-// CHECK-LABEL: @vslti_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } +-// CHECK-LABEL: @vslti_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } +-// CHECK-LABEL: @vslti_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } +-// CHECK-LABEL: @vslti_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret 
i128 [[TMP2]] +-// +-v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } +-// CHECK-LABEL: @vsle_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } +-// CHECK-LABEL: @vsle_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } +-// CHECK-LABEL: @vsle_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } +-// CHECK-LABEL: @vsle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } +-// CHECK-LABEL: @vslei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } +-// CHECK-LABEL: @vslei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } +-// CHECK-LABEL: @vslei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } +-// CHECK-LABEL: @vslei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> 
[[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } +-// CHECK-LABEL: @vsle_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsle_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vsle_bu(_1, _2); +-} +-// CHECK-LABEL: @vsle_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsle_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vsle_hu(_1, _2); +-} +-// CHECK-LABEL: @vsle_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsle_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vsle_wu(_1, _2); +-} +-// CHECK-LABEL: @vsle_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsle_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vsle_du(_1, _2); +-} +-// CHECK-LABEL: @vslei_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } +-// CHECK-LABEL: @vslei_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } +-// CHECK-LABEL: @vslei_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } +-// CHECK-LABEL: @vslei_du( +-// CHECK-NEXT: entry: +-// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } +-// CHECK-LABEL: @vsat_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } +-// CHECK-LABEL: @vsat_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } +-// CHECK-LABEL: @vsat_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } +-// CHECK-LABEL: @vsat_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } +-// CHECK-LABEL: @vsat_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } +-// CHECK-LABEL: @vsat_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } +-// CHECK-LABEL: @vsat_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } +-// CHECK-LABEL: @vsat_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 
vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } +-// CHECK-LABEL: @vadda_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vadda_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vadda_b(_1, _2); +-} +-// CHECK-LABEL: @vadda_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vadda_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vadda_h(_1, _2); +-} +-// CHECK-LABEL: @vadda_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vadda_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vadda_w(_1, _2); +-} +-// CHECK-LABEL: @vadda_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vadda_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vadda_d(_1, _2); +-} +-// CHECK-LABEL: @vsadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsadd_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsadd_b(_1, _2); +-} +-// CHECK-LABEL: @vsadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsadd_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsadd_h(_1, _2); +-} +-// CHECK-LABEL: @vsadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret 
i128 [[TMP3]] +-// +-v4i32 vsadd_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsadd_w(_1, _2); +-} +-// CHECK-LABEL: @vsadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsadd_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsadd_d(_1, _2); +-} +-// CHECK-LABEL: @vsadd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vsadd_bu(_1, _2); +-} +-// CHECK-LABEL: @vsadd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vsadd_hu(_1, _2); +-} +-// CHECK-LABEL: @vsadd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vsadd_wu(_1, _2); +-} +-// CHECK-LABEL: @vsadd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vsadd_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vsadd_du(_1, _2); +-} +-// CHECK-LABEL: @vavg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } +-// CHECK-LABEL: @vavg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast 
<8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } +-// CHECK-LABEL: @vavg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } +-// CHECK-LABEL: @vavg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } +-// CHECK-LABEL: @vavg_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vavg_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vavg_bu(_1, _2); +-} +-// CHECK-LABEL: @vavg_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vavg_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vavg_hu(_1, _2); +-} +-// CHECK-LABEL: @vavg_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vavg_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vavg_wu(_1, _2); +-} +-// CHECK-LABEL: @vavg_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vavg_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vavg_du(_1, _2); +-} +-// CHECK-LABEL: @vavgr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vavgr_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vavgr_b(_1, _2); +-} +-// CHECK-LABEL: @vavgr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vavgr_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vavgr_h(_1, _2); +-} +-// CHECK-LABEL: @vavgr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vavgr_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vavgr_w(_1, _2); +-} +-// CHECK-LABEL: @vavgr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vavgr_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vavgr_d(_1, _2); +-} +-// CHECK-LABEL: @vavgr_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vavgr_bu(_1, _2); +-} +-// CHECK-LABEL: @vavgr_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vavgr_hu(_1, _2); +-} +-// CHECK-LABEL: @vavgr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vavgr_wu(_1, _2); +-} +-// CHECK-LABEL: @vavgr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vavgr_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vavgr_du(_1, _2); +-} +-// CHECK-LABEL: @vssub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssub_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vssub_b(_1, _2); +-} +-// CHECK-LABEL: @vssub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssub_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssub_h(_1, _2); +-} +-// CHECK-LABEL: @vssub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssub_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssub_w(_1, _2); +-} +-// CHECK-LABEL: @vssub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssub_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssub_d(_1, _2); +-} +-// CHECK-LABEL: @vssub_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssub_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vssub_bu(_1, _2); +-} +-// CHECK-LABEL: @vssub_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssub_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vssub_hu(_1, _2); +-} +-// CHECK-LABEL: @vssub_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x 
i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssub_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vssub_wu(_1, _2); +-} +-// CHECK-LABEL: @vssub_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssub_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vssub_du(_1, _2); +-} +-// CHECK-LABEL: @vabsd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vabsd_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vabsd_b(_1, _2); +-} +-// CHECK-LABEL: @vabsd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vabsd_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vabsd_h(_1, _2); +-} +-// CHECK-LABEL: @vabsd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vabsd_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vabsd_w(_1, _2); +-} +-// CHECK-LABEL: @vabsd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vabsd_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vabsd_d(_1, _2); +-} +-// CHECK-LABEL: @vabsd_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vabsd_bu(_1, _2); +-} +-// CHECK-LABEL: @vabsd_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vabsd_hu(_1, _2); +-} +-// CHECK-LABEL: @vabsd_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vabsd_wu(_1, _2); +-} +-// CHECK-LABEL: @vabsd_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vabsd_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vabsd_du(_1, _2); +-} +-// CHECK-LABEL: @vmul_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } +-// CHECK-LABEL: @vmul_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } +-// CHECK-LABEL: @vmul_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } +-// CHECK-LABEL: @vmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } +-// CHECK-LABEL: @vmadd_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] 
to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __builtin_lsx_vmadd_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmadd_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __builtin_lsx_vmadd_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmadd_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { +- return __builtin_lsx_vmadd_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __builtin_lsx_vmadd_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __builtin_lsx_vmsub_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmsub_h(v8i16 
_1, v8i16 _2, v8i16 _3) { +- return __builtin_lsx_vmsub_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { +- return __builtin_lsx_vmsub_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __builtin_lsx_vmsub_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vdiv_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } +-// CHECK-LABEL: @vdiv_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } +-// CHECK-LABEL: @vdiv_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } +-// CHECK-LABEL: @vdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } +-// CHECK-LABEL: @vdiv_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vdiv_bu(_1, _2); +-} +-// CHECK-LABEL: @vdiv_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vdiv_hu(_1, _2); +-} +-// CHECK-LABEL: @vdiv_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vdiv_wu(_1, _2); +-} +-// CHECK-LABEL: @vdiv_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vdiv_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vdiv_du(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vhaddw_h_b(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vhaddw_w_h(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vhaddw_d_w(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x 
i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vhaddw_hu_bu(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vhaddw_wu_hu(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vhaddw_du_wu(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vhsubw_h_b(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vhsubw_w_h(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vhsubw_d_w(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { +- return 
__builtin_lsx_vhsubw_hu_bu(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vhsubw_wu_hu(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vhsubw_du_wu(_1, _2); +-} +-// CHECK-LABEL: @vmod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } +-// CHECK-LABEL: @vmod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } +-// CHECK-LABEL: @vmod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } +-// CHECK-LABEL: @vmod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } +-// CHECK-LABEL: @vmod_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] 
+-// +-v16u8 vmod_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vmod_bu(_1, _2); +-} +-// CHECK-LABEL: @vmod_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmod_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vmod_hu(_1, _2); +-} +-// CHECK-LABEL: @vmod_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmod_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vmod_wu(_1, _2); +-} +-// CHECK-LABEL: @vmod_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmod_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vmod_du(_1, _2); +-} +-// CHECK-LABEL: @vreplve_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vreplve_b(v16i8 _1, int _2) { +- return __builtin_lsx_vreplve_b(_1, _2); +-} +-// CHECK-LABEL: @vreplve_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vreplve_h(v8i16 _1, int _2) { +- return __builtin_lsx_vreplve_h(_1, _2); +-} +-// CHECK-LABEL: @vreplve_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vreplve_w(v4i32 _1, int _2) { +- return __builtin_lsx_vreplve_w(_1, _2); +-} +-// CHECK-LABEL: @vreplve_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vreplve_d(v2i64 _1, int _2) { +- return __builtin_lsx_vreplve_d(_1, _2); +-} +-// CHECK-LABEL: @vreplvei_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = 
tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } +-// CHECK-LABEL: @vreplvei_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } +-// CHECK-LABEL: @vreplvei_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } +-// CHECK-LABEL: @vreplvei_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } +-// CHECK-LABEL: @vpickev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpickev_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vpickev_b(_1, _2); +-} +-// CHECK-LABEL: @vpickev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpickev_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vpickev_h(_1, _2); +-} +-// CHECK-LABEL: @vpickev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpickev_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vpickev_w(_1, _2); +-} +-// CHECK-LABEL: @vpickev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpickev_d(v2i64 _1, 
v2i64 _2) { +- return __builtin_lsx_vpickev_d(_1, _2); +-} +-// CHECK-LABEL: @vpickod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpickod_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vpickod_b(_1, _2); +-} +-// CHECK-LABEL: @vpickod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpickod_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vpickod_h(_1, _2); +-} +-// CHECK-LABEL: @vpickod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpickod_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vpickod_w(_1, _2); +-} +-// CHECK-LABEL: @vpickod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpickod_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vpickod_d(_1, _2); +-} +-// CHECK-LABEL: @vilvh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vilvh_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vilvh_b(_1, _2); +-} +-// CHECK-LABEL: @vilvh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vilvh_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vilvh_h(_1, _2); +-} +-// CHECK-LABEL: @vilvh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> 
[[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vilvh_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vilvh_w(_1, _2); +-} +-// CHECK-LABEL: @vilvh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vilvh_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vilvh_d(_1, _2); +-} +-// CHECK-LABEL: @vilvl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vilvl_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vilvl_b(_1, _2); +-} +-// CHECK-LABEL: @vilvl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vilvl_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vilvl_h(_1, _2); +-} +-// CHECK-LABEL: @vilvl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vilvl_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vilvl_w(_1, _2); +-} +-// CHECK-LABEL: @vilvl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vilvl_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vilvl_d(_1, _2); +-} +-// CHECK-LABEL: @vpackev_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpackev_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vpackev_b(_1, _2); +-} +-// CHECK-LABEL: @vpackev_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> 
[[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpackev_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vpackev_h(_1, _2); +-} +-// CHECK-LABEL: @vpackev_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpackev_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vpackev_w(_1, _2); +-} +-// CHECK-LABEL: @vpackev_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpackev_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vpackev_d(_1, _2); +-} +-// CHECK-LABEL: @vpackod_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vpackod_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vpackod_b(_1, _2); +-} +-// CHECK-LABEL: @vpackod_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vpackod_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vpackod_h(_1, _2); +-} +-// CHECK-LABEL: @vpackod_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpackod_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vpackod_w(_1, _2); +-} +-// CHECK-LABEL: @vpackod_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vpackod_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vpackod_d(_1, _2); +-} +-// CHECK-LABEL: @vshuf_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// 
CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __builtin_lsx_vshuf_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vshuf_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { +- return __builtin_lsx_vshuf_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vshuf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __builtin_lsx_vshuf_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vand_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } +-// CHECK-LABEL: @vandi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } +-// CHECK-LABEL: @vor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } +-// CHECK-LABEL: @vori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } +-// CHECK-LABEL: @vnor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } +-// CHECK-LABEL: @vnori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } +-// CHECK-LABEL: @vxor_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } +-// CHECK-LABEL: @vxori_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } +-// CHECK-LABEL: @vbitsel_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { +- return __builtin_lsx_vbitsel_v(_1, _2, _3); +-} +-// CHECK-LABEL: @vbitseli_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vbitseli_b(_1, _2, 1); +-} +-// CHECK-LABEL: @vshuf4i_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } +-// CHECK-LABEL: @vshuf4i_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } +-// CHECK-LABEL: @vshuf4i_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } +-// CHECK-LABEL: @vreplgr2vr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } +-// CHECK-LABEL: @vreplgr2vr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } +-// CHECK-LABEL: @vreplgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } +-// CHECK-LABEL: @vreplgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } +-// CHECK-LABEL: @vpcnt_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } +-// CHECK-LABEL: @vpcnt_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } +-// CHECK-LABEL: @vpcnt_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } +-// CHECK-LABEL: @vpcnt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vpcnt_d(v2i64 
_1) { return __builtin_lsx_vpcnt_d(_1); } +-// CHECK-LABEL: @vclo_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } +-// CHECK-LABEL: @vclo_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } +-// CHECK-LABEL: @vclo_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } +-// CHECK-LABEL: @vclo_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } +-// CHECK-LABEL: @vclz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } +-// CHECK-LABEL: @vclz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } +-// CHECK-LABEL: @vclz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } +-// CHECK-LABEL: @vclz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } +-// CHECK-LABEL: @vpickve2gr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } 
+-// CHECK-LABEL: @vpickve2gr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } +-// CHECK-LABEL: @vpickve2gr_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int vpickve2gr_bu(v16i8 _1) { +- return __builtin_lsx_vpickve2gr_bu(_1, 1); +-} +-// CHECK-LABEL: @vpickve2gr_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int vpickve2gr_hu(v8i16 _1) { +- return __builtin_lsx_vpickve2gr_hu(_1, 1); +-} +-// CHECK-LABEL: @vpickve2gr_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-unsigned int vpickve2gr_wu(v4i32 _1) { +- return __builtin_lsx_vpickve2gr_wu(_1, 1); +-} +-// CHECK-LABEL: @vpickve2gr_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: ret i64 [[TMP1]] +-// +-unsigned long int vpickve2gr_du(v2i64 _1) { +- return __builtin_lsx_vpickve2gr_du(_1, 1); +-} +-// CHECK-LABEL: @vinsgr2vr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vinsgr2vr_b(v16i8 _1) { +- return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); +-} +-// CHECK-LABEL: @vinsgr2vr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vinsgr2vr_h(v8i16 _1) { +- return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); +-} +-// CHECK-LABEL: @vinsgr2vr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 
x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vinsgr2vr_w(v4i32 _1) { +- return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); +-} +-// CHECK-LABEL: @vinsgr2vr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vinsgr2vr_d(v2i64 _1) { +- return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); +-} +-// CHECK-LABEL: @vfadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfadd_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfadd_s(_1, _2); +-} +-// CHECK-LABEL: @vfadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfadd_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfadd_d(_1, _2); +-} +-// CHECK-LABEL: @vfsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfsub_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfsub_s(_1, _2); +-} +-// CHECK-LABEL: @vfsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfsub_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfsub_d(_1, _2); +-} +-// CHECK-LABEL: @vfmul_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmul_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfmul_s(_1, _2); +-} +-// CHECK-LABEL: @vfmul_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmul_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfmul_d(_1, _2); +-} +-// CHECK-LABEL: @vfdiv_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfdiv_s(_1, _2); +-} +-// CHECK-LABEL: @vfdiv_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfdiv_d(_1, _2); +-} +-// CHECK-LABEL: @vfcvt_h_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcvt_h_s(_1, _2); +-} +-// CHECK-LABEL: @vfcvt_s_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcvt_s_d(_1, _2); +-} +-// CHECK-LABEL: @vfmin_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmin_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfmin_s(_1, _2); +-} +-// CHECK-LABEL: @vfmin_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmin_d(v2f64 _1, v2f64 _2) { +- return 
__builtin_lsx_vfmin_d(_1, _2); +-} +-// CHECK-LABEL: @vfmina_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmina_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfmina_s(_1, _2); +-} +-// CHECK-LABEL: @vfmina_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmina_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfmina_d(_1, _2); +-} +-// CHECK-LABEL: @vfmax_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmax_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfmax_s(_1, _2); +-} +-// CHECK-LABEL: @vfmax_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmax_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfmax_d(_1, _2); +-} +-// CHECK-LABEL: @vfmaxa_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfmaxa_s(_1, _2); +-} +-// CHECK-LABEL: @vfmaxa_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfmaxa_d(_1, _2); +-} +-// CHECK-LABEL: @vfclass_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP2]] +-// +-v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } +-// CHECK-LABEL: @vfclass_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } +-// CHECK-LABEL: @vfsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } +-// CHECK-LABEL: @vfsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } +-// CHECK-LABEL: @vfrecip_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } +-// CHECK-LABEL: @vfrecip_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } +-// CHECK-LABEL: @vfrint_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } +-// CHECK-LABEL: @vfrint_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } +-// CHECK-LABEL: @vfrsqrt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } +-// CHECK-LABEL: @vfrsqrt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } +-// CHECK-LABEL: @vflogb_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } +-// CHECK-LABEL: @vflogb_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } +-// CHECK-LABEL: @vfcvth_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } +-// CHECK-LABEL: @vfcvth_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } +-// CHECK-LABEL: @vfcvtl_s_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } +-// CHECK-LABEL: @vfcvtl_d_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } +-// CHECK-LABEL: @vftint_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } +-// CHECK-LABEL: @vftint_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftint_l_d(v2f64 
_1) { return __builtin_lsx_vftint_l_d(_1); } +-// CHECK-LABEL: @vftint_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } +-// CHECK-LABEL: @vftint_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } +-// CHECK-LABEL: @vftintrz_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } +-// CHECK-LABEL: @vftintrz_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } +-// CHECK-LABEL: @vftintrz_wu_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } +-// CHECK-LABEL: @vftintrz_lu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } +-// CHECK-LABEL: @vffint_s_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } +-// CHECK-LABEL: @vffint_d_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } +-// CHECK-LABEL: @vffint_s_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> 
+-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } +-// CHECK-LABEL: @vffint_d_lu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } +-// CHECK-LABEL: @vandn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vandn_v(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vandn_v(_1, _2); +-} +-// CHECK-LABEL: @vneg_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } +-// CHECK-LABEL: @vneg_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } +-// CHECK-LABEL: @vneg_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } +-// CHECK-LABEL: @vneg_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } +-// CHECK-LABEL: @vmuh_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } +-// CHECK-LABEL: @vmuh_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 
x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } +-// CHECK-LABEL: @vmuh_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } +-// CHECK-LABEL: @vmuh_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } +-// CHECK-LABEL: @vmuh_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vmuh_bu(_1, _2); +-} +-// CHECK-LABEL: @vmuh_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vmuh_hu(_1, _2); +-} +-// CHECK-LABEL: @vmuh_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vmuh_wu(_1, _2); +-} +-// CHECK-LABEL: @vmuh_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vmuh_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vmuh_du(_1, _2); +-} +-// CHECK-LABEL: @vsllwil_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> 
[[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } +-// CHECK-LABEL: @vsllwil_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } +-// CHECK-LABEL: @vsllwil_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } +-// CHECK-LABEL: @vsllwil_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vsllwil_hu_bu(v16u8 _1) { +- return __builtin_lsx_vsllwil_hu_bu(_1, 1); +-} +-// CHECK-LABEL: @vsllwil_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vsllwil_wu_hu(v8u16 _1) { +- return __builtin_lsx_vsllwil_wu_hu(_1, 1); +-} +-// CHECK-LABEL: @vsllwil_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vsllwil_du_wu(v4u32 _1) { +- return __builtin_lsx_vsllwil_du_wu(_1, 1); +-} +-// CHECK-LABEL: @vsran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsran_b_h(_1, _2); +-} +-// CHECK-LABEL: @vsran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsran_h_w(_1, _2); +-} +-// CHECK-LABEL: @vsran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsran_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssran_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssran_b_h(_1, _2); +-} +-// CHECK-LABEL: @vssran_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssran_h_w(_1, _2); +-} +-// CHECK-LABEL: @vssran_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssran_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssran_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vssran_bu_h(_1, _2); +-} +-// CHECK-LABEL: @vssran_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vssran_hu_w(_1, _2); +-} +-// CHECK-LABEL: @vssran_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vssran_wu_d(_1, _2); +-} +-// 
CHECK-LABEL: @vsrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrarn_b_h(_1, _2); +-} +-// CHECK-LABEL: @vsrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrarn_h_w(_1, _2); +-} +-// CHECK-LABEL: @vsrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrarn_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssrarn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrarn_b_h(_1, _2); +-} +-// CHECK-LABEL: @vssrarn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrarn_h_w(_1, _2); +-} +-// CHECK-LABEL: @vssrarn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssrarn_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssrarn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> 
[[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vssrarn_bu_h(_1, _2); +-} +-// CHECK-LABEL: @vssrarn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vssrarn_hu_w(_1, _2); +-} +-// CHECK-LABEL: @vssrarn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vssrarn_wu_d(_1, _2); +-} +-// CHECK-LABEL: @vsrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrln_b_h(_1, _2); +-} +-// CHECK-LABEL: @vsrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrln_h_w(_1, _2); +-} +-// CHECK-LABEL: @vsrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrln_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssrln_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vssrln_bu_h(_1, _2); +-} +-// CHECK-LABEL: @vssrln_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vssrln_hu_w(_1, _2); +-} +-// CHECK-LABEL: @vssrln_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vssrln_wu_d(_1, _2); +-} +-// CHECK-LABEL: @vsrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrlrn_b_h(_1, _2); +-} +-// CHECK-LABEL: @vsrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrlrn_h_w(_1, _2); +-} +-// CHECK-LABEL: @vsrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrlrn_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssrlrn_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vssrlrn_bu_h(_1, _2); +-} +-// CHECK-LABEL: @vssrlrn_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vssrlrn_hu_w(_1, _2); +-} +-// CHECK-LABEL: @vssrlrn_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vssrlrn_wu_d(_1, _2); +-} +-// CHECK-LABEL: @vfrstpi_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vfrstpi_b(_1, _2, 1); +-} +-// CHECK-LABEL: @vfrstpi_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vfrstpi_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vfrstp_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __builtin_lsx_vfrstp_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vfrstp_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { +- return __builtin_lsx_vfrstp_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vshuf4i_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vshuf4i_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vbsrl_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: 
[[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } +-// CHECK-LABEL: @vbsll_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } +-// CHECK-LABEL: @vextrins_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vextrins_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vextrins_b(_1, _2, 1); +-} +-// CHECK-LABEL: @vextrins_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vextrins_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vextrins_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vextrins_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vextrins_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vextrins_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vextrins_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vextrins_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vextrins_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vmskltz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } +-// CHECK-LABEL: @vmskltz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } +-// CHECK-LABEL: @vmskltz_w( 
+-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } +-// CHECK-LABEL: @vmskltz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } +-// CHECK-LABEL: @vsigncov_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsigncov_b(_1, _2); +-} +-// CHECK-LABEL: @vsigncov_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsigncov_h(_1, _2); +-} +-// CHECK-LABEL: @vsigncov_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsigncov_w(_1, _2); +-} +-// CHECK-LABEL: @vsigncov_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsigncov_d(_1, _2); +-} +-// CHECK-LABEL: @vfmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __builtin_lsx_vfmadd_s(_1, _2, _3); +-} +-// CHECK-LABEL: 
@vfmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __builtin_lsx_vfmadd_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vfmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __builtin_lsx_vfmsub_s(_1, _2, _3); +-} +-// CHECK-LABEL: @vfmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __builtin_lsx_vfmsub_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmadd_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __builtin_lsx_vfnmadd_s(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmadd_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __builtin_lsx_vfnmadd_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmsub_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP3:%.*]] = 
tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { +- return __builtin_lsx_vfnmsub_s(_1, _2, _3); +-} +-// CHECK-LABEL: @vfnmsub_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { +- return __builtin_lsx_vfnmsub_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vftintrne_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } +-// CHECK-LABEL: @vftintrne_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } +-// CHECK-LABEL: @vftintrp_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } +-// CHECK-LABEL: @vftintrp_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } +-// CHECK-LABEL: @vftintrm_w_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } +-// CHECK-LABEL: @vftintrm_l_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } +-// CHECK-LABEL: 
@vftint_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vftint_w_d(_1, _2); +-} +-// CHECK-LABEL: @vffint_s_l( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vffint_s_l(_1, _2); +-} +-// CHECK-LABEL: @vftintrz_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vftintrz_w_d(_1, _2); +-} +-// CHECK-LABEL: @vftintrp_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vftintrp_w_d(_1, _2); +-} +-// CHECK-LABEL: @vftintrm_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vftintrm_w_d(_1, _2); +-} +-// CHECK-LABEL: @vftintrne_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vftintrne_w_d(_1, _2); +-} +-// CHECK-LABEL: @vftintl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// 
CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } +-// CHECK-LABEL: @vftinth_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } +-// CHECK-LABEL: @vffinth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } +-// CHECK-LABEL: @vffintl_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } +-// CHECK-LABEL: @vftintrzl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } +-// CHECK-LABEL: @vftintrzh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } +-// CHECK-LABEL: @vftintrpl_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } +-// CHECK-LABEL: @vftintrph_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } +-// CHECK-LABEL: @vftintrml_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } +-// CHECK-LABEL: @vftintrmh_l_s( +-// CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } +-// CHECK-LABEL: @vftintrnel_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrnel_l_s(v4f32 _1) { +- return __builtin_lsx_vftintrnel_l_s(_1); +-} +-// CHECK-LABEL: @vftintrneh_l_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vftintrneh_l_s(v4f32 _1) { +- return __builtin_lsx_vftintrneh_l_s(_1); +-} +-// CHECK-LABEL: @vfrintrne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } +-// CHECK-LABEL: @vfrintrne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } +-// CHECK-LABEL: @vfrintrz_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } +-// CHECK-LABEL: @vfrintrz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } +-// CHECK-LABEL: @vfrintrp_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } +-// CHECK-LABEL: @vfrintrp_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> 
@llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } +-// CHECK-LABEL: @vfrintrm_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } +-// CHECK-LABEL: @vfrintrm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } +-// CHECK-LABEL: @vstelm_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_b(v16i8 _1, void *_2) { +- return __builtin_lsx_vstelm_b(_1, _2, 1, 1); +-} +-// CHECK-LABEL: @vstelm_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_h(v8i16 _1, void *_2) { +- return __builtin_lsx_vstelm_h(_1, _2, 2, 1); +-} +-// CHECK-LABEL: @vstelm_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_w(v4i32 _1, void *_2) { +- return __builtin_lsx_vstelm_w(_1, _2, 4, 1); +-} +-// CHECK-LABEL: @vstelm_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) +-// CHECK-NEXT: ret void +-// +-void vstelm_d(v2i64 _1, void *_2) { +- return __builtin_lsx_vstelm_d(_1, _2, 8, 1); +-} +-// CHECK-LABEL: @vaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vaddwev_d_w(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { +- return 
__builtin_lsx_vaddwev_w_h(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vaddwev_h_b(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vaddwod_d_w(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vaddwod_w_h(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vaddwod_h_b(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vaddwev_d_wu(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vaddwev_w_hu(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> 
[[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vaddwev_h_bu(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vaddwod_d_wu(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vaddwod_w_hu(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vaddwod_h_bu(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vaddwev_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vaddwev_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vaddwev_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vaddwod_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vaddwod_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vaddwod_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsubwev_d_w(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsubwev_w_h(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsubwev_h_b(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// 
+-v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsubwod_d_w(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsubwod_w_h(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsubwod_h_b(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vsubwev_d_wu(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vsubwev_w_hu(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vsubwev_h_bu(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vsubwod_d_wu(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vsubwod_w_hu(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vsubwod_h_bu(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vaddwev_q_d(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vaddwod_q_d(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vaddwev_q_du(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vaddwod_q_du(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsubwev_q_d(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsubwod_q_d(_1, _2); +-} +-// CHECK-LABEL: @vsubwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vsubwev_q_du(_1, _2); +-} +-// CHECK-LABEL: @vsubwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vsubwod_q_du(_1, _2); +-} +-// CHECK-LABEL: @vaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vaddwev_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vaddwod_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vmulwev_d_w(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP3]] +-// +-v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vmulwev_w_h(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vmulwev_h_b(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vmulwod_d_w(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vmulwod_w_h(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vmulwod_h_b(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vmulwev_d_wu(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vmulwev_w_hu(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vmulwev_h_bu(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { +- return __builtin_lsx_vmulwod_d_wu(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { +- return __builtin_lsx_vmulwod_w_hu(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { +- return __builtin_lsx_vmulwod_h_bu(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vmulwev_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vmulwev_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vmulwev_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_d_wu_w( +-// CHECK-NEXT: 
entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vmulwod_d_wu_w(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vmulwod_w_hu_h(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vmulwod_h_bu_b(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vmulwev_q_d(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vmulwod_q_d(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vmulwev_q_du(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> 
[[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vmulwod_q_du(_1, _2); +-} +-// CHECK-LABEL: @vmulwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vmulwev_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vmulwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vmulwod_q_du_d(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vhaddw_q_d(_1, _2); +-} +-// CHECK-LABEL: @vhaddw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vhaddw_qu_du(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vhsubw_q_d(_1, _2); +-} +-// CHECK-LABEL: @vhsubw_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { +- return __builtin_lsx_vhsubw_qu_du(_1, _2); +-} +-// CHECK-LABEL: @vmaddwev_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to 
<4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { +- return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { +- return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { +- return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { +- return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { +- return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 
[[TMP4]] +-// +-v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { +- return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { +- return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { +- return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { +- return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_d_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { +- return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_w_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { +- return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_h_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 
x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { +- return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { +- return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { +- return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { +- return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_d_wu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { +- return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_w_hu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> 
[[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { +- return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_h_bu_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { +- return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { +- return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { +- return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_q_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { +- return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwev_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { +- return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vmaddwod_q_du_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { +- return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); +-} +-// CHECK-LABEL: @vrotr_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vrotr_b(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vrotr_b(_1, _2); +-} +-// CHECK-LABEL: @vrotr_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vrotr_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vrotr_h(_1, _2); +-} +-// CHECK-LABEL: @vrotr_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vrotr_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vrotr_w(_1, _2); +-} +-// CHECK-LABEL: @vrotr_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vrotr_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vrotr_d(_1, _2); +-} +-// CHECK-LABEL: @vadd_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } +-// CHECK-LABEL: @vsub_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } +-// CHECK-LABEL: @vldrepl_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } +-// CHECK-LABEL: @vldrepl_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } +-// CHECK-LABEL: @vldrepl_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } +-// CHECK-LABEL: @vldrepl_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } +-// CHECK-LABEL: @vmskgez_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } +-// CHECK-LABEL: @vmsknz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } +-// CHECK-LABEL: @vexth_h_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } +-// CHECK-LABEL: @vexth_w_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// 
CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } +-// CHECK-LABEL: @vexth_d_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } +-// CHECK-LABEL: @vexth_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } +-// CHECK-LABEL: @vexth_hu_bu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } +-// CHECK-LABEL: @vexth_wu_hu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } +-// CHECK-LABEL: @vexth_du_wu( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } +-// CHECK-LABEL: @vexth_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } +-// CHECK-LABEL: @vrotri_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } +-// CHECK-LABEL: @vrotri_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } +-// CHECK-LABEL: @vrotri_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } +-// CHECK-LABEL: @vrotri_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } +-// CHECK-LABEL: @vextl_q_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } +-// CHECK-LABEL: @vsrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsrlni_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrlni_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrlni_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrlni_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> 
[[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vssrlni_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrlni_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrlni_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssrlni_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vssrlni_du_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> 
[[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrlrni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to 
<16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsrani_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrani_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrani_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrani_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vsrarni_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vsrarni_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// 
+-v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vsrarni_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vsrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vsrarni_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vssrani_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrani_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrani_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssrani_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vssrani_bu_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vssrani_hu_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vssrani_wu_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrani_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vssrani_du_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { +- return __builtin_lsx_vssrarni_b_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrarni_h_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrarni_w_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_d_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { +- return 
__builtin_lsx_vssrarni_d_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_bu_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { +- return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_hu_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { +- return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_wu_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { +- return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); +-} +-// CHECK-LABEL: @vssrarni_du_q( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { +- return __builtin_lsx_vssrarni_du_q(_1, _2, 1); +-} +-// CHECK-LABEL: @vpermi_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vpermi_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vpermi_w(_1, _2, 1); +-} +-// CHECK-LABEL: @vld( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } +-// CHECK-LABEL: @vst( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) +-// CHECK-NEXT: ret void +-// +-void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } +-// CHECK-LABEL: @vssrlrn_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrlrn_b_h(_1, _2); +-} +-// CHECK-LABEL: @vssrlrn_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrlrn_h_w(_1, _2); +-} +-// CHECK-LABEL: @vssrlrn_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssrlrn_w_d(_1, _2); +-} +-// CHECK-LABEL: @vssrln_b_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { +- return __builtin_lsx_vssrln_b_h(_1, _2); +-} +-// CHECK-LABEL: @vssrln_h_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { +- return __builtin_lsx_vssrln_h_w(_1, _2); +-} +-// CHECK-LABEL: @vssrln_w_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { +- return __builtin_lsx_vssrln_w_d(_1, _2); +-} +-// CHECK-LABEL: @vorn_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } +-// CHECK-LABEL: @vldi( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v2i64 vldi() { return __builtin_lsx_vldi(1); } +-// CHECK-LABEL: @vshuf_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +-// CHECK-NEXT: ret i128 [[TMP4]] +-// +-v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { +- return __builtin_lsx_vshuf_b(_1, _2, _3); +-} +-// CHECK-LABEL: @vldx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +-// CHECK-NEXT: ret i128 [[TMP1]] +-// +-v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } +-// CHECK-LABEL: @vstx( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) +-// CHECK-NEXT: ret void +-// +-void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } +-// CHECK-LABEL: @vextl_qu_du( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +-// CHECK-NEXT: ret i128 [[TMP2]] +-// +-v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } +-// CHECK-LABEL: @bnz_b( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } +-// CHECK-LABEL: @bnz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } +-// CHECK-LABEL: @bnz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } +-// CHECK-LABEL: @bnz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } +-// CHECK-LABEL: @bnz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } +-// CHECK-LABEL: @bz_b( +-// 
CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } +-// CHECK-LABEL: @bz_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } +-// CHECK-LABEL: @bz_h( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } +-// CHECK-LABEL: @bz_v( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } +-// CHECK-LABEL: @bz_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) +-// CHECK-NEXT: ret i32 [[TMP1]] +-// +-int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } +-// CHECK-LABEL: @vfcmp_caf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_caf_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_caf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_caf_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_ceq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_ceq_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_ceq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 
+-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_ceq_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cle_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cle_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_clt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_clt_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_clt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_clt_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cne_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cne_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cor_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cor_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cueq_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cueq_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cule_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cule_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 
vfcmp_cult_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cult_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cult_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cun_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_cune_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cune_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_cun_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_cun_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_saf_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_saf_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_saf_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_saf_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_seq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_seq_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_seq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_seq_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sle_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sle_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sle_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sle_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_slt_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_slt_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_slt_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_slt_s(_1, 
_2); +-} +-// CHECK-LABEL: @vfcmp_sne_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sne_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sne_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sne_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sor_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sor_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sor_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sor_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sueq_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sueq_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sueq_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sueq_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sule_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sule_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sule_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sule_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sult_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sult_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sult_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sult_s(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sun_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sun_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sune_d( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { +- return __builtin_lsx_vfcmp_sune_d(_1, _2); +-} +-// CHECK-LABEL: @vfcmp_sune_s( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +-// CHECK-NEXT: ret i128 [[TMP3]] +-// +-v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { +- return __builtin_lsx_vfcmp_sune_s(_1, _2); +-} +-// 
CHECK-LABEL: @vfcmp_sun_s(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+-// CHECK-NEXT: ret i128 [[TMP3]]
+-//
+-v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) {
+- return __builtin_lsx_vfcmp_sun_s(_1, _2);
+-}
+-// CHECK-LABEL: @vrepli_b(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
+-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+-// CHECK-NEXT: ret i128 [[TMP1]]
+-//
+-v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); }
+-// CHECK-LABEL: @vrepli_d(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
+-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+-// CHECK-NEXT: ret i128 [[TMP1]]
+-//
+-v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); }
+-// CHECK-LABEL: @vrepli_h(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
+-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
+-// CHECK-NEXT: ret i128 [[TMP1]]
+-//
+-v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); }
+-// CHECK-LABEL: @vrepli_w(
+-// CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
+-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+-// CHECK-NEXT: ret i128 [[TMP1]]
+-//
+-v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); }
+diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs-error.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs-error.c
+deleted file mode 100644
+index 54132307e..000000000
+--- a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs-error.c
++++ /dev/null
+@@ -1,10 +0,0 @@
+-// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s
+-
+-typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+-
+-void test() {
+-// CHECK: :[[#@LINE+1]]:28: error: unknown register name 'vr0' in asm
+- register v16i8 p0 asm ("vr0");
+-// CHECK: :[[#@LINE+1]]:29: error: unknown register name '$vr32' in asm
+- register v16i8 p32 asm ("$vr32");
+-}
+diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c
+deleted file mode 100644
+index b05b1c8c1..000000000
+--- a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c
++++ /dev/null
+@@ -1,36 +0,0 @@
+-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "^define |tail call"
+-// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s
+-
+-typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+-
+-// CHECK-LABEL: @test_vr0(
+-// CHECK: tail call void asm sideeffect "", "{$vr0}"(<16 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2
+-//
+-void test_vr0() {
+- register v16i8 a asm ("$vr0");
+- asm ("" :: "f"(a));
+-}
+-
+-// CHECK-LABEL: @test_vr7(
+-// CHECK: tail call void asm sideeffect "", "{$vr7}"(<16 x i8> undef) #[[ATTR1]], !srcloc !3
+-//
+-void test_vr7() {
+- register v16i8 a asm ("$vr7");
+- asm ("" :: "f"(a));
+-}
+-
+-// CHECK-LABEL: @test_vr15(
+-// CHECK: tail call void asm sideeffect "", "{$vr15}"(<16 x i8> undef) #[[ATTR1]], !srcloc !4
!srcloc !4 +-// +-void test_vr15() { +- register v16i8 a asm ("$vr15"); +- asm ("" :: "f"(a)); +-} +- +-// CHECK-LABEL: @test_vr31( +-// CHECK: tail call void asm sideeffect "", "{$vr31}"(<16 x i8> undef) #[[ATTR1]], !srcloc !5 +-// +-void test_vr31() { +- register v16i8 a asm ("$vr31"); +- asm ("" :: "f"(a)); +-} +diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c +deleted file mode 100644 +index 5e0fae984..000000000 +--- a/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c ++++ /dev/null +@@ -1,15 +0,0 @@ +-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +-// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s +- +-typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +- +-// CHECK-LABEL: define dso_local void @test_w +-// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +-// CHECK-NEXT: entry: +-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2 +-// CHECK-NEXT: ret void +-// +-void test_w() { +- v2i64 v2i64_r; +- asm volatile ("vldi %w0, 1" : "=f" (v2i64_r)); +-} +diff --git a/clang/test/CodeGen/LoongArch/lsx/vrepli-builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/vrepli-builtin-alias-error.c +new file mode 100644 +index 000000000..1cbc0e072 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/vrepli-builtin-alias-error.c +@@ -0,0 +1,31 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++#include <lsxintrin.h> ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/vrepli-builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/vrepli-builtin-error.c +new file mode 100644 +index 000000000..0ddc2d6e9 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/vrepli-builtin-error.c +@@ -0,0 +1,54 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx 
-verify %s ++ ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} ++ res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/vsrlrni-builtin-alias-error.c 
b/clang/test/CodeGen/LoongArch/lsx/vsrlrni-builtin-alias-error.c +new file mode 100644 +index 000000000..767e39918 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/vsrlrni-builtin-alias-error.c +@@ -0,0 +1,31 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++#include <lsxintrin.h> ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/vsrlrni-builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/vsrlrni-builtin-error.c +new file mode 100644 +index 000000000..9227a139e +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/vsrlrni-builtin-error.c +@@ -0,0 +1,54 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 
__attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-base.c b/clang/test/CodeGen/builtins-loongarch-base.c +new file mode 100644 +index 000000000..cdff582fa +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-base.c +@@ -0,0 +1,409 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s ++ ++#include <larchintrin.h> ++ ++typedef char i8; ++typedef unsigned char u8; ++typedef short i16; ++typedef unsigned short u16; ++typedef int i32; ++typedef unsigned int u32; ++ ++#if __LONG_MAX__ == __LONG_LONG_MAX__ ++typedef long int i64; ++typedef unsigned long int u64; ++#else ++typedef long long i64; ++typedef unsigned long long u64; ++#endif ++ ++__drdtime_t drdtime; ++__rdtime_t rdtime; ++ ++void cpucfg(){ ++ ++ u32 u32_r, u32_a; ++ // __cpucfg ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg ++ ++} ++ ++void csrrd_w() { ++ ++ u32 u32_r; ++ // __csrrd_w 
++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrrd_w(1); // CHECK: call i32 @llvm.loongarch.csrrd.w ++} ++ ++void csrrd_d() { ++ ++ u64 u64_r; ++ // __csrrd_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrrd_d(1); // CHECK: call i64 @llvm.loongarch.csrrd.d ++} ++ ++void csrwr_w() { ++ ++ u32 u32_r, u32_a; ++ // __csrwr_w ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrwr_w(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr.w ++} ++ ++void csrwr_d() { ++ ++ u64 u64_r, u64_a; ++ // __csrwr_d ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrwr_d(u64_a, 1); // CHECK: call i64 @llvm.loongarch.csrwr.d ++} ++ ++void csrxchg_w() { ++ ++ u32 u32_r, u32_a, u32_b; ++ // __csrxchg_w ++ // rd, rj, csr_num ++ // unsigned int, unsigned int, uimm14_32 ++ u32_r = __builtin_loongarch_csrxchg_w(u32_a, u32_b, 1); // CHECK: call i32 @llvm.loongarch.csrxchg.w ++} ++ ++void csrxchg_d() { ++ ++ u64 u64_r, u64_a, u64_b; ++ // __csrxchg_d ++ // rd, rj, csr_num ++ // unsigned long int, unsigned long int, uimm14 ++ u64_r = __builtin_loongarch_csrxchg_d(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.csrxchg.d ++} ++ ++void iocsrrd_b(){ ++ ++ u32 u32_a; ++ u8 u8_r; ++ // __iocsrrd_b ++ // rd, rj ++ // unsigned char, unsigned int ++ u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b ++ ++} ++ ++void iocsrrd_h(){ ++ ++ u32 u32_a; ++ u16 u16_r; ++ // __iocsrrd_h ++ // rd, rj ++ // unsigned short, unsigned int ++ u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h ++ ++} ++ ++void iocsrrd_w(){ ++ ++ u32 u32_r, u32_a; ++ // __iocsrrd_w ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w ++ ++} ++ ++void iocsrrd_d(){ ++ ++ u32 u32_a; ++ u64 u64_r; ++ // __iocsrrd_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d ++ ++} ++ ++void iocsrwr_b(){ ++ ++ u32 u32_a; ++ u8 u8_a; ++ // __iocsrwr_b ++ // rd, rj ++ // unsigned char, unsigned int ++ __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b ++ ++} ++ ++void iocsrwr_h(){ ++ ++ u32 u32_a; ++ u16 u16_a; ++ // __iocsrwr_h ++ // rd, rj ++ // unsigned short, unsigned int ++ __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h ++ ++} ++ ++void iocsrwr_w(){ ++ ++ u32 u32_a, u32_b; ++ // __iocsrwr_w ++ // rd, rj ++ // unsigned int, unsigned int ++ __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w ++ ++} ++ ++void iocsrwr_d(){ ++ ++ u32 u32_a; ++ u64 u64_a; ++ // __iocsrwr_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d ++ ++} ++ ++void cacop_w() { ++ ++ i32 i32_a; ++ // __cacop_w ++ // op, rj, si12 ++ // uimm5, unsigned int, simm12 ++ __builtin_loongarch_cacop_w(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop.w ++} ++ ++void cacop_d() { ++ ++ i64 i64_a; ++ // __cacop_d ++ // op, rj, si12 ++ // uimm5, unsigned long int, simm12 ++ __builtin_loongarch_cacop_d(1, i64_a, 2); // CHECK: void @llvm.loongarch.cacop.d ++} ++ ++void rdtime_d(){ ++ ++ drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimeh_w(){ ++ ++ 
rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimel_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void crc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w ++ ++} ++ ++void crc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w ++ ++} ++ ++void crc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w ++ ++} ++ ++void crc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w ++ ++} ++ ++void crcc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crcc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w ++ ++} ++ ++void crcc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crcc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w ++ ++} ++ ++void crcc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crcc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w ++ ++} ++ ++void crcc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crcc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w ++ ++} ++ ++void tlbclr(){ ++ ++ // __tlbclr ++ __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr ++ ++} ++ ++void tlbflush(){ ++ ++ // __tlbflush ++ __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush ++ ++} ++ ++void tlbfill(){ ++ ++ // __tlbfill ++ __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill ++ ++} ++ ++void tlbrd(){ ++ ++ // __tlbrd ++ __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd ++ ++} ++ ++void tlbwr(){ ++ ++ // __tlbwr ++ __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr ++ ++} ++ ++void tlbsrch(){ ++ ++ // __tlbsrch ++ __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch ++ ++} ++ ++void syscall(){ ++ ++ // __syscall ++ // Code ++ // uimm15 ++ __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall ++ ++} ++ ++void break_builtin(){ ++ ++ // __break ++ // Code ++ // uimm15 ++ __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break ++ ++} ++ ++void asrtle_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtle_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d ++ ++} ++ ++void asrtgt_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtgt_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d ++ ++} ++ ++void dbar(){ ++ ++ // __dbar ++ // 
hint ++ // uimm15 ++ __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar ++ ++} ++ ++void ibar(){ ++ ++ // __ibar ++ // hint ++ // uimm15 ++ __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar ++ ++} ++ ++void movfcsr2gr(){ ++ ++ u32 u32_r; ++ // __movfcsr2gr ++ u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ ++} ++ ++ ++void movgr2fcsr() { ++ ++ u32 u32_a; ++ // __movgr2fcsr ++ __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lasx-error.c b/clang/test/CodeGen/builtins-loongarch-lasx-error.c +new file mode 100644 +index 000000000..99f2687e4 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lasx-error.c +@@ -0,0 +1,266 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -verify -o - 2>&1 ++ ++#include <lasxintrin.h> ++ ++void test() { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int 
i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = 
__lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error 
{{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ 
v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v32u8_r = __lasx_xvandi_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 
32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ 
v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is 
outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lasx.c b/clang/test/CodeGen/builtins-loongarch-lasx.c +new file mode 100644 +index 000000000..b289b1863 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lasx.c +@@ -0,0 +1,3792 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -o - | FileCheck %s ++ ++#include <lasxintrin.h> ++ ++#define ui1_b 1 ++#define ui2 1 ++#define ui2_b ui2 ++#define ui3 4 ++#define ui3_b ui3 ++#define ui4 7 ++#define ui4_b ui4 ++#define ui5 25 ++#define ui5_b ui5 ++#define ui6 44 ++#define ui6_b ui6 ++#define ui7 100 ++#define ui7_b ui7 ++#define ui8 127 //200 ++#define ui8_b ui8 ++#define si5_b -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 0 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lasx_xvsll_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( ++ ++ // __lasx_xvsll_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( ++ ++ // __lasx_xvsll_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( ++ ++ // __lasx_xvsll_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( ++ ++ // __lasx_xvslli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( ++ ++ // __lasx_xvslli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvslli.h( ++ ++ // __lasx_xvslli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( ++ ++ // __lasx_xvslli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( ++ ++ // __lasx_xvsra_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( ++ ++ // __lasx_xvsra_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( ++ ++ // __lasx_xvsra_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( ++ ++ // __lasx_xvsra_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( ++ ++ // __lasx_xvsrai_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( ++ ++ // __lasx_xvsrai_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( ++ ++ // __lasx_xvsrai_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( ++ ++ // __lasx_xvsrai_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( ++ ++ // __lasx_xvsrar_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( ++ ++ // __lasx_xvsrar_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( ++ ++ // __lasx_xvsrar_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( ++ ++ // __lasx_xvsrar_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( ++ ++ // __lasx_xvsrari_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( ++ ++ // __lasx_xvsrari_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( ++ ++ // __lasx_xvsrari_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( ++ ++ // __lasx_xvsrari_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( ++ ++ // __lasx_xvsrl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( ++ ++ // __lasx_xvsrl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( ++ ++ // __lasx_xvsrl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call 
<8 x i32> @llvm.loongarch.lasx.xvsrl.w(
++
++ // __lasx_xvsrl_d
++ // xd, xj, xk
++ // V4DI, V4DI, V4DI
++ v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(
++
++ // __lasx_xvsrli_b
++ // xd, xj, ui3
++ // V32QI, V32QI, UQI
++ v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(
++
++ // __lasx_xvsrli_h
++ // xd, xj, ui4
++ // V16HI, V16HI, UQI
++ v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(
++
++ // __lasx_xvsrli_w
++ // xd, xj, ui5
++ // V8SI, V8SI, UQI
++ v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(
++
++ // __lasx_xvsrli_d
++ // xd, xj, ui6
++ // V4DI, V4DI, UQI
++ v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(
++
++ // __lasx_xvsrlr_b
++ // xd, xj, xk
++ // V32QI, V32QI, V32QI
++ v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(
++
++ // __lasx_xvsrlr_h
++ // xd, xj, xk
++ // V16HI, V16HI, V16HI
++ v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(
++
++ // __lasx_xvsrlr_w
++ // xd, xj, xk
++ // V8SI, V8SI, V8SI
++ v8i32_r = __lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(
++
++ // __lasx_xvsrlr_d
++ // xd, xj, xk
++ // V4DI, V4DI, V4DI
++ v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(
++
++ // __lasx_xvsrlri_b
++ // xd, xj, ui3
++ // V32QI, V32QI, UQI
++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(
++
++ // __lasx_xvsrlri_h
++ // xd, xj, ui4
++ // V16HI, V16HI, UQI
++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(
++
++ // __lasx_xvsrlri_w
++ // xd, xj, ui5
++ // V8SI, V8SI, UQI
++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(
++
++ // __lasx_xvsrlri_d
++ // xd, xj, ui6
++ // V4DI, V4DI, UQI
++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(
++
++ // __lasx_xvbitclr_b
++ // xd, xj, xk
++ // UV32QI, UV32QI, UV32QI
++ v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(
++
++ // __lasx_xvbitclr_h
++ // xd, xj, xk
++ // UV16HI, UV16HI, UV16HI
++ v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(
++
++ // __lasx_xvbitclr_w
++ // xd, xj, xk
++ // UV8SI, UV8SI, UV8SI
++ v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(
++
++ // __lasx_xvbitclr_d
++ // xd, xj, xk
++ // UV4DI, UV4DI, UV4DI
++ v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(
++
++ // __lasx_xvbitclri_b
++ // xd, xj, ui3
++ // UV32QI, UV32QI, UQI
++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(
++
++ // __lasx_xvbitclri_h
++ // xd, xj, ui4
++ // UV16HI, UV16HI, UQI
++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(
++
++ // __lasx_xvbitclri_w
++ // xd, xj, ui5
++ // UV8SI, UV8SI, UQI
++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(
++
++ // __lasx_xvbitclri_d
++ // xd, xj, ui6
++ // UV4DI, UV4DI, UQI
++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(
++
++ // __lasx_xvbitset_b
++ // xd, xj, xk
++ // UV32QI, UV32QI, UV32QI
++ v32u8_r = __lasx_xvbitset_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(
++
++ // __lasx_xvbitset_h
++ // xd, xj, xk
++ // UV16HI, UV16HI, UV16HI
++ v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(
++
++ // __lasx_xvbitset_w
++ // xd, xj, xk
++ // UV8SI, UV8SI, UV8SI
++ v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(
++
++ // __lasx_xvbitset_d
++ // xd, xj, xk
++ // UV4DI, UV4DI, UV4DI
++ v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(
++
++ // __lasx_xvbitseti_b
++ // xd, xj, ui3
++ // UV32QI, UV32QI, UQI
++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(
++
++ // __lasx_xvbitseti_h
++ // xd, xj, ui4
++ // UV16HI, UV16HI, UQI
++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(
++
++ // __lasx_xvbitseti_w
++ // xd, xj, ui5
++ // UV8SI, UV8SI, UQI
++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(
++
++ // __lasx_xvbitseti_d
++ // xd, xj, ui6
++ // UV4DI, UV4DI, UQI
++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(
++
++ // __lasx_xvbitrev_b
++ // xd, xj, xk
++ // UV32QI, UV32QI, UV32QI
++ v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(
++
++ // __lasx_xvbitrev_h
++ // xd, xj, xk
++ // UV16HI, UV16HI, UV16HI
++ v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(
++
++ // __lasx_xvbitrev_w
++ // xd, xj, xk
++ // UV8SI, UV8SI, UV8SI
++ v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(
++
++ // __lasx_xvbitrev_d
++ // xd, xj, xk
++ // UV4DI, UV4DI, UV4DI
++ v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(
++
++ // __lasx_xvbitrevi_b
++ // xd, xj, ui3
++ // UV32QI, UV32QI, UQI
++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(
++
++ // __lasx_xvbitrevi_h
++ // xd, xj, ui4
++ // UV16HI, UV16HI, UQI
++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(
++
++ // __lasx_xvbitrevi_w
++ // xd, xj, ui5
++ // UV8SI, UV8SI, UQI
++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(
++
++ // __lasx_xvbitrevi_d
++ // xd, xj, ui6
++ // UV4DI, UV4DI, UQI
++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(
++
++ // __lasx_xvadd_b
++ // xd, xj, xk
++ // V32QI, V32QI, V32QI
++ v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b(
++
++ // __lasx_xvadd_h
++ // xd, xj, xk
++ // V16HI, V16HI, V16HI
++ v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h(
++
++ // __lasx_xvadd_w
++ // xd, xj, xk
++ // V8SI, V8SI, V8SI
++ v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w(
++
++ // __lasx_xvadd_d
++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( ++ ++ // __lasx_xvaddi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( ++ ++ // __lasx_xvaddi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( ++ ++ // __lasx_xvaddi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( ++ ++ // __lasx_xvaddi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( ++ ++ // __lasx_xvsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( ++ ++ // __lasx_xvsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( ++ ++ // __lasx_xvsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( ++ ++ // __lasx_xvsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( ++ ++ // __lasx_xvsubi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( ++ ++ // __lasx_xvsubi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( ++ ++ // __lasx_xvsubi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( ++ ++ // __lasx_xvsubi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( ++ ++ // __lasx_xvmax_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( ++ ++ // __lasx_xvmax_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( ++ ++ // __lasx_xvmax_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( ++ ++ // __lasx_xvmax_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( ++ ++ // __lasx_xvmaxi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( ++ ++ // __lasx_xvmaxi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( ++ ++ // __lasx_xvmaxi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( ++ ++ // __lasx_xvmaxi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( ++ ++ // 
__lasx_xvmax_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( ++ ++ // __lasx_xvmax_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( ++ ++ // __lasx_xvmax_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( ++ ++ // __lasx_xvmax_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( ++ ++ // __lasx_xvmaxi_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( ++ ++ // __lasx_xvmaxi_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu( ++ ++ // __lasx_xvmaxi_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( ++ ++ // __lasx_xvmaxi_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( ++ ++ // __lasx_xvmin_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( ++ ++ // __lasx_xvmin_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( ++ ++ // __lasx_xvmin_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( ++ ++ // __lasx_xvmin_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( ++ ++ // __lasx_xvmini_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( ++ ++ // __lasx_xvmini_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( ++ ++ // __lasx_xvmini_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( ++ ++ // __lasx_xvmini_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( ++ ++ // __lasx_xvmin_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( ++ ++ // __lasx_xvmin_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( ++ ++ // __lasx_xvmin_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( ++ ++ // __lasx_xvmin_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( ++ ++ // __lasx_xvmini_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: 
call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( ++ ++ // __lasx_xvmini_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( ++ ++ // __lasx_xvmini_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( ++ ++ // __lasx_xvmini_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( ++ ++ // __lasx_xvseq_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( ++ ++ // __lasx_xvseq_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( ++ ++ // __lasx_xvseq_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( ++ ++ // __lasx_xvseq_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( ++ ++ // __lasx_xvseqi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( ++ ++ // __lasx_xvseqi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( ++ ++ // __lasx_xvseqi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseqi.w( ++ ++ // __lasx_xvseqi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d( ++ ++ // __lasx_xvslt_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b( ++ ++ // __lasx_xvslt_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h( ++ ++ // __lasx_xvslt_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w( ++ ++ // __lasx_xvslt_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d( ++ ++ // __lasx_xvslti_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b( ++ ++ // __lasx_xvslti_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h( ++ ++ // __lasx_xvslti_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w( ++ ++ // __lasx_xvslti_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d( ++ ++ // __lasx_xvslt_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu( ++ ++ // __lasx_xvslt_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); 
// CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu( ++ ++ // __lasx_xvslt_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu( ++ ++ // __lasx_xvslt_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du( ++ ++ // __lasx_xvslti_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu( ++ ++ // __lasx_xvslti_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu( ++ ++ // __lasx_xvslti_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu( ++ ++ // __lasx_xvslti_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du( ++ ++ // __lasx_xvsle_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b( ++ ++ // __lasx_xvsle_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h( ++ ++ // __lasx_xvsle_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w( ++ ++ // __lasx_xvsle_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d( ++ ++ // __lasx_xvslei_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b( ++ ++ // __lasx_xvslei_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h( ++ ++ // __lasx_xvslei_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w( ++ ++ // __lasx_xvslei_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d( ++ ++ // __lasx_xvsle_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu( ++ ++ // __lasx_xvsle_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu( ++ ++ // __lasx_xvsle_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu( ++ ++ // __lasx_xvsle_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du( ++ ++ // __lasx_xvslei_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu( ++ ++ // __lasx_xvslei_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu( ++ ++ // __lasx_xvslei_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ 
v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu( ++ ++ // __lasx_xvslei_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.du( ++ ++ // __lasx_xvsat_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b( ++ ++ // __lasx_xvsat_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h( ++ ++ // __lasx_xvsat_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w( ++ ++ // __lasx_xvsat_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d( ++ ++ // __lasx_xvsat_bu ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu( ++ ++ // __lasx_xvsat_hu ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu( ++ ++ // __lasx_xvsat_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu( ++ ++ // __lasx_xvsat_du ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du( ++ ++ // __lasx_xvadda_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b( ++ ++ // __lasx_xvadda_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h( ++ ++ // __lasx_xvadda_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w( ++ ++ // __lasx_xvadda_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d( ++ ++ // __lasx_xvsadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b( ++ ++ // __lasx_xvsadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h( ++ ++ // __lasx_xvsadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w( ++ ++ // __lasx_xvsadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d( ++ ++ // __lasx_xvsadd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu( ++ ++ // __lasx_xvsadd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu( ++ ++ // __lasx_xvsadd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu( ++ ++ // __lasx_xvsadd_du ++ 
// xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du( ++ ++ // __lasx_xvavg_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b( ++ ++ // __lasx_xvavg_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h( ++ ++ // __lasx_xvavg_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w( ++ ++ // __lasx_xvavg_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d( ++ ++ // __lasx_xvavg_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.bu( ++ ++ // __lasx_xvavg_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu( ++ ++ // __lasx_xvavg_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu( ++ ++ // __lasx_xvavg_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du( ++ ++ // __lasx_xvavgr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b( ++ ++ // __lasx_xvavgr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h( ++ ++ // __lasx_xvavgr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w( ++ ++ // __lasx_xvavgr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d( ++ ++ // __lasx_xvavgr_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu( ++ ++ // __lasx_xvavgr_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu( ++ ++ // __lasx_xvavgr_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu( ++ ++ // __lasx_xvavgr_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du( ++ ++ // __lasx_xvssub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b( ++ ++ // __lasx_xvssub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h( ++ ++ // __lasx_xvssub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w( ++ ++ // __lasx_xvssub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: 
call <4 x i64> @llvm.loongarch.lasx.xvssub.d( ++ ++ // __lasx_xvssub_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu( ++ ++ // __lasx_xvssub_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu( ++ ++ // __lasx_xvssub_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu( ++ ++ // __lasx_xvssub_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du( ++ ++ // __lasx_xvabsd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b( ++ ++ // __lasx_xvabsd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h( ++ ++ // __lasx_xvabsd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w( ++ ++ // __lasx_xvabsd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d( ++ ++ // __lasx_xvabsd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu( ++ ++ // __lasx_xvabsd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu( ++ ++ // __lasx_xvabsd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu( ++ ++ // __lasx_xvabsd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du( ++ ++ // __lasx_xvmul_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b( ++ ++ // __lasx_xvmul_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h( ++ ++ // __lasx_xvmul_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w( ++ ++ // __lasx_xvmul_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d( ++ ++ // __lasx_xvmadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b( ++ ++ // __lasx_xvmadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h( ++ ++ // __lasx_xvmadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w( ++ ++ // __lasx_xvmadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvmadd.d( ++ ++ // __lasx_xvmsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b( ++ ++ // __lasx_xvmsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h( ++ ++ // __lasx_xvmsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w( ++ ++ // __lasx_xvmsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d( ++ ++ // __lasx_xvdiv_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.b( ++ ++ // __lasx_xvdiv_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h( ++ ++ // __lasx_xvdiv_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w( ++ ++ // __lasx_xvdiv_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d( ++ ++ // __lasx_xvdiv_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu( ++ ++ // __lasx_xvdiv_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu( ++ ++ // __lasx_xvdiv_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu( ++ ++ // __lasx_xvdiv_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du( ++ ++ // __lasx_xvhaddw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b( ++ ++ // __lasx_xvhaddw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h( ++ ++ // __lasx_xvhaddw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w( ++ ++ // __lasx_xvhaddw_hu_bu ++ // xd, xj, xk ++ // UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu( ++ ++ // __lasx_xvhaddw_wu_hu ++ // xd, xj, xk ++ // UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu( ++ ++ // __lasx_xvhaddw_du_wu ++ // xd, xj, xk ++ // UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu( ++ ++ // __lasx_xvhsubw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b( ++ ++ // __lasx_xvhsubw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = 
__lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h( ++ ++ // __lasx_xvhsubw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w( ++ ++ // __lasx_xvhsubw_hu_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu( ++ ++ // __lasx_xvhsubw_wu_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu( ++ ++ // __lasx_xvhsubw_du_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu( ++ ++ // __lasx_xvmod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.b( ++ ++ // __lasx_xvmod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h( ++ ++ // __lasx_xvmod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w( ++ ++ // __lasx_xvmod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d( ++ ++ // __lasx_xvmod_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu( ++ ++ // __lasx_xvmod_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu( ++ ++ // __lasx_xvmod_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu( ++ ++ // __lasx_xvmod_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du( ++ ++ // __lasx_xvrepl128vei_b ++ // xd, xj, ui4 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b( ++ ++ // __lasx_xvrepl128vei_h ++ // xd, xj, ui3 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h( ++ ++ // __lasx_xvrepl128vei_w ++ // xd, xj, ui2 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w( ++ ++ // __lasx_xvrepl128vei_d ++ // xd, xj, ui1 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d( ++ ++ // __lasx_xvpickev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b( ++ ++ // __lasx_xvpickev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h( ++ ++ // __lasx_xvpickev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w( ++ ++ // __lasx_xvpickev_d ++ // xd, xj, xk ++ // V4DI, 
V4DI, V4DI ++ v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d( ++ ++ // __lasx_xvpickod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b( ++ ++ // __lasx_xvpickod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h( ++ ++ // __lasx_xvpickod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w( ++ ++ // __lasx_xvpickod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d( ++ ++ // __lasx_xvilvh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvh.b( ++ ++ // __lasx_xvilvh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h( ++ ++ // __lasx_xvilvh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w( ++ ++ // __lasx_xvilvh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d( ++ ++ // __lasx_xvilvl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b( ++ ++ // __lasx_xvilvl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h( ++ ++ // __lasx_xvilvl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w( ++ ++ // __lasx_xvilvl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d( ++ ++ // __lasx_xvpackev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b( ++ ++ // __lasx_xvpackev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h( ++ ++ // __lasx_xvpackev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w( ++ ++ // __lasx_xvpackev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d( ++ ++ // __lasx_xvpackod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b( ++ ++ // __lasx_xvpackod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h( ++ ++ // __lasx_xvpackod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w( ++ ++ // __lasx_xvpackod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackod_d(v4i64_a, 
v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d( ++ ++ // __lasx_xvshuf_b ++ // xd, xj, xk, xa ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf.b( ++ ++ // __lasx_xvshuf_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h( ++ ++ // __lasx_xvshuf_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w( ++ ++ // __lasx_xvshuf_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d( ++ ++ // __lasx_xvand_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvand.v( ++ ++ // __lasx_xvandi_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b( ++ ++ // __lasx_xvor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v( ++ ++ // __lasx_xvori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b( ++ ++ // __lasx_xvnor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v( ++ ++ // __lasx_xvnori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b( ++ ++ // __lasx_xvxor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v( ++ ++ // __lasx_xvxori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b( ++ ++ // __lasx_xvbitsel_v ++ // xd, xj, xk, xa ++ // UV32QI, UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v( ++ ++ // __lasx_xvbitseli_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b( ++ ++ // __lasx_xvshuf4i_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, USI ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b( ++ ++ // __lasx_xvshuf4i_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, USI ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h( ++ ++ // __lasx_xvshuf4i_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, USI ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w( ++ ++ // __lasx_xvreplgr2vr_b ++ // xd, rj ++ // V32QI, SI ++ v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b( ++ ++ // __lasx_xvreplgr2vr_h ++ // xd, rj ++ // V16HI, SI ++ v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h( ++ ++ // __lasx_xvreplgr2vr_w ++ // xd, rj ++ // V8SI, SI ++ v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 
x i32> @llvm.loongarch.lasx.xvreplgr2vr.w( ++ ++ // __lasx_xvreplgr2vr_d ++ // xd, rj ++ // V4DI, DI ++ v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d( ++ ++ // __lasx_xvpcnt_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b( ++ ++ // __lasx_xvpcnt_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h( ++ ++ // __lasx_xvpcnt_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w( ++ ++ // __lasx_xvpcnt_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d( ++ ++ // __lasx_xvclo_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclo_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b( ++ ++ // __lasx_xvclo_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h( ++ ++ // __lasx_xvclo_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w( ++ ++ // __lasx_xvclo_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d( ++ ++ // __lasx_xvclz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b( ++ ++ // __lasx_xvclz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h( ++ ++ // __lasx_xvclz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w( ++ ++ // __lasx_xvclz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d( ++ ++ // __lasx_xvfcmp_caf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s( ++ ++ // __lasx_xvfcmp_caf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d( ++ ++ // __lasx_xvfcmp_cor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s( ++ ++ // __lasx_xvfcmp_cor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d( ++ ++ // __lasx_xvfcmp_cun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s( ++ ++ // __lasx_xvfcmp_cun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d( ++ ++ // __lasx_xvfcmp_cune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s( ++ ++ // __lasx_xvfcmp_cune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d( ++ ++ // __lasx_xvfcmp_cueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = 
__lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s( ++ ++ // __lasx_xvfcmp_cueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d( ++ ++ // __lasx_xvfcmp_ceq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s( ++ ++ // __lasx_xvfcmp_ceq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d( ++ ++ // __lasx_xvfcmp_cne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s( ++ ++ // __lasx_xvfcmp_cne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d( ++ ++ // __lasx_xvfcmp_clt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s( ++ ++ // __lasx_xvfcmp_clt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d( ++ ++ // __lasx_xvfcmp_cult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s( ++ ++ // __lasx_xvfcmp_cult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d( ++ ++ // __lasx_xvfcmp_cle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s( ++ ++ // __lasx_xvfcmp_cle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d( ++ ++ // __lasx_xvfcmp_cule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s( ++ ++ // __lasx_xvfcmp_cule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d( ++ ++ // __lasx_xvfcmp_saf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s( ++ ++ // __lasx_xvfcmp_saf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d( ++ ++ // __lasx_xvfcmp_sor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s( ++ ++ // __lasx_xvfcmp_sor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d( ++ ++ // __lasx_xvfcmp_sun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s( ++ ++ // __lasx_xvfcmp_sun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d( 
++ ++ // __lasx_xvfcmp_sune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s( ++ ++ // __lasx_xvfcmp_sune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d( ++ ++ // __lasx_xvfcmp_sueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s( ++ ++ // __lasx_xvfcmp_sueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d( ++ ++ // __lasx_xvfcmp_seq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s( ++ ++ // __lasx_xvfcmp_seq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d( ++ ++ // __lasx_xvfcmp_sne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s( ++ ++ // __lasx_xvfcmp_sne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d( ++ ++ // __lasx_xvfcmp_slt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s( ++ ++ // __lasx_xvfcmp_slt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d( ++ ++ // __lasx_xvfcmp_sult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s( ++ ++ // __lasx_xvfcmp_sult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d( ++ ++ // __lasx_xvfcmp_sle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s( ++ ++ // __lasx_xvfcmp_sle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d( ++ ++ // __lasx_xvfcmp_sule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s( ++ ++ // __lasx_xvfcmp_sule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d( ++ ++ // __lasx_xvfadd_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s( ++ ++ // __lasx_xvfadd_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d( ++ ++ // __lasx_xvfsub_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s( ++ ++ // __lasx_xvfsub_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // 
CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d( ++ ++ // __lasx_xvfmul_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s( ++ ++ // __lasx_xvfmul_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d( ++ ++ // __lasx_xvfdiv_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s( ++ ++ // __lasx_xvfdiv_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d( ++ ++ // __lasx_xvfcvt_h_s ++ // xd, xj, xk ++ // V16HI, V8SF, V8SF ++ v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s( ++ ++ // __lasx_xvfcvt_s_d ++ // xd, xj, xk ++ // V8SF, V4DF, V4DF ++ v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d( ++ ++ // __lasx_xvfmin_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s( ++ ++ // __lasx_xvfmin_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d( ++ ++ // __lasx_xvfmina_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s( ++ ++ // __lasx_xvfmina_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d( ++ ++ // __lasx_xvfmax_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s( ++ ++ // __lasx_xvfmax_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d( ++ ++ // __lasx_xvfmaxa_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s( ++ ++ // __lasx_xvfmaxa_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d( ++ ++ // __lasx_xvfclass_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s( ++ ++ // __lasx_xvfclass_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d( ++ ++ // __lasx_xvfsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s( ++ ++ // __lasx_xvfsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d( ++ ++ // __lasx_xvfrecip_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s( ++ ++ // __lasx_xvfrecip_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d( ++ ++ // __lasx_xvfrecipe_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecipe_s(v8f32_a); // CHECK: call <8 x float> 
@llvm.loongarch.lasx.xvfrecipe.s( ++ ++ // __lasx_xvfrecipe_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecipe_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d( ++ ++ // __lasx_xvfrint_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s( ++ ++ // __lasx_xvfrint_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d( ++ ++ // __lasx_xvfrsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s( ++ ++ // __lasx_xvfrsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d( ++ ++ // __lasx_xvfrsqrte_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrsqrte_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s( ++ ++ // __lasx_xvfrsqrte_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrte_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d( ++ ++ // __lasx_xvflogb_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s( ++ ++ // __lasx_xvflogb_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d( ++ ++ // __lasx_xvfcvth_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h( ++ ++ // __lasx_xvfcvth_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s( ++ ++ // __lasx_xvfcvtl_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h( ++ ++ // __lasx_xvfcvtl_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s( ++ ++ // __lasx_xvftint_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s( ++ ++ // __lasx_xvftint_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d( ++ ++ // __lasx_xvftint_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s( ++ ++ // __lasx_xvftint_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d( ++ ++ // __lasx_xvftintrz_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s( ++ ++ // __lasx_xvftintrz_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d( ++ ++ // __lasx_xvftintrz_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s( ++ ++ // __lasx_xvftintrz_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d( ++ ++ // __lasx_xvffint_s_w ++ // xd, xj ++ // V8SF, V8SI ++ v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> 
@llvm.loongarch.lasx.xvffint.s.w( ++ ++ // __lasx_xvffint_d_l ++ // xd, xj ++ // V4DF, V4DI ++ v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l( ++ ++ // __lasx_xvffint_s_wu ++ // xd, xj ++ // V8SF, UV8SI ++ v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu( ++ ++ // __lasx_xvffint_d_lu ++ // xd, xj ++ // V4DF, UV4DI ++ v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu( ++ ++ // __lasx_xvreplve_b ++ // xd, xj, rk ++ // V32QI, V32QI, SI ++ v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b( ++ ++ // __lasx_xvreplve_h ++ // xd, xj, rk ++ // V16HI, V16HI, SI ++ v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h( ++ ++ // __lasx_xvreplve_w ++ // xd, xj, rk ++ // V8SI, V8SI, SI ++ v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w( ++ ++ // __lasx_xvreplve_d ++ // xd, xj, rk ++ // V4DI, V4DI, SI ++ v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d( ++ ++ // __lasx_xvpermi_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w( ++ ++ // __lasx_xvandn_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandn.v( ++ ++ // __lasx_xvneg_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b( ++ ++ // __lasx_xvneg_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h( ++ ++ // __lasx_xvneg_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w( ++ ++ // __lasx_xvneg_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d( ++ ++ // __lasx_xvmuh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b( ++ ++ // __lasx_xvmuh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h( ++ ++ // __lasx_xvmuh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w( ++ ++ // __lasx_xvmuh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d( ++ ++ // __lasx_xvmuh_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu( ++ ++ // __lasx_xvmuh_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu( ++ ++ // __lasx_xvmuh_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu( ++ ++ // __lasx_xvmuh_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du( ++ ++ // 
__lasx_xvsllwil_h_b ++ // xd, xj, ui3 ++ // V16HI, V32QI, UQI ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b( ++ ++ // __lasx_xvsllwil_w_h ++ // xd, xj, ui4 ++ // V8SI, V16HI, UQI ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h( ++ ++ // __lasx_xvsllwil_d_w ++ // xd, xj, ui5 ++ // V4DI, V8SI, UQI ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w( ++ ++ // __lasx_xvsllwil_hu_bu ++ // xd, xj, ui3 ++ // UV16HI, UV32QI, UQI ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu( ++ ++ // __lasx_xvsllwil_wu_hu ++ // xd, xj, ui4 ++ // UV8SI, UV16HI, UQI ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu( ++ ++ // __lasx_xvsllwil_du_wu ++ // xd, xj, ui5 ++ // UV4DI, UV8SI, UQI ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu( ++ ++ // __lasx_xvsran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h( ++ ++ // __lasx_xvsran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w( ++ ++ // __lasx_xvsran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d( ++ ++ // __lasx_xvssran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h( ++ ++ // __lasx_xvssran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w( ++ ++ // __lasx_xvssran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d( ++ ++ // __lasx_xvssran_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h( ++ ++ // __lasx_xvssran_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w( ++ ++ // __lasx_xvssran_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d( ++ ++ // __lasx_xvsrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h( ++ ++ // __lasx_xvsrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w( ++ ++ // __lasx_xvsrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d( ++ ++ // __lasx_xvssrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h( ++ ++ // __lasx_xvssrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = 
__lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w( ++ ++ // __lasx_xvssrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d( ++ ++ // __lasx_xvssrarn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h( ++ ++ // __lasx_xvssrarn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w( ++ ++ // __lasx_xvssrarn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d( ++ ++ // __lasx_xvsrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h( ++ ++ // __lasx_xvsrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w( ++ ++ // __lasx_xvsrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d( ++ ++ // __lasx_xvssrln_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h( ++ ++ // __lasx_xvssrln_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w( ++ ++ // __lasx_xvssrln_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d( ++ ++ // __lasx_xvsrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h( ++ ++ // __lasx_xvsrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w( ++ ++ // __lasx_xvsrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d( ++ ++ // __lasx_xvssrlrn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h( ++ ++ // __lasx_xvssrlrn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w( ++ ++ // __lasx_xvssrlrn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d( ++ ++ // __lasx_xvfrstpi_b ++ // xd, xj, ui5 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b( ++ ++ // __lasx_xvfrstpi_h ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h( ++ ++ // __lasx_xvfrstp_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvfrstp_b(v32i8_a, 
v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b( ++ ++ // __lasx_xvfrstp_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h( ++ ++ // __lasx_xvshuf4i_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d( ++ ++ // __lasx_xvbsrl_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v( ++ ++ // __lasx_xvbsll_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v( ++ ++ // __lasx_xvextrins_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b( ++ ++ // __lasx_xvextrins_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h( ++ ++ // __lasx_xvextrins_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w( ++ ++ // __lasx_xvextrins_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextrins.d( ++ ++ // __lasx_xvmskltz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b( ++ ++ // __lasx_xvmskltz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h( ++ ++ // __lasx_xvmskltz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w( ++ ++ // __lasx_xvmskltz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d( ++ ++ // __lasx_xvsigncov_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b( ++ ++ // __lasx_xvsigncov_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h( ++ ++ // __lasx_xvsigncov_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w( ++ ++ // __lasx_xvsigncov_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d( ++ ++ // __lasx_xvfmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s( ++ ++ // __lasx_xvfmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d( ++ ++ // __lasx_xvfmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s( ++ ++ // __lasx_xvfmsub_d ++ // xd, xj, xk, xa ++ // V4DF, 
V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d( ++ ++ // __lasx_xvfnmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s( ++ ++ // __lasx_xvfnmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d( ++ ++ // __lasx_xvfnmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( ++ ++ // __lasx_xvfnmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d( ++ ++ // __lasx_xvftintrne_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( ++ ++ // __lasx_xvftintrne_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( ++ ++ // __lasx_xvftintrp_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( ++ ++ // __lasx_xvftintrp_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( ++ ++ // __lasx_xvftintrm_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( ++ ++ // __lasx_xvftintrm_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( ++ ++ // __lasx_xvftint_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( ++ ++ // __lasx_xvffint_s_l ++ // xd, xj, xk ++ // V8SF, V4DI, V4DI ++ v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( ++ ++ // __lasx_xvftintrz_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( ++ ++ // __lasx_xvftintrp_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( ++ ++ // __lasx_xvftintrm_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( ++ ++ // __lasx_xvftintrne_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( ++ ++ // __lasx_xvftinth_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( ++ ++ // __lasx_xvftintl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( ++ ++ // __lasx_xvffinth_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( ++ ++ // __lasx_xvffintl_d_w ++ // xd, xj ++ // 
V4DF, V8SI ++ v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( ++ ++ // __lasx_xvftintrzh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( ++ ++ // __lasx_xvftintrzl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( ++ ++ // __lasx_xvftintrph_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( ++ ++ // __lasx_xvftintrpl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( ++ ++ // __lasx_xvftintrmh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( ++ ++ // __lasx_xvftintrml_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( ++ ++ // __lasx_xvftintrneh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( ++ ++ // __lasx_xvftintrnel_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( ++ ++ // __lasx_xvfrintrne_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s( ++ ++ // __lasx_xvfrintrne_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d( ++ ++ // __lasx_xvfrintrz_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s( ++ ++ // __lasx_xvfrintrz_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d( ++ ++ // __lasx_xvfrintrp_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s( ++ ++ // __lasx_xvfrintrp_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d( ++ ++ // __lasx_xvfrintrm_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s( ++ ++ // __lasx_xvfrintrm_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d( ++ ++ // __lasx_xvld ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( ++ ++ // __lasx_xvst ++ // xd, rj, si12 ++ // VOID, V32QI, CVPOINTER, SI ++ __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( ++ ++ // __lasx_xvstelm_b ++ // xd, rj, si8, idx ++ // VOID, V32QI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( ++ ++ // __lasx_xvstelm_h ++ // xd, rj, si8, idx ++ // VOID, V16HI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( ++ ++ // __lasx_xvstelm_w ++ // xd, rj, si8, idx ++ // VOID, V8SI, CVPOINTER, SI, UQI ++ 
__lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( ++ ++ // __lasx_xvstelm_d ++ // xd, rj, si8, idx ++ // VOID, V4DI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( ++ ++ // __lasx_xvinsve0_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( ++ ++ // __lasx_xvinsve0_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( ++ ++ // __lasx_xvpickve_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( ++ ++ // __lasx_xvpickve_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( ++ ++ // __lasx_xvssrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( ++ ++ // __lasx_xvssrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( ++ ++ // __lasx_xvssrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d( ++ ++ // __lasx_xvssrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( ++ ++ // __lasx_xvssrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( ++ ++ // __lasx_xvssrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( ++ ++ // __lasx_xvorn_v ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( ++ ++ // __lasx_xvldi ++ // xd, i13 ++ // V4DI, HI ++ v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( ++ ++ // __lasx_xvldx ++ // xd, rj, rk ++ // V32QI, CVPOINTER, DI ++ v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( ++ ++ // __lasx_xvstx ++ // xd, rj, rk ++ // VOID, V32QI, CVPOINTER, DI ++ __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( ++ ++ // __lasx_xvinsgr2vr_w ++ // xd, rj, ui3 ++ // V8SI, V8SI, SI, UQI ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( ++ ++ // __lasx_xvinsgr2vr_d ++ // xd, rj, ui2 ++ // V4DI, V4DI, DI, UQI ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( ++ ++ // __lasx_xvreplve0_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( ++ ++ // __lasx_xvreplve0_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( ++ ++ // __lasx_xvreplve0_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvreplve0.w( ++ ++ // __lasx_xvreplve0_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( ++ ++ // __lasx_xvreplve0_q ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( ++ ++ // __lasx_vext2xv_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( ++ ++ // __lasx_vext2xv_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( ++ ++ // __lasx_vext2xv_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( ++ ++ // __lasx_vext2xv_w_b ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( ++ ++ //gcc build fail ++ // __lasx_vext2xv_d_h ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( ++ ++ // __lasx_vext2xv_d_b ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( ++ ++ // __lasx_vext2xv_hu_bu ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( ++ ++ // __lasx_vext2xv_wu_hu ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( ++ ++ // __lasx_vext2xv_du_wu ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( ++ ++ // __lasx_vext2xv_wu_bu ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( ++ ++ //gcc build fail ++ // __lasx_vext2xv_du_hu ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( ++ ++ // __lasx_vext2xv_du_bu ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( ++ ++ // __lasx_xvpermi_q ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( ++ ++ // __lasx_xvpermi_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, USI ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( ++ ++ // __lasx_xvperm_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( ++ ++ // __lasx_xvldrepl_b ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( ++ ++ // __lasx_xvldrepl_h ++ // xd, rj, si11 ++ // V16HI, CVPOINTER, SI ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( ++ ++ // __lasx_xvldrepl_w ++ // xd, rj, si10 ++ // V8SI, CVPOINTER, SI ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( ++ ++ // __lasx_xvldrepl_d ++ // xd, rj, si9 ++ // V4DI, CVPOINTER, SI ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call 
<4 x i64> @llvm.loongarch.lasx.xvldrepl.d( ++ ++ // __lasx_xvpickve2gr_w ++ // rd, xj, ui3 ++ // SI, V8SI, UQI ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( ++ ++ // __lasx_xvpickve2gr_wu ++ // rd, xj, ui3 ++ // USI, V8SI, UQI ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( ++ ++ // __lasx_xvpickve2gr_d ++ // rd, xj, ui2 ++ // DI, V4DI, UQI ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( ++ ++ // __lasx_xvpickve2gr_du ++ // rd, xj, ui2 ++ // UDI, V4DI, UQI ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( ++ ++ // __lasx_xvaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d( ++ ++ // __lasx_xvaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( ++ ++ // __lasx_xvaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( ++ ++ // __lasx_xvaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( ++ ++ // __lasx_xvaddwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( ++ ++ // __lasx_xvaddwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( ++ ++ // __lasx_xvaddwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( ++ ++ // __lasx_xvaddwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( ++ ++ // __lasx_xvsubwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( ++ ++ // __lasx_xvsubwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( ++ ++ // __lasx_xvsubwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( ++ ++ // __lasx_xvsubwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( ++ ++ // __lasx_xvsubwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( ++ ++ // __lasx_xvsubwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( ++ ++ // __lasx_xvsubwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( ++ ++ // __lasx_xvsubwev_h_bu ++ 
// xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( ++ ++ // __lasx_xvmulwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( ++ ++ // __lasx_xvmulwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( ++ ++ // __lasx_xvmulwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( ++ ++ // __lasx_xvmulwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b( ++ ++ // __lasx_xvmulwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( ++ ++ // __lasx_xvmulwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( ++ ++ // __lasx_xvmulwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( ++ ++ // __lasx_xvmulwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( ++ ++ // __lasx_xvaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( ++ ++ // __lasx_xvaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( ++ ++ // __lasx_xvaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( ++ ++ // __lasx_xvaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( ++ ++ // __lasx_xvaddwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( ++ ++ // __lasx_xvaddwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( ++ ++ // __lasx_xvaddwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( ++ ++ // __lasx_xvaddwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( ++ ++ // __lasx_xvsubwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( ++ ++ // __lasx_xvsubwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( ++ ++ // __lasx_xvsubwod_w_h ++ // xd, xj, xk ++ // V8SI, 
V16HI, V16HI ++ v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( ++ ++ // __lasx_xvsubwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( ++ ++ // __lasx_xvsubwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( ++ ++ // __lasx_xvsubwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( ++ ++ // __lasx_xvsubwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu( ++ ++ // __lasx_xvsubwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( ++ ++ // __lasx_xvmulwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( ++ ++ // __lasx_xvmulwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( ++ ++ // __lasx_xvmulwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( ++ ++ // __lasx_xvmulwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( ++ ++ // __lasx_xvmulwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( ++ ++ // __lasx_xvmulwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( ++ ++ // __lasx_xvmulwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( ++ ++ // __lasx_xvmulwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( ++ ++ // __lasx_xvaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( ++ ++ // __lasx_xvaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( ++ ++ // __lasx_xvaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( ++ ++ // __lasx_xvmulwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( ++ ++ // __lasx_xvmulwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( ++ ++ // __lasx_xvmulwev_h_bu_b ++ 
// xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( ++ ++ // __lasx_xvaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( ++ ++ // __lasx_xvaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( ++ ++ // __lasx_xvaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( ++ ++ // __lasx_xvmulwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( ++ ++ // __lasx_xvmulwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( ++ ++ // __lasx_xvmulwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( ++ ++ // __lasx_xvhaddw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( ++ ++ // __lasx_xvhaddw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( ++ ++ // __lasx_xvhsubw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( ++ ++ // __lasx_xvhsubw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( ++ ++ // __lasx_xvmaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( ++ ++ // __lasx_xvmaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( ++ ++ // __lasx_xvmaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( ++ ++ // __lasx_xvmaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( ++ ++ // __lasx_xvmaddwev_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( ++ ++ // __lasx_xvmaddwev_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( ++ ++ // __lasx_xvmaddwev_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( ++ ++ // __lasx_xvmaddwev_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, 
UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( ++ ++ // __lasx_xvmaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( ++ ++ // __lasx_xvmaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( ++ ++ // __lasx_xvmaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( ++ ++ // __lasx_xvmaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( ++ ++ // __lasx_xvmaddwod_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( ++ ++ // __lasx_xvmaddwod_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( ++ ++ // __lasx_xvmaddwod_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu( ++ ++ // __lasx_xvmaddwod_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( ++ ++ // __lasx_xvmaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( ++ ++ // __lasx_xvmaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( ++ ++ // __lasx_xvmaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( ++ ++ // __lasx_xvmaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( ++ ++ // __lasx_xvmaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( ++ ++ // __lasx_xvmaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( ++ ++ // __lasx_xvmaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( ++ ++ // __lasx_xvmaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( ++ ++ // __lasx_xvrotr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ 
v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( ++ ++ // __lasx_xvrotr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( ++ ++ // __lasx_xvrotr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( ++ ++ // __lasx_xvrotr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( ++ ++ // __lasx_xvadd_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( ++ ++ // __lasx_xvsub_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( ++ ++ // __lasx_xvaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( ++ ++ // __lasx_xvaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( ++ ++ // __lasx_xvmulwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d( ++ ++ // __lasx_xvmulwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( ++ ++ // __lasx_xvmskgez_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( ++ ++ // __lasx_xvmsknz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( ++ ++ // __lasx_xvexth_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( ++ ++ // __lasx_xvexth_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( ++ ++ // __lasx_xvexth_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( ++ ++ // __lasx_xvexth_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( ++ ++ // __lasx_xvexth_hu_bu ++ // xd, xj ++ // UV16HI, UV32QI ++ v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( ++ ++ // __lasx_xvexth_wu_hu ++ // xd, xj ++ // UV8SI, UV16HI ++ v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( ++ ++ // __lasx_xvexth_du_wu ++ // xd, xj ++ // UV4DI, UV8SI ++ v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( ++ ++ // __lasx_xvexth_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( ++ ++ // __lasx_xvextl_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( ++ ++ // __lasx_xvextl_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = 
__lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( ++ ++ // __lasx_xvrotri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( ++ ++ // __lasx_xvrotri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( ++ ++ // __lasx_xvrotri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( ++ ++ // __lasx_xvrotri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( ++ ++ // __lasx_xvsrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( ++ ++ // __lasx_xvsrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( ++ ++ // __lasx_xvsrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( ++ ++ // __lasx_xvsrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q( ++ ++ // __lasx_xvsrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( ++ ++ // __lasx_xvsrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( ++ ++ // __lasx_xvsrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( ++ ++ // __lasx_xvsrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( ++ ++ // __lasx_xvssrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( ++ ++ // __lasx_xvssrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( ++ ++ // __lasx_xvssrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( ++ ++ // __lasx_xvssrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( ++ ++ // __lasx_xvssrlni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( ++ ++ // __lasx_xvssrlni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( ++ ++ // __lasx_xvssrlni_wu_d ++ // xd, xj, ui6 ++ 
// UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( ++ ++ // __lasx_xvssrlni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( ++ ++ // __lasx_xvssrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( ++ ++ // __lasx_xvssrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( ++ ++ // __lasx_xvssrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( ++ ++ // __lasx_xvssrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( ++ ++ // __lasx_xvssrlrni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( ++ ++ // __lasx_xvssrlrni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w( ++ ++ // __lasx_xvssrlrni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( ++ ++ // __lasx_xvssrlrni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( ++ ++ // __lasx_xvsrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( ++ ++ // __lasx_xvsrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( ++ ++ // __lasx_xvsrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( ++ ++ // __lasx_xvsrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( ++ ++ // __lasx_xvsrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( ++ ++ // __lasx_xvsrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( ++ ++ // __lasx_xvsrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( ++ ++ // __lasx_xvsrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( ++ ++ // __lasx_xvssrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, 
V32QI, USI ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( ++ ++ // __lasx_xvssrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( ++ ++ // __lasx_xvssrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( ++ ++ // __lasx_xvssrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( ++ ++ // __lasx_xvssrani_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( ++ ++ // __lasx_xvssrani_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( ++ ++ // __lasx_xvssrani_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( ++ ++ // __lasx_xvssrani_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q( ++ ++ // __lasx_xvssrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( ++ ++ // __lasx_xvssrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( ++ ++ // __lasx_xvssrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( ++ ++ // __lasx_xvssrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( ++ ++ // __lasx_xvssrarni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( ++ ++ // __lasx_xvssrarni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( ++ ++ // __lasx_xvssrarni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( ++ ++ // __lasx_xvssrarni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( ++ ++ // __lasx_xbnz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( ++ ++ // __lasx_xbz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( ++ ++ // __lasx_xbnz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( ++ ++ // __lasx_xbnz_h ++ // rd, xj ++ 
// SI, UV16HI ++ i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( ++ ++ // __lasx_xbnz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( ++ ++ // __lasx_xbnz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( ++ ++ // __lasx_xbz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( ++ ++ // __lasx_xbz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( ++ ++ // __lasx_xbz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( ++ ++ // __lasx_xbz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( ++ ++ v32i8_r = __lasx_xvrepli_b(2); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepli.b( ++ ++ v16i16_r = __lasx_xvrepli_h(2); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepli.h( ++ ++ v8i32_r = __lasx_xvrepli_w(2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepli.w( ++ ++ v4i64_r = __lasx_xvrepli_d(2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepli.d( ++ ++ v4f64_r = __lasx_xvpickve_d_f(v4f64_a, 2); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f( ++ ++ v8f32_r = __lasx_xvpickve_w_f(v8f32_a, 2); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f( ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lsx-error.c b/clang/test/CodeGen/builtins-loongarch-lsx-error.c +new file mode 100644 +index 000000000..f566a7362 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lsx-error.c +@@ -0,0 +1,250 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -verify -o - 2>&1 ++ ++#include ++ ++void test() { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = 
(v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrari_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument 
value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is 
outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range 
[0, 3]}} ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 
256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} 
++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++} +diff --git a/clang/test/CodeGen/builtins-loongarch-lsx.c b/clang/test/CodeGen/builtins-loongarch-lsx.c +new file mode 100644 +index 000000000..ebc3bb8f2 +--- /dev/null ++++ b/clang/test/CodeGen/builtins-loongarch-lsx.c +@@ -0,0 +1,3665 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -o - | FileCheck %s ++ ++#include ++ ++#define ui1 0 ++#define ui2 1 ++#define ui3 4 ++#define ui4 7 ++#define ui5 25 ++#define ui6 44 ++#define ui7 100 ++#define ui8 127 //200 ++#define si5 -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 11 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = 
(v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lsx_vsll_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b( ++ ++ // __lsx_vsll_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h( ++ ++ // __lsx_vsll_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w( ++ ++ // __lsx_vsll_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d( ++ ++ // __lsx_vslli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b( ++ ++ // __lsx_vslli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h( ++ ++ // __lsx_vslli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w( ++ ++ // __lsx_vslli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d( ++ ++ // __lsx_vsra_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b( ++ ++ // __lsx_vsra_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h( ++ ++ // __lsx_vsra_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w( ++ ++ // __lsx_vsra_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d( ++ ++ // __lsx_vsrai_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b( ++ ++ // __lsx_vsrai_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h( ++ ++ // __lsx_vsrai_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w( ++ ++ // __lsx_vsrai_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d( ++ ++ // __lsx_vsrar_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b( ++ ++ // __lsx_vsrar_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = 
__lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h( ++ ++ // __lsx_vsrar_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w( ++ ++ // __lsx_vsrar_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d( ++ ++ // __lsx_vsrari_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b( ++ ++ // __lsx_vsrari_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h( ++ ++ // __lsx_vsrari_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w( ++ ++ // __lsx_vsrari_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d( ++ ++ // __lsx_vsrl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b( ++ ++ // __lsx_vsrl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h( ++ ++ // __lsx_vsrl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w( ++ ++ // __lsx_vsrl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d( ++ ++ // __lsx_vsrli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b( ++ ++ // __lsx_vsrli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h( ++ ++ // __lsx_vsrli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w( ++ ++ // __lsx_vsrli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d( ++ ++ // __lsx_vsrlr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b( ++ ++ // __lsx_vsrlr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h( ++ ++ // __lsx_vsrlr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w( ++ ++ // __lsx_vsrlr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d( ++ ++ // __lsx_vsrlri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b( ++ ++ // __lsx_vsrlri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h( ++ ++ // __lsx_vsrlri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w( ++ ++ // __lsx_vsrlri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ 
v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d( ++ ++ // __lsx_vbitclr_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b( ++ ++ // __lsx_vbitclr_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h( ++ ++ // __lsx_vbitclr_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w( ++ ++ // __lsx_vbitclr_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d( ++ ++ // __lsx_vbitclri_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b( ++ ++ // __lsx_vbitclri_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h( ++ ++ // __lsx_vbitclri_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w( ++ ++ // __lsx_vbitclri_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclri.d( ++ ++ // __lsx_vbitset_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b( ++ ++ // __lsx_vbitset_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h( ++ ++ // __lsx_vbitset_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w( ++ ++ // __lsx_vbitset_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d( ++ ++ // __lsx_vbitseti_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b( ++ ++ // __lsx_vbitseti_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h( ++ ++ // __lsx_vbitseti_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w( ++ ++ // __lsx_vbitseti_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d( ++ ++ // __lsx_vbitrev_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b( ++ ++ // __lsx_vbitrev_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h( ++ ++ // __lsx_vbitrev_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w( ++ ++ // __lsx_vbitrev_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d( ++ ++ // 
__lsx_vbitrevi_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b( ++ ++ // __lsx_vbitrevi_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h( ++ ++ // __lsx_vbitrevi_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w( ++ ++ // __lsx_vbitrevi_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d( ++ ++ // __lsx_vadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b( ++ ++ // __lsx_vadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h( ++ ++ // __lsx_vadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w( ++ ++ // __lsx_vadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d( ++ ++ // __lsx_vaddi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vaddi.bu( ++ ++ // __lsx_vaddi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu( ++ ++ // __lsx_vaddi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu( ++ ++ // __lsx_vaddi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du( ++ ++ // __lsx_vsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b( ++ ++ // __lsx_vsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h( ++ ++ // __lsx_vsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w( ++ ++ // __lsx_vsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d( ++ ++ // __lsx_vsubi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu( ++ ++ // __lsx_vsubi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu( ++ ++ // __lsx_vsubi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu( ++ ++ // __lsx_vsubi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du( ++ ++ // __lsx_vmax_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b( ++ ++ // __lsx_vmax_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> 
@llvm.loongarch.lsx.vmax.h( ++ ++ // __lsx_vmax_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w( ++ ++ // __lsx_vmax_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d( ++ ++ // __lsx_vmaxi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b( ++ ++ // __lsx_vmaxi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h( ++ ++ // __lsx_vmaxi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w( ++ ++ // __lsx_vmaxi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d( ++ ++ // __lsx_vmax_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu( ++ ++ // __lsx_vmax_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu( ++ ++ // __lsx_vmax_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.wu( ++ ++ // __lsx_vmax_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du( ++ ++ // __lsx_vmaxi_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu( ++ ++ // __lsx_vmaxi_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu( ++ ++ // __lsx_vmaxi_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu( ++ ++ // __lsx_vmaxi_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du( ++ ++ // __lsx_vmin_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b( ++ ++ // __lsx_vmin_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h( ++ ++ // __lsx_vmin_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w( ++ ++ // __lsx_vmin_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d( ++ ++ // __lsx_vmini_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b( ++ ++ // __lsx_vmini_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h( ++ ++ // __lsx_vmini_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w( ++ ++ // __lsx_vmini_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vmini.d( ++ ++ // __lsx_vmin_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu( ++ ++ // __lsx_vmin_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu( ++ ++ // __lsx_vmin_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu( ++ ++ // __lsx_vmin_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du( ++ ++ // __lsx_vmini_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu( ++ ++ // __lsx_vmini_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu( ++ ++ // __lsx_vmini_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu( ++ ++ // __lsx_vmini_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du( ++ ++ // __lsx_vseq_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b( ++ ++ // __lsx_vseq_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h( ++ ++ // __lsx_vseq_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w( ++ ++ // __lsx_vseq_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d( ++ ++ // __lsx_vseqi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b( ++ ++ // __lsx_vseqi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h( ++ ++ // __lsx_vseqi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w( ++ ++ // __lsx_vseqi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d( ++ ++ // __lsx_vslti_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b( ++ ++ // __lsx_vslt_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b( ++ ++ // __lsx_vslt_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h( ++ ++ // __lsx_vslt_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w( ++ ++ // __lsx_vslt_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d( ++ ++ // __lsx_vslti_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vslti.h( ++ ++ // __lsx_vslti_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w( ++ ++ // __lsx_vslti_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d( ++ ++ // __lsx_vslt_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu( ++ ++ // __lsx_vslt_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu( ++ ++ // __lsx_vslt_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu( ++ ++ // __lsx_vslt_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du( ++ ++ // __lsx_vslti_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu( ++ ++ // __lsx_vslti_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu( ++ ++ // __lsx_vslti_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu( ++ ++ // __lsx_vslti_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du( ++ ++ // __lsx_vsle_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b( ++ ++ // __lsx_vsle_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h( ++ ++ // __lsx_vsle_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w( ++ ++ // __lsx_vsle_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d( ++ ++ // __lsx_vslei_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b( ++ ++ // __lsx_vslei_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h( ++ ++ // __lsx_vslei_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w( ++ ++ // __lsx_vslei_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d( ++ ++ // __lsx_vsle_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu( ++ ++ // __lsx_vsle_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu( ++ ++ // __lsx_vsle_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu( ++ ++ // __lsx_vsle_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsle_du(v2u64_a, 
v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du( ++ ++ // __lsx_vslei_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu( ++ ++ // __lsx_vslei_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu( ++ ++ // __lsx_vslei_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu( ++ ++ // __lsx_vslei_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du( ++ ++ // __lsx_vsat_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b( ++ ++ // __lsx_vsat_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h( ++ ++ // __lsx_vsat_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w( ++ ++ // __lsx_vsat_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d( ++ ++ // __lsx_vsat_bu ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu( ++ ++ // __lsx_vsat_hu ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu( ++ ++ // __lsx_vsat_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu( ++ ++ // __lsx_vsat_du ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du( ++ ++ // __lsx_vadda_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b( ++ ++ // __lsx_vadda_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h( ++ ++ // __lsx_vadda_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w( ++ ++ // __lsx_vadda_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d( ++ ++ // __lsx_vsadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b( ++ ++ // __lsx_vsadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h( ++ ++ // __lsx_vsadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w( ++ ++ // __lsx_vsadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d( ++ ++ // __lsx_vsadd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu( ++ ++ // __lsx_vsadd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = 
__lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu( ++ ++ // __lsx_vsadd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu( ++ ++ // __lsx_vsadd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du( ++ ++ // __lsx_vavg_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b( ++ ++ // __lsx_vavg_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h( ++ ++ // __lsx_vavg_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w( ++ ++ // __lsx_vavg_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d( ++ ++ // __lsx_vavg_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu( ++ ++ // __lsx_vavg_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu( ++ ++ // __lsx_vavg_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu( ++ ++ // __lsx_vavg_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du( ++ ++ // __lsx_vavgr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b( ++ ++ // __lsx_vavgr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h( ++ ++ // __lsx_vavgr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w( ++ ++ // __lsx_vavgr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d( ++ ++ // __lsx_vavgr_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu( ++ ++ // __lsx_vavgr_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu( ++ ++ // __lsx_vavgr_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu( ++ ++ // __lsx_vavgr_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du( ++ ++ // __lsx_vssub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b( ++ ++ // __lsx_vssub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h( ++ ++ // __lsx_vssub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> 
@llvm.loongarch.lsx.vssub.w( ++ ++ // __lsx_vssub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d( ++ ++ // __lsx_vssub_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu( ++ ++ // __lsx_vssub_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu( ++ ++ // __lsx_vssub_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu( ++ ++ // __lsx_vssub_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du( ++ ++ // __lsx_vabsd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b( ++ ++ // __lsx_vabsd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h( ++ ++ // __lsx_vabsd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w( ++ ++ // __lsx_vabsd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d( ++ ++ // __lsx_vabsd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu( ++ ++ // __lsx_vabsd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu( ++ ++ // __lsx_vabsd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu( ++ ++ // __lsx_vabsd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du( ++ ++ // __lsx_vmul_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b( ++ ++ // __lsx_vmul_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h( ++ ++ // __lsx_vmul_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w( ++ ++ // __lsx_vmul_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d( ++ ++ // __lsx_vmadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b( ++ ++ // __lsx_vmadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h( ++ ++ // __lsx_vmadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w( ++ ++ // __lsx_vmadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vmadd.d( ++ ++ // __lsx_vmsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b( ++ ++ // __lsx_vmsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h( ++ ++ // __lsx_vmsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w( ++ ++ // __lsx_vmsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d( ++ ++ // __lsx_vdiv_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b( ++ ++ // __lsx_vdiv_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h( ++ ++ // __lsx_vdiv_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w( ++ ++ // __lsx_vdiv_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d( ++ ++ // __lsx_vdiv_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu( ++ ++ // __lsx_vdiv_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu( ++ ++ // __lsx_vdiv_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu( ++ ++ // __lsx_vdiv_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du( ++ ++ // __lsx_vhaddw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b( ++ ++ // __lsx_vhaddw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h( ++ ++ // __lsx_vhaddw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w( ++ ++ // __lsx_vhaddw_hu_bu ++ // vd, vj, vk ++ // UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu( ++ ++ // __lsx_vhaddw_wu_hu ++ // vd, vj, vk ++ // UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu( ++ ++ // __lsx_vhaddw_du_wu ++ // vd, vj, vk ++ // UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu( ++ ++ // __lsx_vhsubw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b( ++ ++ // __lsx_vhsubw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h( ++ ++ // __lsx_vhsubw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = 
__lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w( ++ ++ // __lsx_vhsubw_hu_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu( ++ ++ // __lsx_vhsubw_wu_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu( ++ ++ // __lsx_vhsubw_du_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu( ++ ++ // __lsx_vmod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b( ++ ++ // __lsx_vmod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h( ++ ++ // __lsx_vmod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w( ++ ++ // __lsx_vmod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d( ++ ++ // __lsx_vmod_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu( ++ ++ // __lsx_vmod_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu( ++ ++ // __lsx_vmod_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu( ++ ++ // __lsx_vmod_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du( ++ ++ // __lsx_vreplve_b ++ // vd, vj, rk ++ // V16QI, V16QI, SI ++ v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b( ++ ++ // __lsx_vreplve_h ++ // vd, vj, rk ++ // V8HI, V8HI, SI ++ v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h( ++ ++ // __lsx_vreplve_w ++ // vd, vj, rk ++ // V4SI, V4SI, SI ++ v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w( ++ ++ // __lsx_vreplve_d ++ // vd, vj, rk ++ // V2DI, V2DI, SI ++ v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d( ++ ++ // __lsx_vreplvei_b ++ // vd, vj, ui4 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b( ++ ++ // __lsx_vreplvei_h ++ // vd, vj, ui3 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h( ++ ++ // __lsx_vreplvei_w ++ // vd, vj, ui2 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w( ++ ++ // __lsx_vreplvei_d ++ // vd, vj, ui1 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d( ++ ++ // __lsx_vpickev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b( ++ ++ // __lsx_vpickev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = 
__lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h( ++ ++ // __lsx_vpickev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w( ++ ++ // __lsx_vpickev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d( ++ ++ // __lsx_vpickod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b( ++ ++ // __lsx_vpickod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h( ++ ++ // __lsx_vpickod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w( ++ ++ // __lsx_vpickod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d( ++ ++ // __lsx_vilvh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b( ++ ++ // __lsx_vilvh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h( ++ ++ // __lsx_vilvh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w( ++ ++ // __lsx_vilvh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d( ++ ++ // __lsx_vilvl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b( ++ ++ // __lsx_vilvl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h( ++ ++ // __lsx_vilvl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w( ++ ++ // __lsx_vilvl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d( ++ ++ // __lsx_vpackev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b( ++ ++ // __lsx_vpackev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h( ++ ++ // __lsx_vpackev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w( ++ ++ // __lsx_vpackev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d( ++ ++ // __lsx_vpackod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b( ++ ++ // __lsx_vpackod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h( ++ ++ // __lsx_vpackod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // 
CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w( ++ ++ // __lsx_vpackod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d( ++ ++ // __lsx_vshuf_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h( ++ ++ // __lsx_vshuf_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w( ++ ++ // __lsx_vshuf_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d( ++ ++ // __lsx_vand_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vand_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vand.v( ++ ++ // __lsx_vandi_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vandi_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandi.b( ++ ++ // __lsx_vor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vor.v( ++ ++ // __lsx_vori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vori.b( ++ ++ // __lsx_vnor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vnor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnor.v( ++ ++ // __lsx_vnori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vnori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnori.b( ++ ++ // __lsx_vxor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vxor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxor.v( ++ ++ // __lsx_vxori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vxori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxori.b( ++ ++ // __lsx_vbitsel_v ++ // vd, vj, vk, va ++ // UV16QI, UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitsel_v(v16u8_a, v16u8_b, v16u8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitsel.v( ++ ++ // __lsx_vbitseli_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseli.b( ++ ++ // __lsx_vshuf4i_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b( ++ ++ // __lsx_vshuf4i_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h( ++ ++ // __lsx_vshuf4i_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w( ++ ++ // __lsx_vreplgr2vr_b ++ // vd, rj ++ // V16QI, SI ++ v16i8_r = __lsx_vreplgr2vr_b(i32_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b( ++ ++ // __lsx_vreplgr2vr_h ++ // vd, rj ++ // V8HI, SI ++ v8i16_r = __lsx_vreplgr2vr_h(i32_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h( ++ ++ // __lsx_vreplgr2vr_w ++ // vd, rj ++ // V4SI, SI ++ v4i32_r = __lsx_vreplgr2vr_w(i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w( ++ ++ // __lsx_vreplgr2vr_d ++ // vd, rj ++ // V2DI, DI ++ v2i64_r = __lsx_vreplgr2vr_d(i64_a); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vreplgr2vr.d( ++ ++ // __lsx_vpcnt_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vpcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpcnt.b( ++ ++ // __lsx_vpcnt_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h( ++ ++ // __lsx_vpcnt_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w( ++ ++ // __lsx_vpcnt_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d( ++ ++ // __lsx_vclo_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b( ++ ++ // __lsx_vclo_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h( ++ ++ // __lsx_vclo_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w( ++ ++ // __lsx_vclo_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d( ++ ++ // __lsx_vclz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b( ++ ++ // __lsx_vclz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h( ++ ++ // __lsx_vclz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w( ++ ++ // __lsx_vclz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d( ++ ++ // __lsx_vpickve2gr_b ++ // rd, vj, ui4 ++ // SI, V16QI, UQI ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b( ++ ++ // __lsx_vpickve2gr_h ++ // rd, vj, ui3 ++ // SI, V8HI, UQI ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h( ++ ++ // __lsx_vpickve2gr_w ++ // rd, vj, ui2 ++ // SI, V4SI, UQI ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w( ++ ++ // __lsx_vpickve2gr_d ++ // rd, vj, ui1 ++ // DI, V2DI, UQI ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d( ++ ++ // __lsx_vpickve2gr_bu ++ // rd, vj, ui4 ++ // USI, V16QI, UQI ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu( ++ ++ // __lsx_vpickve2gr_hu ++ // rd, vj, ui3 ++ // USI, V8HI, UQI ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu( ++ ++ // __lsx_vpickve2gr_wu ++ // rd, vj, ui2 ++ // USI, V4SI, UQI ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu( ++ ++ // __lsx_vpickve2gr_du ++ // rd, vj, ui1 ++ // UDI, V2DI, UQI ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du( ++ ++ // __lsx_vinsgr2vr_b ++ // vd, rj, ui4 ++ // V16QI, V16QI, SI, UQI ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b( ++ ++ // __lsx_vinsgr2vr_h ++ // vd, rj, ui3 ++ // V8HI, V8HI, SI, UQI ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h( ++ ++ // __lsx_vinsgr2vr_w ++ // vd, rj, ui2 ++ // V4SI, V4SI, SI, UQI ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, 
i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w( ++ ++ // __lsx_vinsgr2vr_d ++ // vd, rj, ui1 ++ // V2DI, V2DI, SI, UQI ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d( ++ ++ // __lsx_vfcmp_caf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s( ++ ++ // __lsx_vfcmp_caf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d( ++ ++ // __lsx_vfcmp_cor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s( ++ ++ // __lsx_vfcmp_cor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d( ++ ++ // __lsx_vfcmp_cun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s( ++ ++ // __lsx_vfcmp_cun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d( ++ ++ // __lsx_vfcmp_cune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s( ++ ++ // __lsx_vfcmp_cune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d( ++ ++ // __lsx_vfcmp_cueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s( ++ ++ // __lsx_vfcmp_cueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d( ++ ++ // __lsx_vfcmp_ceq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s( ++ ++ // __lsx_vfcmp_ceq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d( ++ ++ // __lsx_vfcmp_cne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s( ++ ++ // __lsx_vfcmp_cne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d( ++ ++ // __lsx_vfcmp_clt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s( ++ ++ // __lsx_vfcmp_clt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d( ++ ++ // __lsx_vfcmp_cult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s( ++ ++ // __lsx_vfcmp_cult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d( ++ ++ // __lsx_vfcmp_cle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call 
<4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s( ++ ++ // __lsx_vfcmp_cle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d( ++ ++ // __lsx_vfcmp_cule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s( ++ ++ // __lsx_vfcmp_cule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d( ++ ++ // __lsx_vfcmp_saf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s( ++ ++ // __lsx_vfcmp_saf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d( ++ ++ // __lsx_vfcmp_sor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s( ++ ++ // __lsx_vfcmp_sor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d( ++ ++ // __lsx_vfcmp_sun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s( ++ ++ // __lsx_vfcmp_sun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d( ++ ++ // __lsx_vfcmp_sune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s( ++ ++ // __lsx_vfcmp_sune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d( ++ ++ // __lsx_vfcmp_sueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s( ++ ++ // __lsx_vfcmp_sueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d( ++ ++ // __lsx_vfcmp_seq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s( ++ ++ // __lsx_vfcmp_seq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d( ++ ++ // __lsx_vfcmp_sne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s( ++ ++ // __lsx_vfcmp_sne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d( ++ ++ // __lsx_vfcmp_slt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s( ++ ++ // __lsx_vfcmp_slt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d( ++ ++ // __lsx_vfcmp_sult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.sult.s( ++ ++ // __lsx_vfcmp_sult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d( ++ ++ // __lsx_vfcmp_sle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s( ++ ++ // __lsx_vfcmp_sle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d( ++ ++ // __lsx_vfcmp_sule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s( ++ ++ // __lsx_vfcmp_sule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d( ++ ++ // __lsx_vfadd_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s( ++ // __lsx_vfadd_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d( ++ ++ // __lsx_vfsub_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s( ++ ++ // __lsx_vfsub_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfsub_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d( ++ ++ // __lsx_vfmul_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s( ++ ++ // __lsx_vfmul_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d( ++ ++ // __lsx_vfdiv_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s( ++ ++ // __lsx_vfdiv_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d( ++ ++ // __lsx_vfcvt_h_s ++ // vd, vj, vk ++ // V8HI, V4SF, V4SF ++ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s( ++ ++ // __lsx_vfcvt_s_d ++ // vd, vj, vk ++ // V4SF, V2DF, V2DF ++ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d( ++ ++ // __lsx_vfmin_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s( ++ ++ // __lsx_vfmin_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d( ++ ++ // __lsx_vfmina_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s( ++ ++ // __lsx_vfmina_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d( ++ ++ // __lsx_vfmax_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s( ++ ++ // __lsx_vfmax_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> 
@llvm.loongarch.lsx.vfmax.d( ++ ++ // __lsx_vfmaxa_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s( ++ ++ // __lsx_vfmaxa_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d( ++ ++ // __lsx_vfclass_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s( ++ ++ // __lsx_vfclass_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d( ++ ++ // __lsx_vfsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s( ++ ++ // __lsx_vfsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d( ++ ++ // __lsx_vfrecip_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s( ++ ++ // __lsx_vfrecip_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d( ++ ++ // __lsx_vfrecipe_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecipe_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecipe.s( ++ ++ // __lsx_vfrecipe_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecipe_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecipe.d( ++ ++ // __lsx_vfrint_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s( ++ ++ // __lsx_vfrint_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrint_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d( ++ ++ // __lsx_vfrsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s( ++ ++ // __lsx_vfrsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d( ++ ++ // __lsx_vfrsqrte_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrte_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s( ++ ++ // __lsx_vfrsqrte_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrte_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d( ++ ++ // __lsx_vflogb_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s( ++ ++ // __lsx_vflogb_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d( ++ ++ // __lsx_vfcvth_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h( ++ ++ // __lsx_vfcvth_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s( ++ ++ //gcc build fail ++ ++ // __lsx_vfcvtl_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h( ++ ++ // __lsx_vfcvtl_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s( ++ ++ // __lsx_vftint_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 
x i32> @llvm.loongarch.lsx.vftint.w.s( ++ ++ // __lsx_vftint_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d( ++ ++ // __lsx_vftint_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s( ++ ++ // __lsx_vftint_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d( ++ ++ // __lsx_vftintrz_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s( ++ ++ // __lsx_vftintrz_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d( ++ ++ // __lsx_vftintrz_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s( ++ ++ // __lsx_vftintrz_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d( ++ ++ // __lsx_vffint_s_w ++ // vd, vj ++ // V4SF, V4SI ++ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w( ++ ++ // __lsx_vffint_d_l ++ // vd, vj ++ // V2DF, V2DI ++ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l( ++ ++ // __lsx_vffint_s_wu ++ // vd, vj ++ // V4SF, UV4SI ++ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu( ++ ++ // __lsx_vffint_d_lu ++ // vd, vj ++ // V2DF, UV2DI ++ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu( ++ ++ // __lsx_vandn_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v( ++ ++ // __lsx_vneg_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b( ++ ++ // __lsx_vneg_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h( ++ ++ // __lsx_vneg_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w( ++ ++ // __lsx_vneg_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d( ++ ++ // __lsx_vmuh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b( ++ ++ // __lsx_vmuh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h( ++ ++ // __lsx_vmuh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w( ++ ++ // __lsx_vmuh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d( ++ ++ // __lsx_vmuh_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu( ++ ++ // __lsx_vmuh_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu( ++ ++ // __lsx_vmuh_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, 
UV4SI ++ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu( ++ ++ // __lsx_vmuh_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du( ++ ++ // __lsx_vsllwil_h_b ++ // vd, vj, ui3 ++ // V8HI, V16QI, UQI ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b( ++ ++ // __lsx_vsllwil_w_h ++ // vd, vj, ui4 ++ // V4SI, V8HI, UQI ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h( ++ ++ // __lsx_vsllwil_d_w ++ // vd, vj, ui5 ++ // V2DI, V4SI, UQI ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w( ++ ++ // __lsx_vsllwil_hu_bu ++ // vd, vj, ui3 ++ // UV8HI, UV16QI, UQI ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu( ++ ++ // __lsx_vsllwil_wu_hu ++ // vd, vj, ui4 ++ // UV4SI, UV8HI, UQI ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu( ++ ++ // __lsx_vsllwil_du_wu ++ // vd, vj, ui5 ++ // UV2DI, UV4SI, UQI ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu( ++ ++ // __lsx_vsran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h( ++ ++ // __lsx_vsran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w( ++ ++ // __lsx_vsran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d( ++ ++ // __lsx_vssran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h( ++ ++ // __lsx_vssran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.h.w( ++ ++ // __lsx_vssran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d( ++ ++ // __lsx_vssran_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h( ++ ++ // __lsx_vssran_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w( ++ ++ // __lsx_vssran_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d( ++ ++ // __lsx_vsrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h( ++ ++ // __lsx_vsrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w( ++ ++ // __lsx_vsrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d( ++ ++ // __lsx_vssrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vssrarn.b.h( ++ ++ // __lsx_vssrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w( ++ ++ // __lsx_vssrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d( ++ ++ // __lsx_vssrarn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h( ++ ++ // __lsx_vssrarn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w( ++ ++ // __lsx_vssrarn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d( ++ ++ // __lsx_vsrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h( ++ ++ // __lsx_vsrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w( ++ ++ // __lsx_vsrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d( ++ ++ // __lsx_vssrln_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h( ++ ++ // __lsx_vssrln_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w( ++ ++ // __lsx_vssrln_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d( ++ ++ // __lsx_vsrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h( ++ ++ // __lsx_vsrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w( ++ ++ // __lsx_vsrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d( ++ ++ // __lsx_vssrlrn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h( ++ ++ // __lsx_vssrlrn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w( ++ ++ // __lsx_vssrlrn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d( ++ ++ // __lsx_vfrstpi_b ++ // vd, vj, ui5 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b( ++ ++ // __lsx_vfrstpi_h ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h( ++ ++ // __lsx_vfrstp_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> 
@llvm.loongarch.lsx.vfrstp.b( ++ ++ // __lsx_vfrstp_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h( ++ ++ // __lsx_vshuf4i_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d( ++ ++ // __lsx_vbsrl_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v( ++ ++ // __lsx_vbsll_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v( ++ ++ // __lsx_vextrins_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b( ++ ++ // __lsx_vextrins_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h( ++ ++ // __lsx_vextrins_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, UQI ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w( ++ ++ // __lsx_vextrins_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, UQI ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d( ++ ++ // __lsx_vmskltz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b( ++ ++ // __lsx_vmskltz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h( ++ ++ // __lsx_vmskltz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w( ++ ++ // __lsx_vmskltz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d( ++ ++ // __lsx_vsigncov_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b( ++ ++ // __lsx_vsigncov_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h( ++ ++ // __lsx_vsigncov_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w( ++ ++ // __lsx_vsigncov_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d( ++ ++ // __lsx_vfmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s( ++ ++ // __lsx_vfmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d( ++ ++ // __lsx_vfmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s( ++ ++ // __lsx_vfmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d( ++ ++ // __lsx_vfnmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, 
V4SF, V4SF ++ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s( ++ ++ // __lsx_vfnmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d( ++ ++ // __lsx_vfnmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s( ++ ++ // __lsx_vfnmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d( ++ ++ // __lsx_vftintrne_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s( ++ ++ // __lsx_vftintrne_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d( ++ ++ // __lsx_vftintrp_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s( ++ ++ // __lsx_vftintrp_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d( ++ ++ // __lsx_vftintrm_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s( ++ ++ // __lsx_vftintrm_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d( ++ ++ // __lsx_vftint_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d( ++ ++ // __lsx_vffint_s_l ++ // vd, vj, vk ++ // V4SF, V2DI, V2DI ++ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l( ++ ++ // __lsx_vftintrz_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d( ++ ++ // __lsx_vftintrp_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d( ++ ++ // __lsx_vftintrm_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d( ++ ++ // __lsx_vftintrne_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d( ++ ++ // __lsx_vftintl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s( ++ ++ // __lsx_vftinth_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s( ++ ++ // __lsx_vffinth_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w( ++ ++ // __lsx_vffintl_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w( ++ ++ // __lsx_vftintrzl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s( ++ ++ // __lsx_vftintrzh_l_s ++ // vd, vj 
++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s( ++ ++ // __lsx_vftintrpl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s( ++ ++ // __lsx_vftintrph_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s( ++ ++ // __lsx_vftintrml_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s( ++ ++ // __lsx_vftintrmh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s( ++ ++ // __lsx_vftintrnel_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s( ++ ++ // __lsx_vftintrneh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s( ++ ++ // __lsx_vfrintrne_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrne.s( ++ ++ // __lsx_vfrintrne_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrne.d( ++ ++ // __lsx_vfrintrz_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrz.s( ++ ++ // __lsx_vfrintrz_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrz.d( ++ ++ // __lsx_vfrintrp_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrp.s( ++ ++ // __lsx_vfrintrp_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrp.d( ++ ++ // __lsx_vfrintrm_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrintrm_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrm.s( ++ ++ // __lsx_vfrintrm_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrm.d( ++ ++ // __lsx_vstelm_b ++ // vd, rj, si8, idx ++ // VOID, V16QI, CVPOINTER, SI, UQI ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b( ++ // __lsx_vstelm_h ++ // vd, rj, si8, idx ++ // VOID, V8HI, CVPOINTER, SI, UQI ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h( ++ ++ // __lsx_vstelm_w ++ // vd, rj, si8, idx ++ // VOID, V4SI, CVPOINTER, SI, UQI ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w( ++ ++ // __lsx_vstelm_d ++ // vd, rj, si8, idx ++ // VOID, V2DI, CVPOINTER, SI, UQI ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d( ++ ++ // __lsx_vaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w( ++ ++ // __lsx_vaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h( ++ ++ // __lsx_vaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vaddwev.h.b( ++ ++ // __lsx_vaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w( ++ ++ // __lsx_vaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h( ++ ++ // __lsx_vaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b( ++ ++ // __lsx_vaddwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu( ++ ++ // __lsx_vaddwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu( ++ ++ // __lsx_vaddwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu( ++ ++ // __lsx_vaddwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu( ++ ++ // __lsx_vaddwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu( ++ ++ // __lsx_vaddwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu( ++ ++ // __lsx_vaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w( ++ ++ // __lsx_vaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h( ++ ++ // __lsx_vaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b( ++ ++ // __lsx_vaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w( ++ ++ // __lsx_vaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h( ++ ++ // __lsx_vaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b( ++ ++ // __lsx_vsubwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w( ++ ++ // __lsx_vsubwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h( ++ ++ // __lsx_vsubwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b( ++ ++ // __lsx_vsubwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w( ++ ++ // __lsx_vsubwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = 
__lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h( ++ ++ // __lsx_vsubwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b( ++ ++ // __lsx_vsubwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu( ++ ++ // __lsx_vsubwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu( ++ ++ // __lsx_vsubwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu( ++ ++ // __lsx_vsubwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu( ++ ++ // __lsx_vsubwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu( ++ ++ // __lsx_vsubwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu( ++ ++ // __lsx_vaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d( ++ ++ // __lsx_vaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d( ++ ++ // __lsx_vaddwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du( ++ ++ // __lsx_vaddwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du( ++ ++ // __lsx_vsubwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d( ++ ++ // __lsx_vsubwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d( ++ ++ // __lsx_vsubwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du( ++ ++ // __lsx_vsubwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du( ++ ++ // __lsx_vaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d( ++ ++ // __lsx_vaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d( ++ ++ // __lsx_vmulwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w( ++ ++ // __lsx_vmulwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h( ++ ++ // __lsx_vmulwev_h_b ++ // vd, vj, vk ++ // V8HI, 
V16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b( ++ ++ // __lsx_vmulwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w( ++ ++ // __lsx_vmulwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h( ++ ++ // __lsx_vmulwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b( ++ ++ // __lsx_vmulwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu( ++ ++ // __lsx_vmulwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu( ++ ++ // __lsx_vmulwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu( ++ ++ // __lsx_vmulwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu( ++ ++ // __lsx_vmulwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu( ++ ++ // __lsx_vmulwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu( ++ ++ // __lsx_vmulwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w( ++ ++ // __lsx_vmulwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h( ++ ++ // __lsx_vmulwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b( ++ ++ // __lsx_vmulwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w( ++ ++ // __lsx_vmulwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h( ++ ++ // __lsx_vmulwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b( ++ ++ // __lsx_vmulwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d( ++ ++ // __lsx_vmulwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d( ++ ++ // __lsx_vmulwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du( ++ ++ // __lsx_vmulwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> 
@llvm.loongarch.lsx.vmulwod.q.du( ++ ++ // __lsx_vmulwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d( ++ ++ // __lsx_vmulwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d( ++ ++ // __lsx_vhaddw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d( ++ ++ // __lsx_vhaddw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du( ++ ++ // __lsx_vhsubw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d( ++ ++ // __lsx_vhsubw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du( ++ ++ // __lsx_vmaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w( ++ ++ // __lsx_vmaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h( ++ ++ // __lsx_vmaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b( ++ ++ // __lsx_vmaddwev_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu( ++ ++ // __lsx_vmaddwev_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu( ++ ++ // __lsx_vmaddwev_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu( ++ ++ // __lsx_vmaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w( ++ ++ // __lsx_vmaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h( ++ ++ // __lsx_vmaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b( ++ ++ // __lsx_vmaddwod_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu( ++ ++ // __lsx_vmaddwod_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu( ++ ++ // __lsx_vmaddwod_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu( ++ ++ // __lsx_vmaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI 
++ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w( ++ ++ // __lsx_vmaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h( ++ ++ // __lsx_vmaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b( ++ ++ // __lsx_vmaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w( ++ ++ // __lsx_vmaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h( ++ ++ // __lsx_vmaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b( ++ ++ // __lsx_vmaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d( ++ ++ // __lsx_vmaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d( ++ ++ // __lsx_vmaddwev_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du( ++ ++ // __lsx_vmaddwod_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du( ++ ++ // __lsx_vmaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d( ++ ++ // __lsx_vmaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d( ++ ++ // __lsx_vrotr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b( ++ ++ // __lsx_vrotr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h( ++ ++ // __lsx_vrotr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w( ++ ++ // __lsx_vrotr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d( ++ ++ // __lsx_vadd_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q( ++ ++ // __lsx_vsub_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q( ++ ++ // __lsx_vldrepl_b ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b( ++ ++ // __lsx_vldrepl_h ++ // vd, rj, si11 ++ // V8HI, CVPOINTER, SI 
++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h( ++ ++ // __lsx_vldrepl_w ++ // vd, rj, si10 ++ // V4SI, CVPOINTER, SI ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w( ++ ++ // __lsx_vldrepl_d ++ // vd, rj, si9 ++ // V2DI, CVPOINTER, SI ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d( ++ ++ // __lsx_vmskgez_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b( ++ ++ // __lsx_vmsknz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b( ++ ++ // __lsx_vexth_h_b ++ // vd, vj ++ // V8HI, V16QI ++ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b( ++ ++ // __lsx_vexth_w_h ++ // vd, vj ++ // V4SI, V8HI ++ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h( ++ ++ // __lsx_vexth_d_w ++ // vd, vj ++ // V2DI, V4SI ++ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w( ++ ++ // __lsx_vexth_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d( ++ ++ // __lsx_vexth_hu_bu ++ // vd, vj ++ // UV8HI, UV16QI ++ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu( ++ ++ // __lsx_vexth_wu_hu ++ // vd, vj ++ // UV4SI, UV8HI ++ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu( ++ ++ // __lsx_vexth_du_wu ++ // vd, vj ++ // UV2DI, UV4SI ++ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu( ++ ++ // __lsx_vexth_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du( ++ ++ // __lsx_vrotri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotri.b( ++ ++ // __lsx_vrotri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h( ++ ++ // __lsx_vrotri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w( ++ ++ // __lsx_vrotri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d( ++ ++ // __lsx_vextl_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d( ++ ++ // __lsx_vsrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h( ++ ++ // __lsx_vsrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w( ++ ++ // __lsx_vsrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d( ++ ++ // __lsx_vsrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q( ++ ++ // __lsx_vssrlni_b_h ++ // vd, vj, 
ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h( ++ ++ // __lsx_vssrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w( ++ ++ // __lsx_vssrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d( ++ ++ // __lsx_vssrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q( ++ ++ // __lsx_vssrlni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h( ++ ++ // __lsx_vssrlni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w( ++ ++ // __lsx_vssrlni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d( ++ ++ // __lsx_vssrlni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q( ++ ++ // __lsx_vssrlrni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h( ++ ++ // __lsx_vssrlrni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w( ++ ++ // __lsx_vssrlrni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d( ++ ++ // __lsx_vssrlrni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q( ++ ++ // __lsx_vssrlrni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h( ++ ++ // __lsx_vssrlrni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w( ++ ++ // __lsx_vssrlrni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( ++ ++ // __lsx_vssrlrni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( ++ ++ // __lsx_vsrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( ++ ++ // __lsx_vsrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( ++ ++ // __lsx_vsrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> 
@llvm.loongarch.lsx.vsrani.w.d( ++ ++ // __lsx_vsrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( ++ ++ // __lsx_vsrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( ++ ++ // __lsx_vsrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( ++ ++ // __lsx_vsrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( ++ ++ // __lsx_vsrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( ++ ++ // __lsx_vssrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( ++ ++ // __lsx_vssrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( ++ ++ // __lsx_vssrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( ++ ++ // __lsx_vssrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( ++ ++ // __lsx_vssrani_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( ++ ++ // __lsx_vssrani_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( ++ ++ // __lsx_vssrani_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( ++ ++ // __lsx_vssrani_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( ++ ++ // __lsx_vssrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( ++ ++ // __lsx_vssrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( ++ ++ // __lsx_vssrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( ++ ++ // __lsx_vssrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( ++ ++ // __lsx_vssrarni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( ++ ++ // __lsx_vssrarni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, 
v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( ++ ++ // __lsx_vssrarni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( ++ ++ // __lsx_vssrarni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( ++ ++ // __lsx_vpermi_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( ++ ++ // __lsx_vld ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( ++ ++ // __lsx_vst ++ // vd, rj, si12 ++ // VOID, V16QI, CVPOINTER, SI ++ __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( ++ ++ // __lsx_vssrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( ++ ++ // __lsx_vssrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( ++ ++ // __lsx_vssrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( ++ ++ // __lsx_vssrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( ++ ++ // __lsx_vssrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( ++ ++ // __lsx_vssrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( ++ ++ // __lsx_vorn_v ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vorn.v( ++ ++ // __lsx_vldi ++ // vd, i13 ++ // V2DI, HI ++ v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( ++ ++ // __lsx_vshuf_b ++ // vd, vj, vk, va ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( ++ ++ // __lsx_vldx ++ // vd, rj, rk ++ // V16QI, CVPOINTER, DI ++ v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( ++ ++ // __lsx_vstx ++ // vd, rj, rk ++ // VOID, V16QI, CVPOINTER, DI ++ __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( ++ ++ // __lsx_vextl_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( ++ ++ // __lsx_bnz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( ++ ++ // __lsx_bz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( ++ ++ // __lsx_bnz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( ++ ++ // __lsx_bnz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( ++ ++ // __lsx_bnz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call 
i32 @llvm.loongarch.lsx.bnz.w( ++ ++ // __lsx_bnz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( ++ ++ // __lsx_bz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( ++ ++ // __lsx_bz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( ++ ++ // __lsx_bz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( ++ ++ // __lsx_bz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( ++ ++ v16i8_r = __lsx_vsrlrni_b_h(v16i8_a, v16i8_b, 2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h( ++ ++ v8i16_r = __lsx_vsrlrni_h_w(v8i16_a, v8i16_b, 2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w( ++ ++ v4i32_r = __lsx_vsrlrni_w_d(v4i32_a, v4i32_b, 2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d( ++ ++ v2i64_r = __lsx_vsrlrni_d_q(v2i64_a, v2i64_b, 2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q( ++ ++ v16i8_r = __lsx_vrepli_b(2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrepli.b( ++ ++ v8i16_r = __lsx_vrepli_h(2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrepli.h( ++ ++ v4i32_r = __lsx_vrepli_w(2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrepli.w( ++ ++ v2i64_r = __lsx_vrepli_d(2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrepli.d( ++} +diff --git a/clang/test/CodeGen/builtins-loongarch.c b/clang/test/CodeGen/builtins-loongarch.c +deleted file mode 100644 +index 3469b1f95..000000000 +--- a/clang/test/CodeGen/builtins-loongarch.c ++++ /dev/null +@@ -1,10 +0,0 @@ +-// RUN: %clang_cc1 -Wall -Wno-unused-but-set-variable -Werror -triple loongarch32 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +-// RUN: %clang_cc1 -Wall -Wno-unused-but-set-variable -Werror -triple loongarch64 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +- +-void test_eh_return_data_regno(void) { +- // CHECK: store volatile i32 4 +- // CHECK: store volatile i32 5 +- volatile int res; +- res = __builtin_eh_return_data_regno(0); +- res = __builtin_eh_return_data_regno(1); +-} +diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c +index 001e866d3..c81fec98d 100644 +--- a/clang/test/CodeGen/ext-int-cc.c ++++ b/clang/test/CodeGen/ext-int-cc.c +@@ -27,8 +27,6 @@ + // RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64_32-apple-ios -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64 + // RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64_32-apple-ios -target-abi darwinpcs -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64DARWIN + // RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ARM +-// RUN: %clang_cc1 -no-enable-noundef-analysis -triple loongarch64 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LA64 +-// RUN: %clang_cc1 -no-enable-noundef-analysis -triple loongarch32 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LA32 + + // Make sure 128 and 64 bit versions are passed like integers. 
+ void ParamPassing(_BitInt(128) b, _BitInt(64) c) {} +@@ -59,8 +57,6 @@ void ParamPassing(_BitInt(128) b, _BitInt(64) c) {} + // AARCH64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) + // AARCH64DARWIN: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) + // ARM: define{{.*}} arm_aapcscc void @ParamPassing(ptr byval(i128) align 8 %{{.+}}, i64 %{{.+}}) +-// LA64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) +-// LA32: define{{.*}} void @ParamPassing(ptr %{{.+}}, i64 %{{.+}}) + + void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {} + // LIN64: define{{.*}} void @ParamPassing2(i64 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}) +@@ -90,8 +86,6 @@ void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {} + // AARCH64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}}) + // AARCH64DARWIN: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}}) + // ARM: define{{.*}} arm_aapcscc void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}}) +-// LA64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}}) +-// LA32: define{{.*}} void @ParamPassing2(ptr %{{.+}}, i63 %{{.+}}) + + // Make sure we follow the signext rules for promotable integer types. + void ParamPassing3(_BitInt(15) a, _BitInt(31) b) {} +@@ -122,8 +116,6 @@ void ParamPassing3(_BitInt(15) a, _BitInt(31) b) {} + // AARCH64: define{{.*}} void @ParamPassing3(i15 %{{.+}}, i31 %{{.+}}) + // AARCH64DARWIN: define{{.*}} void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) + // ARM: define{{.*}} arm_aapcscc void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) +-// LA64: define{{.*}} void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) +-// LA32: define{{.*}} void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) + + #if __BITINT_MAXWIDTH__ > 128 + // When supported, bit-precise types that are >128 are passed indirectly. 
Note, +@@ -158,8 +150,6 @@ void ParamPassing4(_BitInt(129) a) {} + // AARCH64-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}}) + // AARCH64DARWIN-NOT: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}}) + // ARM-NOT: define{{.*}} arm_aapcscc void @ParamPassing4(ptr byval(i129) align 8 %{{.+}}) +-// LA64-NOT: define{{.*}} void @ParamPassing4(ptr %{{.+}}) +-// LA32-NOT: define{{.*}} void @ParamPassing4(ptr %{{.+}}) + #endif + + _BitInt(63) ReturnPassing(void){} +@@ -190,8 +180,6 @@ _BitInt(63) ReturnPassing(void){} + // AARCH64: define{{.*}} i63 @ReturnPassing( + // AARCH64DARWIN: define{{.*}} i63 @ReturnPassing( + // ARM: define{{.*}} arm_aapcscc i63 @ReturnPassing( +-// LA64: define{{.*}} signext i63 @ReturnPassing( +-// LA32: define{{.*}} i63 @ReturnPassing( + + _BitInt(64) ReturnPassing2(void){} + // LIN64: define{{.*}} i64 @ReturnPassing2( +@@ -221,8 +209,6 @@ _BitInt(64) ReturnPassing2(void){} + // AARCH64: define{{.*}} i64 @ReturnPassing2( + // AARCH64DARWIN: define{{.*}} i64 @ReturnPassing2( + // ARM: define{{.*}} arm_aapcscc i64 @ReturnPassing2( +-// LA64: define{{.*}} i64 @ReturnPassing2( +-// LA32: define{{.*}} i64 @ReturnPassing2( + + _BitInt(127) ReturnPassing3(void){} + // LIN64: define{{.*}} { i64, i64 } @ReturnPassing3( +@@ -254,8 +240,6 @@ _BitInt(127) ReturnPassing3(void){} + // AARCH64: define{{.*}} i127 @ReturnPassing3( + // AARCH64DARWIN: define{{.*}} i127 @ReturnPassing3( + // ARM: define{{.*}} arm_aapcscc void @ReturnPassing3(ptr dead_on_unwind noalias writable sret +-// LA64: define{{.*}} i127 @ReturnPassing3( +-// LA32: define{{.*}} void @ReturnPassing3(ptr dead_on_unwind noalias writable sret + + _BitInt(128) ReturnPassing4(void){} + // LIN64: define{{.*}} { i64, i64 } @ReturnPassing4( +@@ -285,8 +269,6 @@ _BitInt(128) ReturnPassing4(void){} + // AARCH64: define{{.*}} i128 @ReturnPassing4( + // AARCH64DARWIN: define{{.*}} i128 @ReturnPassing4( + // ARM: define{{.*}} arm_aapcscc void @ReturnPassing4(ptr dead_on_unwind noalias writable sret +-// LA64: define{{.*}} i128 @ReturnPassing4( +-// LA32: define{{.*}} void @ReturnPassing4(ptr dead_on_unwind noalias writable sret + + #if __BITINT_MAXWIDTH__ > 128 + _BitInt(129) ReturnPassing5(void){} +diff --git a/clang/test/CodeGen/loongarch-inline-asm-modifiers.c b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c +new file mode 100644 +index 000000000..412eca2bd +--- /dev/null ++++ b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c +@@ -0,0 +1,50 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ ++// RUN: | FileCheck %s ++ ++// This checks that the frontend will accept inline asm operand modifiers ++ ++int printf(const char*, ...); ++ ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); ++ ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,$1;\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4)) ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4)) ++// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f" ++// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f" ++int b[8] = {0,1,2,3,4,5,6,7}; ++int main() ++{ ++ int i; ++ v2i64 v2i64_r; ++ v4i64 v4i64_r; ++ ++ // The first word. 
Notice, no 'D' ++ {asm ( ++ "ld.w %0,%1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)));} ++ ++ printf("%d\n",i); ++ ++ // The second word ++ {asm ( ++ "ld.w %0,%D1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)) ++ );} ++ ++ // LSX registers ++ { asm("vldi %w0,1" ++ : "=f"(v2i64_r)); } ++ ++ printf("%d\n", i); ++ ++ // LASX registers ++ { asm("xldi %u0,1" ++ : "=f"(v4i64_r)); } ++ ++ printf("%d\n",i); ++ ++ return 1; ++} +diff --git a/clang/test/CodeGen/loongarch-inline-asm.c b/clang/test/CodeGen/loongarch-inline-asm.c +new file mode 100644 +index 000000000..1f995ac79 +--- /dev/null ++++ b/clang/test/CodeGen/loongarch-inline-asm.c +@@ -0,0 +1,31 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s ++ ++int data; ++ ++void m () { ++ asm("ld.w $r1, %0" :: "m"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(ptr elementtype(i32) @data) ++} ++ ++void ZC () { ++ asm("ll.w $r1, %0" :: "ZC"(data)); ++ // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(ptr elementtype(i32) @data) ++} ++ ++void ZB () { ++ asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data)); ++ // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(ptr elementtype(i32) @data) ++} ++ ++void R () { ++ asm("ld.w $r1, %0" :: "R"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(ptr elementtype(i32) @data) ++} ++ ++int *p; ++void preld () { ++ asm("preld 0, %0, 2" :: "r"(p)); ++ // CHECK: %0 = load ptr, ptr @p, align 8 ++ // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(ptr %0) ++} +diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c +index bdd609c1d..8f994ab4e 100644 +--- a/clang/test/CodeGen/mcount.c ++++ b/clang/test/CodeGen/mcount.c +@@ -7,8 +7,6 @@ + // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s +-// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +-// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp +new file mode 100644 +index 000000000..289d7a430 +--- /dev/null ++++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp +@@ -0,0 +1,35 @@ ++/// Ported from https://reviews.llvm.org/D91269. 
++ ++ // RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - | FileCheck %s ++ ++ struct empty_float2 { struct {}; float f; float g; }; ++ ++ // CHECK: define{{.*}} float @_Z14f_empty_float212empty_float2(float{{[^,]*}}, float{{[^,]*}}) ++ // CHECK: { [4 x i8], float, float } ++ float f_empty_float2(struct empty_float2 a) { ++ return a.g; ++ } ++ ++ struct empty_double2 { struct {}; double f; double g; }; ++ ++ // CHECK: define{{.*}} double @_Z15f_empty_double213empty_double2(double{{[^,]*}}, double{{[^,]*}}) ++ // CHECK: { [8 x i8], double, double } ++ double f_empty_double2(struct empty_double2 a) { ++ return a.g; ++ } ++ ++ struct empty_float_double { struct {}; float f; double g; }; ++ ++ // CHECK: define{{.*}} double @_Z20f_empty_float_double18empty_float_double(float{{[^,]*}}, double{{[^,]*}}) ++ // CHECK: { [4 x i8], float, double } ++ double f_empty_float_double(struct empty_float_double a) { ++ return a.g; ++ } ++ ++ struct empty_double_float { struct {}; double f; float g; }; ++ ++ // CHECK: define{{.*}} double @_Z20f_empty_double_float18empty_double_float(double{{[^,]*}}, float{{[^,]*}}) ++ // CHECK: { [8 x i8], double, float } ++ double f_empty_double_float(struct empty_double_float a) { ++ return a.g; ++ } +diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp +new file mode 100644 +index 000000000..4934fe018 +--- /dev/null ++++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp +@@ -0,0 +1,26 @@ ++/// Ported from https://reviews.llvm.org/D91278. ++ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ ++// RUN: -emit-llvm %s -o - | FileCheck %s ++ ++struct empty_complex_f { ++ struct {}; ++ float _Complex fc; ++}; ++ ++// CHECK: define{{.*}} float @_Z17f_empty_complex_f15empty_complex_f(float{{[^,]*}}, float{{[^,]*}}) ++// CHECK: { [4 x i8], float, float } ++float f_empty_complex_f(struct empty_complex_f a) { ++ return __imag__ a.fc; ++} ++ ++struct empty_complex_d { ++ struct {}; ++ double _Complex fc; ++}; ++ ++// CHECK: define{{.*}} double @_Z17f_empty_complex_d15empty_complex_d(double{{[^,]*}}, double{{[^,]*}}) ++// CHECK: { [8 x i8], double, double } ++double f_empty_complex_d(struct empty_complex_d a) { ++ return __imag__ a.fc; ++} +diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +index 6d8018564..dc5ffaf08 100644 +--- a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp ++++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +@@ -1,4 +1,4 @@ +-// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ ++// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ + // RUN: -emit-llvm %s -o - | FileCheck %s + + #include +@@ -69,12 +69,12 @@ struct child5_virtual_s : virtual parent5_virtual_s { + float f1; + }; + +-// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr noundef nonnull align 8 dereferenceable(12) %this, ptr noundef nonnull align 8 dereferenceable(12) %0) ++// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr{{.*}} %this, ptr{{.*}} dereferenceable(12) %0) + struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) { + return a; + } + +-/// Check for correct lowering in the presence of diamond inheritance. ++/// Check for correct lowering in the presence of diamond inheritance. 
+ + struct parent6_float_s { + float f1; +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/include/c++/13/backward/.keep b/clang/test/Driver/Inputs/debian_loong64_tree/usr/include/c++/13/backward/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/include/loongarch64-linux-gnu/c++/.keep b/clang/test/Driver/Inputs/debian_loong64_tree/usr/include/loongarch64-linux-gnu/c++/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/gcc/loongarch64-linux-gnu/13/crtbegin.o b/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/gcc/loongarch64-linux-gnu/13/crtbegin.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/gcc/loongarch64-linux-gnu/13/crtend.o b/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/gcc/loongarch64-linux-gnu/13/crtend.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/gcc/loongarch64-linux-gnu/13/include/.keep b/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/gcc/loongarch64-linux-gnu/13/include/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/loongarch64-linux-gnu/crt1.o b/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/loongarch64-linux-gnu/crt1.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/loongarch64-linux-gnu/crti.o b/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/loongarch64-linux-gnu/crti.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/loongarch64-linux-gnu/crtn.o b/clang/test/Driver/Inputs/debian_loong64_tree/usr/lib/loongarch64-linux-gnu/crtn.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/bin/.keep b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/bin/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/include/.keep b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/include/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/base/lp64d/crtbegin.o b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/base/lp64d/crtbegin.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/base/lp64f/crtbegin.o b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/base/lp64f/crtbegin.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/base/lp64s/crtbegin.o b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/base/lp64s/crtbegin.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/crtbegin.o 
b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/crtbegin.o +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/bin/ld b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/bin/ld +deleted file mode 100755 +index b23e55619..000000000 +--- a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/bin/ld ++++ /dev/null +@@ -1 +0,0 @@ +-#!/bin/true +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/lib/.keep b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/lib/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/lib64/.keep b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/lib64/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib/.keep b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib64/.keep b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib64/.keep +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/clang/test/Driver/fdirect-access-external-data.c b/clang/test/Driver/fdirect-access-external-data.c +index a6da776e6..f132b1b08 100644 +--- a/clang/test/Driver/fdirect-access-external-data.c ++++ b/clang/test/Driver/fdirect-access-external-data.c +@@ -9,12 +9,6 @@ + // RUN: %clang -### -c -target aarch64 %s -fpic 2>&1 | FileCheck %s --check-prefix=DEFAULT + // RUN: %clang -### -c -target aarch64 %s -fpic -fdirect-access-external-data 2>&1 | FileCheck %s --check-prefix=DIRECT + +-/// loongarch* targets default to -fno-direct-access-external-data even for -fno-pic. 
+-// RUN: %clang -### -c --target=loongarch64 -fno-pic %s 2>&1 | FileCheck %s --check-prefix=INDIRECT +-// RUN: %clang -### -c --target=loongarch64 -fpie %s 2>&1 | FileCheck %s --check-prefix=DEFAULT +-// RUN: %clang -### -c --target=loongarch32 -fno-pic -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DEFAULT +-// RUN: %clang -### -c --target=loongarch32 -fpie -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DIRECT +- + // DEFAULT-NOT: direct-access-external-data" + // DIRECT: "-fdirect-access-external-data" + // INDIRECT: "-fno-direct-access-external-data" +diff --git a/clang/test/Driver/fpatchable-function-entry.c b/clang/test/Driver/fpatchable-function-entry.c +index 4d0d60958..da7370a4d 100644 +--- a/clang/test/Driver/fpatchable-function-entry.c ++++ b/clang/test/Driver/fpatchable-function-entry.c +@@ -2,8 +2,6 @@ + // RUN: %clang -target x86_64 %s -fpatchable-function-entry=1 -c -### 2>&1 | FileCheck %s + // RUN: %clang -target aarch64 %s -fpatchable-function-entry=1 -c -### 2>&1 | FileCheck %s + // RUN: %clang -target aarch64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s +-// RUN: %clang -target loongarch32 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s +-// RUN: %clang -target loongarch64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s + // RUN: %clang -target riscv32 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s + // RUN: %clang -target riscv64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s + // CHECK: "-fpatchable-function-entry=1" +diff --git a/clang/test/Driver/frame-pointer.c b/clang/test/Driver/frame-pointer.c +index 2b4287bf4..9538715f9 100644 +--- a/clang/test/Driver/frame-pointer.c ++++ b/clang/test/Driver/frame-pointer.c +@@ -61,18 +61,6 @@ + // RUN: %clang --target=riscv64-linux-android -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID-64 %s + // RUN: %clang --target=riscv64-linux-android -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-ANDROID-64 %s + +-// RUN: %clang --target=loongarch32 -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-32 %s +-// RUN: %clang --target=loongarch32 -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-32 %s +-// RUN: %clang --target=loongarch32 -### -S -O2 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK2-32 %s +-// RUN: %clang --target=loongarch32 -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-32 %s +-// RUN: %clang --target=loongarch32 -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-32 %s +- +-// RUN: %clang --target=loongarch64 -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-64 %s +-// RUN: %clang --target=loongarch64 -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-64 %s +-// RUN: %clang --target=loongarch64 -### -S -O2 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK2-64 %s +-// RUN: %clang --target=loongarch64 -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-64 %s +-// RUN: %clang --target=loongarch64 -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s +- + // CHECK0-32: -mframe-pointer=all + // CHECK1-32-NOT: -mframe-pointer=all + // CHECK2-32-NOT: -mframe-pointer=all +diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c +index 167182504..07e4703b5 100644 +--- a/clang/test/Driver/fsanitize.c ++++ b/clang/test/Driver/fsanitize.c +@@ -430,12 +430,6 @@ + // RUN: %clang --target=riscv64-linux-gnu -fsanitize=address,leak -fno-sanitize=address %s -### 2>&1 | FileCheck %s 
--check-prefix=CHECK-SANA-SANL-NO-SANA-RISCV64 + // CHECK-SANA-SANL-NO-SANA-RISCV64: "-fsanitize=leak" + +-// RUN: %clang --target=loongarch64-unknown-linux-gnu -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANL-LOONGARCH64 +-// CHECK-SANL-LOONGARCH64: "-fsanitize=leak" +- +-// RUN: %clang --target=loongarch64-unknown-linux-gnu -fsanitize=address,leak -fno-sanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANA-SANL-NO-SANA-LOONGARCH64 +-// CHECK-SANA-SANL-NO-SANA-LOONGARCH64: "-fsanitize=leak" +- + // RUN: %clang --target=x86_64-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-MSAN + // CHECK-MSAN: "-fno-assume-sane-operator-new" + // RUN: %clang --target=x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN +@@ -886,7 +880,6 @@ + // RUN: %clang --target=arm-linux-androideabi -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO + // RUN: %clang --target=x86_64-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO + // RUN: %clang --target=i386-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO +-// RUN: %clang --target=loongarch64-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO + // RUN: %clang --target=mips64-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO + // RUN: %clang --target=mips64el-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO + // RUN: %clang --target=mips-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO +diff --git a/clang/test/Driver/linux-header-search.cpp b/clang/test/Driver/linux-header-search.cpp +index dd4d6eb48..2b73b0faa 100644 +--- a/clang/test/Driver/linux-header-search.cpp ++++ b/clang/test/Driver/linux-header-search.cpp +@@ -244,32 +244,6 @@ + // CHECK-GENTOO-4-9-X-32: "-internal-externc-isystem" "[[SYSROOT]]/include" + // CHECK-GENTOO-4-9-X-32: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" + // +-// Check header search on Debian loong64 +-// RUN: %clang -### %s -fsyntax-only 2>&1 \ +-// RUN: --target=loongarch64-unknown-linux-gnu -stdlib=libstdc++ \ +-// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +-// RUN: --gcc-toolchain="" \ +-// RUN: | FileCheck --check-prefix=CHECK-LOONG64-GNU %s +-// +-// Check that "-gnuf64" is seen as "-gnu" for loong64. 
+-// RUN: %clang -### %s -fsyntax-only 2>&1 \ +-// RUN: --target=loongarch64-unknown-linux-gnuf64 -stdlib=libstdc++ \ +-// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +-// RUN: --gcc-toolchain="" \ +-// RUN: | FileCheck --check-prefix=CHECK-LOONG64-GNU %s +-// CHECK-LOONG64-GNU: "-cc1" +-// CHECK-LOONG64-GNU: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +-// CHECK-LOONG64-GNU: "-isysroot" "[[SYSROOT:[^"]+]]" +-// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../include/c++/13" +-// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../include/c++/13/loongarch64-linux-gnu" +-// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../include/c++/13/backward" +-// CHECK-LOONG64-GNU: "-internal-isystem" "[[RESOURCE_DIR]]{{/|\\\\}}include" +-// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/local/include" +-// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../loongarch64-linux-gnu/include" +-// CHECK-LOONG64-GNU: "-internal-externc-isystem" "[[SYSROOT]]/usr/include/loongarch64-linux-gnu" +-// CHECK-LOONG64-GNU: "-internal-externc-isystem" "[[SYSROOT]]/include" +-// CHECK-LOONG64-GNU: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" +-// + // Check header search on Debian 6 / MIPS64 + // RUN: %clang -### %s -fsyntax-only 2>&1 \ + // RUN: --target=mips64-unknown-linux-gnuabi64 -stdlib=libstdc++ \ +diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c +index b8efd64cd..bc74583bb 100644 +--- a/clang/test/Driver/linux-ld.c ++++ b/clang/test/Driver/linux-ld.c +@@ -827,30 +827,6 @@ + // CHECK-ARM-HF: "-dynamic-linker" "{{.*}}/lib/ld-linux-armhf.so.3" + // + // RUN: %clang -### %s -no-pie 2>&1 \ +-// RUN: --target=loongarch64-linux-gnu \ +-// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64D %s +-// RUN: %clang -### %s -no-pie 2>&1 \ +-// RUN: --target=loongarch64-linux-gnuf64 \ +-// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64D %s +-// CHECK-LOONGARCH-LP64D: "{{.*}}ld{{(.exe)?}}" +-// CHECK-LOONGARCH-LP64D: "-m" "elf64loongarch" +-// CHECK-LOONGARCH-LP64D: "-dynamic-linker" "{{.*}}/lib64/ld-linux-loongarch-lp64d.so.1" +-// +-// RUN: %clang -### %s -no-pie 2>&1 \ +-// RUN: --target=loongarch64-linux-gnuf32 \ +-// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64F %s +-// CHECK-LOONGARCH-LP64F: "{{.*}}ld{{(.exe)?}}" +-// CHECK-LOONGARCH-LP64F: "-m" "elf64loongarch" +-// CHECK-LOONGARCH-LP64F: "-dynamic-linker" "{{.*}}/lib64/ld-linux-loongarch-lp64f.so.1" +-// +-// RUN: %clang -### %s -no-pie 2>&1 \ +-// RUN: --target=loongarch64-linux-gnusf \ +-// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64S %s +-// CHECK-LOONGARCH-LP64S: "{{.*}}ld{{(.exe)?}}" +-// CHECK-LOONGARCH-LP64S: "-m" "elf64loongarch" +-// CHECK-LOONGARCH-LP64S: "-dynamic-linker" "{{.*}}/lib64/ld-linux-loongarch-lp64s.so.1" +-// +-// RUN: %clang -### %s -no-pie 2>&1 \ + // RUN: --target=powerpc64-linux-gnu \ + // RUN: | FileCheck --check-prefix=CHECK-PPC64 %s + // CHECK-PPC64: "{{.*}}ld{{(.exe)?}}" +@@ -1365,28 +1341,6 @@ + // RUN: not %clang -### %s --target=aarch64-linux -fandroid-pad-segment 2>&1 | FileCheck --check-prefix=ERR-ANDROID-PAD-EHDR %s + // ERR-ANDROID-PAD-EHDR: error: unsupported option '-fandroid-pad-segment' for target 'aarch64-linux' + +-// Check linker invocation on a Debian LoongArch sysroot. 
+-// RUN: %clang -### %s -no-pie 2>&1 \ +-// RUN: --target=loongarch64-linux-gnu -rtlib=platform --unwindlib=platform \ +-// RUN: --gcc-toolchain="" \ +-// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +-// RUN: | FileCheck --check-prefix=CHECK-DEBIAN-ML-LOONG64 %s +-// +-// Check that "-gnuf64" is seen as "-gnu" for loong64. +-// RUN: %clang -### %s -no-pie 2>&1 \ +-// RUN: --target=loongarch64-linux-gnuf64 -rtlib=platform --unwindlib=platform \ +-// RUN: --gcc-toolchain="" \ +-// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +-// RUN: | FileCheck --check-prefix=CHECK-DEBIAN-ML-LOONG64 %s +-// CHECK-DEBIAN-ML-LOONG64: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" +-// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/loongarch64-linux-gnu/crt1.o" +-// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/loongarch64-linux-gnu/crti.o" +-// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/crtbegin.o" +-// CHECK-DEBIAN-ML-LOONG64: "-L[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13" +-// CHECK-DEBIAN-ML-LOONG64: "-L[[SYSROOT]]/usr/lib/loongarch64-linux-gnu" +-// CHECK-DEBIAN-ML-LOONG64: "-L[[SYSROOT]]/usr/lib" +-// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/crtend.o" +-// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/loongarch64-linux-gnu/crtn.o" + // + // Check linker invocation on Debian 6 MIPS 32/64-bit. + // RUN: %clang -### %s -no-pie 2>&1 \ +diff --git a/clang/test/Driver/loongarch-abi-error.c b/clang/test/Driver/loongarch-abi-error.c +deleted file mode 100644 +index 2d4f4b925..000000000 +--- a/clang/test/Driver/loongarch-abi-error.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-// RUN: not %clang --target=loongarch32-unknown-elf %s -fsyntax-only -mabi=lp64s 2>&1 \ +-// RUN: | FileCheck --check-prefix=CHECK-LA32-LP64S %s +-// RUN: not %clang --target=loongarch32-unknown-elf %s -fsyntax-only -mabi=lp64f 2>&1 \ +-// RUN: | FileCheck --check-prefix=CHECK-LA32-LP64F %s +-// RUN: not %clang --target=loongarch32-unknown-elf %s -fsyntax-only -mabi=lp64d 2>&1 \ +-// RUN: | FileCheck --check-prefix=CHECK-LA32-LP64D %s +- +-// RUN: not %clang --target=loongarch64-unknown-elf %s -fsyntax-only -mabi=ilp32s 2>&1 \ +-// RUN: | FileCheck --check-prefix=CHECK-LA64-ILP32S %s +-// RUN: not %clang --target=loongarch64-unknown-elf %s -fsyntax-only -mabi=ilp32f 2>&1 \ +-// RUN: | FileCheck --check-prefix=CHECK-LA64-ILP32F %s +-// RUN: not %clang --target=loongarch64-unknown-elf %s -fsyntax-only -mabi=ilp32d 2>&1 \ +-// RUN: | FileCheck --check-prefix=CHECK-LA64-ILP32D %s +- +-// CHECK-LA32-LP64S: error: unknown target ABI 'lp64s' +-// CHECK-LA32-LP64F: error: unknown target ABI 'lp64f' +-// CHECK-LA32-LP64D: error: unknown target ABI 'lp64d' +- +-// CHECK-LA64-ILP32S: error: unknown target ABI 'ilp32s' +-// CHECK-LA64-ILP32F: error: unknown target ABI 'ilp32f' +-// CHECK-LA64-ILP32D: error: unknown target ABI 'ilp32d' +diff --git a/clang/test/Driver/loongarch-abi.c b/clang/test/Driver/loongarch-abi.c +deleted file mode 100644 +index 12a81d664..000000000 +--- a/clang/test/Driver/loongarch-abi.c ++++ /dev/null +@@ -1,53 +0,0 @@ +-// RUN: %clang --target=loongarch32-unknown-elf %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32D %s +-// RUN: %clang --target=loongarch32-unknown-elf %s -fsyntax-only -### -mabi=ilp32s 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32S %s +-// RUN: %clang --target=loongarch32-unknown-elf %s -fsyntax-only -### -mabi=ilp32f 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32F %s +-// RUN: %clang 
--target=loongarch32-unknown-elf %s -fsyntax-only -### -mabi=ilp32d 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32D %s +- +-// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64D %s +-// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### -mabi=lp64s 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64S %s +-// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### -mabi=lp64f 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64F %s +-// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### -mabi=lp64d 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64D %s +- +-// RUN: %clang --target=loongarch32-linux-gnusf %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32S %s +-// RUN: %clang --target=loongarch32-linux-gnuf32 %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32F %s +-// RUN: %clang --target=loongarch32-linux-gnuf64 %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32D %s +-// RUN: %clang --target=loongarch32-linux-gnu %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32D %s +- +-// RUN: %clang --target=loongarch64-linux-gnusf %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64S %s +-// RUN: %clang --target=loongarch64-linux-gnuf32 %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64F %s +-// RUN: %clang --target=loongarch64-linux-gnuf64 %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64D %s +-// RUN: %clang --target=loongarch64-linux-gnu %s -fsyntax-only -### 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64D %s +- +-// Check that -mabi prevails in case of conflicts with the triple-implied ABI. +-// RUN: %clang --target=loongarch32-linux-gnuf64 %s -fsyntax-only -### -mabi=ilp32s 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32S %s +-// RUN: %clang --target=loongarch64-linux-gnuf64 %s -fsyntax-only -### -mabi=lp64s 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64S %s +-// RUN: %clang --target=loongarch32-linux-gnu %s -fsyntax-only -### -mabi=ilp32s 2>&1 \ +-// RUN: | FileCheck --check-prefix=ILP32S %s +-// RUN: %clang --target=loongarch64-linux-gnu %s -fsyntax-only -### -mabi=lp64s 2>&1 \ +-// RUN: | FileCheck --check-prefix=LP64S %s +- +-// ILP32S: "-target-abi" "ilp32s" +-// ILP32F: "-target-abi" "ilp32f" +-// ILP32D: "-target-abi" "ilp32d" +- +-// LP64S: "-target-abi" "lp64s" +-// LP64F: "-target-abi" "lp64f" +-// LP64D: "-target-abi" "lp64d" +diff --git a/clang/test/Driver/loongarch-alignment-feature.c b/clang/test/Driver/loongarch-alignment-feature.c +new file mode 100644 +index 000000000..2270ff536 +--- /dev/null ++++ b/clang/test/Driver/loongarch-alignment-feature.c +@@ -0,0 +1,8 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s ++ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s ++ ++// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" ++// CHECK-ALIGNED: "-target-feature" "-unaligned-access" +diff --git a/clang/test/Driver/loongarch-as.s b/clang/test/Driver/loongarch-as.s +deleted file mode 100644 +index 6f6d87f6a..000000000 +--- a/clang/test/Driver/loongarch-as.s ++++ /dev/null +@@ -1,15 +0,0 @@ +-/// This file checks options are correctly passed to as for LoongArch targets. +- +-/// Check `-mabi`. 
+-// RUN: %clang --target=loongarch64 -### -fno-integrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64d --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -mabi=lp64d -### -fno-integrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64d --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -mabi=lp64f -### -fno-integrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64f --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -mabi=lp64s -### -fno-integrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64s --check-prefix=ABI %s +- +-// ALL: as +- +-// ABI: "-mabi=[[ABI]]" +diff --git a/clang/test/Driver/loongarch-default-features.c b/clang/test/Driver/loongarch-default-features.c +deleted file mode 100644 +index 3cdf3ba3d..000000000 +--- a/clang/test/Driver/loongarch-default-features.c ++++ /dev/null +@@ -1,9 +0,0 @@ +-// RUN: %clang --target=loongarch32 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA32 +-// RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64 +- +-// LA32: "target-features"="+32bit" +-// LA64: "target-features"="+64bit,+d,+f,+ual" +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-ias.s b/clang/test/Driver/loongarch-ias.s +deleted file mode 100644 +index 6fec9e6e1..000000000 +--- a/clang/test/Driver/loongarch-ias.s ++++ /dev/null +@@ -1,23 +0,0 @@ +-/// This file checks options are correctly passed to cc1as for LoongArch targets. +- +-/// Check `-target-abi`. +-// RUN: %clang --target=loongarch32 -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=ilp32d --check-prefix=ABI %s +-// RUN: %clang --target=loongarch32 -mabi=ilp32d -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=ilp32d --check-prefix=ABI %s +-// RUN: %clang --target=loongarch32 -mabi=ilp32f -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=ilp32f --check-prefix=ABI %s +-// RUN: %clang --target=loongarch32 -mabi=ilp32s -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=ilp32s --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64d --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -mabi=lp64d -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64d --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -mabi=lp64f -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64f --check-prefix=ABI %s +-// RUN: %clang --target=loongarch64 -mabi=lp64s -### -fintegrated-as -c %s 2>&1 | \ +-// RUN: FileCheck -DABI=lp64s --check-prefix=ABI %s +- +-// ALL: -cc1as +- +-// ABI: "-target-abi" "[[ABI]]" +diff --git a/clang/test/Driver/loongarch-march-error.c b/clang/test/Driver/loongarch-march-error.c +deleted file mode 100644 +index 6bfaa18b0..000000000 +--- a/clang/test/Driver/loongarch-march-error.c ++++ /dev/null +@@ -1,7 +0,0 @@ +-// RUN: not %clang --target=loongarch64 -march=loongarch -fsyntax-only %s 2>&1 | \ +-// RUN: FileCheck -DCPU=loongarch %s +-// RUN: not %clang --target=loongarch64 -march=LA464 -fsyntax-only %s 2>&1 | \ +-// RUN: FileCheck -DCPU=LA464 %s +- +-// CHECK: error: unknown target CPU '[[CPU]]' +-// CHECK-NEXT: note: valid target CPU values are: {{.*}} +diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c +index 9214130cd..8978b731d 100644 +--- a/clang/test/Driver/loongarch-march.c ++++ b/clang/test/Driver/loongarch-march.c +@@ -1,27 +1,18 @@ +-// RUN: %clang --target=loongarch64 -march=loongarch64 -fsyntax-only %s -### 
2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LOONGARCH64 +-// RUN: %clang --target=loongarch64 -march=la464 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LA464 +-// RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 +-// RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LA464 ++/// This test checks the valid cpu model which is supported by LoongArch. + +-// CC1-LOONGARCH64: "-target-cpu" "loongarch64" +-// CC1-LOONGARCH64-NOT: "-target-feature" +-// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+ual" +-// CC1-LOONGARCH64-NOT: "-target-feature" +-// CC1-LOONGARCH64: "-target-abi" "lp64d" ++// RUN: %clang --target=loongarch64 -march=la264 -fsyntax-only %s -### 2>&1 \ ++// RUN: | FileCheck -check-prefixes=CPU,ABI %s -DCPU=la264 -DABI=lp64 ++// RUN: %clang --target=loongarch64 -march=la364 -fsyntax-only %s -### 2>&1 \ ++// RUN: | FileCheck -check-prefixes=CPU,ABI %s -DCPU=la364 -DABI=lp64 ++// RUN: %clang --target=loongarch64 -march=la464 -fsyntax-only %s -### 2>&1 \ ++// RUN: | FileCheck -check-prefixes=CPU,ABI %s -DCPU=la464 -DABI=lp64 ++// RUN: %clang --target=loongarch64 -march=la664 -fsyntax-only %s -### 2>&1 \ ++// RUN: | FileCheck -check-prefixes=CPU,ABI %s -DCPU=la664 -DABI=lp64 + +-// CC1-LA464: "-target-cpu" "la464" +-// CC1-LA464-NOT: "-target-feature" +-// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" +-// CC1-LA464-NOT: "-target-feature" +-// CC1-LA464: "-target-abi" "lp64d" ++// RUN: not %clang --target=loongarch64 -march=xxx -fsyntax-only %s 2>&1 \ ++// RUN: | FileCheck -check-prefix=INVALID %s + +-// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" +-// IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" ++// CPU: "-target-cpu" "[[CPU]]" ++// ABI: "-target-abi" "[[ABI]]" + +-int foo(void) { +- return 3; +-} ++// INVALID: error: unknown target CPU 'xxx' +diff --git a/clang/test/Driver/loongarch-mdouble-float.c b/clang/test/Driver/loongarch-mdouble-float.c +deleted file mode 100644 +index caa9ab42c..000000000 +--- a/clang/test/Driver/loongarch-mdouble-float.c ++++ /dev/null +@@ -1,24 +0,0 @@ +-// RUN: %clang --target=loongarch64 -mdouble-float -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1 +-// RUN: %clang --target=loongarch64 -mdouble-float -mfpu=64 -mabi=lp64d -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,NOWARN +-// RUN: %clang --target=loongarch64 -mdouble-float -mfpu=0 -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,WARN,WARN-FPU0 +-// RUN: %clang --target=loongarch64 -mdouble-float -mfpu=none -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,WARN,WARN-FPUNONE +-// RUN: %clang --target=loongarch64 -mdouble-float -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR +- +-// NOWARN-NOT: warning: +-// WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-mdouble-float' (lp64d) +-// WARN-FPU0: warning: ignoring '-mfpu=0' as it conflicts with that implied by '-mdouble-float' (64) +-// WARN-FPUNONE: warning: ignoring '-mfpu=none' as it conflicts with 
that implied by '-mdouble-float' (64) +- +-// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "+d" +-// CC1: "-target-abi" "lp64d" +- +-// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}" +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-mfpu-error.c b/clang/test/Driver/loongarch-mfpu-error.c +deleted file mode 100644 +index 88a6c7487..000000000 +--- a/clang/test/Driver/loongarch-mfpu-error.c ++++ /dev/null +@@ -1,3 +0,0 @@ +-// RUN: not %clang --target=loongarch64 -mfpu=xxx %s -### 2>&1 | FileCheck %s +- +-// CHECK: invalid argument 'xxx' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none) +diff --git a/clang/test/Driver/loongarch-mfpu.c b/clang/test/Driver/loongarch-mfpu.c +deleted file mode 100644 +index 753057d70..000000000 +--- a/clang/test/Driver/loongarch-mfpu.c ++++ /dev/null +@@ -1,34 +0,0 @@ +-// RUN: %clang --target=loongarch64 -mfpu=64 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-FPU64 +-// RUN: %clang --target=loongarch64 -mfpu=32 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-FPU32 +-// RUN: %clang --target=loongarch64 -mfpu=0 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-FPU0 +-// RUN: %clang --target=loongarch64 -mfpu=none -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-FPU0 +- +-// RUN: %clang --target=loongarch64 -mfpu=64 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-FPU64 +-// RUN: %clang --target=loongarch64 -mfpu=32 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-FPU32 +-// RUN: %clang --target=loongarch64 -mfpu=0 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-FPU0 +-// RUN: %clang --target=loongarch64 -mfpu=none -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-FPU0 +- +-// CC1-FPU64: "-target-feature" "+f"{{.*}} "-target-feature" "+d" +-// CC1-FPU64: "-target-abi" "lp64d" +- +-// CC1-FPU32: "-target-feature" "+f"{{.*}} "-target-feature" "-d" +-// CC1-FPU32: "-target-abi" "lp64f" +- +-// CC1-FPU0: "-target-feature" "-f"{{.*}} "-target-feature" "-d" +-// CC1-FPU0: "-target-abi" "lp64s" +- +-// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}" +-// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d{{(,.*)?}}" +-// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c +deleted file mode 100644 +index e66f277f7..000000000 +--- a/clang/test/Driver/loongarch-mlasx-error.c ++++ /dev/null +@@ -1,15 +0,0 @@ +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 \ +-// RUN: 
FileCheck --check-prefix=ERROR_LASX_FPU128 %s +- +-// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. +-// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. +diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c +deleted file mode 100644 +index 0b934f125..000000000 +--- a/clang/test/Driver/loongarch-mlasx.c ++++ /dev/null +@@ -1,37 +0,0 @@ +-/// Test -m[no-]lasx options. +- +-// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LASX +-// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NOLASX +-// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NOLASX +-// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LASX +-// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LASX +-// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LASX +- +-// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LASX +-// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NOLASX +-// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NOLASX +-// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LASX +-// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LASX +-// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LASX +- +-// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" +-// CC1-NOLASX: "-target-feature" "-lasx" +- +-// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" +-// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" +- +-int foo(void){ +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c +deleted file mode 100644 +index bd6b8e271..000000000 +--- a/clang/test/Driver/loongarch-mlsx-error.c ++++ /dev/null +@@ -1,12 +0,0 @@ +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +-// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 \ +-// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +- +-// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. +diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c +deleted file mode 100644 +index 7d4307b07..000000000 +--- a/clang/test/Driver/loongarch-mlsx.c ++++ /dev/null +@@ -1,41 +0,0 @@ +-/// Test -m[no-]lsx options. 
+- +-// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LSX +-// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NOLSX +-// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NOLSX +-// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LSX +-// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LSX +-// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-LSX +-// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NOLSX +- +-// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LSX +-// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NOLSX +-// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NOLSX +-// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LSX +-// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LSX +-// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-LSX +-// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NOLSX +- +-// CC1-LSX: "-target-feature" "+lsx" +-// CC1-NOLSX: "-target-feature" "-lsx" +- +-// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" +-// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" +- +-int foo(void){ +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-msingle-float.c b/clang/test/Driver/loongarch-msingle-float.c +deleted file mode 100644 +index bd9b3e8a8..000000000 +--- a/clang/test/Driver/loongarch-msingle-float.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-// RUN: %clang --target=loongarch64 -msingle-float -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1 +-// RUN: %clang --target=loongarch64 -msingle-float -mfpu=32 -mabi=lp64f -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,NOWARN +-// RUN: %clang --target=loongarch64 -msingle-float -mfpu=64 -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,WARN +-// RUN: %clang --target=loongarch64 -msingle-float -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR +- +-// NOWARN-NOT: warning: +-// WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-msingle-float' (lp64f) +-// WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msingle-float' (32) +- +-// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" +-// CC1: "-target-abi" "lp64f" +- +-// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d" +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-msoft-float.c b/clang/test/Driver/loongarch-msoft-float.c +deleted file mode 100644 +index 0e5121ac8..000000000 +--- a/clang/test/Driver/loongarch-msoft-float.c ++++ /dev/null +@@ -1,21 +0,0 @@ 
+-// RUN: %clang --target=loongarch64 -msoft-float -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1 +-// RUN: %clang --target=loongarch64 -msoft-float -mfpu=0 -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,NOWARN +-// RUN: %clang --target=loongarch64 -msoft-float -mfpu=64 -mabi=lp64d -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefixes=CC1,WARN +-// RUN: %clang --target=loongarch64 -msoft-float -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR +- +-// NOWARN-NOT: warning: +-// WARN: warning: ignoring '-mabi=lp64d' as it conflicts with that implied by '-msoft-float' (lp64s) +-// WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msoft-float' (0) +- +-// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" +-// CC1: "-target-abi" "lp64s" +- +-// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-mtune.c b/clang/test/Driver/loongarch-mtune.c +deleted file mode 100644 +index 6f3f39e9b..000000000 +--- a/clang/test/Driver/loongarch-mtune.c ++++ /dev/null +@@ -1,34 +0,0 @@ +-// RUN: %clang --target=loongarch64 -mtune=loongarch64 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=loongarch64 +-// RUN: %clang --target=loongarch64 -mtune=loongarch64 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=loongarch64 +- +-// RUN: %clang --target=loongarch64 -mtune=la464 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=la464 +-// RUN: %clang --target=loongarch64 -mtune=la464 -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la464 +- +-// RUN: %clang --target=loongarch64 -mtune=invalidcpu -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=invalidcpu +-// RUN: not %clang --target=loongarch64 -mtune=invalidcpu -S -emit-llvm %s -o /dev/null 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=ERROR -DCPU=invalidcpu +- +-// RUN: %clang --target=loongarch64 -mtune=generic -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=generic +-// RUN: not %clang --target=loongarch64 -mtune=generic -S -emit-llvm %s -o /dev/null 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=ERROR -DCPU=generic +- +-// RUN: %clang --target=loongarch64 -mtune=generic-la64 -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=generic-la64 +-// RUN: not %clang --target=loongarch64 -mtune=generic-la64 -S -emit-llvm %s -o /dev/null 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=ERROR -DCPU=generic-la64 +- +-// CC1ARG: "-tune-cpu" "[[CPU]]" +-// IRATTR: "tune-cpu"="[[CPU]]" +- +-// ERROR: error: unknown target CPU '[[CPU]]' +-// ERROR-NEXT: note: valid target CPU values are: {{.*}} +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-munaligned-access.c b/clang/test/Driver/loongarch-munaligned-access.c +deleted file mode 100644 +index 44edb2eb1..000000000 +--- a/clang/test/Driver/loongarch-munaligned-access.c ++++ /dev/null +@@ -1,61 +0,0 @@ +-/// Test -m[no-]unaligned-access and -m[no-]strict-align options. 
+- +-// RUN: %clang --target=loongarch64 -munaligned-access -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mstrict-align -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +-// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +-// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +-// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +- +-// RUN: %clang --target=loongarch64 -munaligned-access -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-unaligned-access -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mstrict-align -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-strict-align -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +-// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +-// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +-// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -S -emit-llvm %s -o - | \ +-// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +-// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -S -emit-llvm %s -o 
- | \ +-// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +- +-// CC1-UNALIGNED: "-target-feature" "+ual" +-// CC1-NO-UNALIGNED: "-target-feature" "-ual" +- +-// IR-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+ual{{(,.*)?}}" +-// IR-NO-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-ual{{(,.*)?}}" +- +-int foo(void) { +- return 3; +-} +diff --git a/clang/test/Driver/loongarch-toolchain.c b/clang/test/Driver/loongarch-toolchain.c +index 6d62b1101..62da40265 100644 +--- a/clang/test/Driver/loongarch-toolchain.c ++++ b/clang/test/Driver/loongarch-toolchain.c +@@ -1,27 +1,15 @@ +-// UNSUPPORTED: system-windows +-/// A basic clang -cc1 command-line, and simple environment check. ++/// Check the behavior of toolchain for LoongArch. ++/// REQUIRES: loongarch-registered-target ++/// UNSUPPORTED: system-windows + +-// RUN: %clang %s -### --target=loongarch32 2>&1 | FileCheck --check-prefix=CC1 %s -DTRIPLE=loongarch32 +-// RUN: %clang %s -### --target=loongarch64 2>&1 | FileCheck --check-prefix=CC1 %s -DTRIPLE=loongarch64 ++///----------------------------------------------------------------------------- ++/// Checking dwarf-version + +-// CC1: "-cc1" "-triple" "[[TRIPLE]]" ++// RUN: %clang -### -g --target=loongarch64-linux %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s ++// DWARF_VER: "-dwarf-version=4" + +-/// In the below tests, --rtlib=platform is used so that the driver ignores +-/// the configure-time CLANG_DEFAULT_RTLIB option when choosing the runtime lib. ++///----------------------------------------------------------------------------- ++/// Checking the "-X" options is passed to linker (ld or lld) + +-// RUN: env "PATH=" %clang -### %s -fuse-ld=ld -no-pie -mabi=lp64d \ +-// RUN: --target=loongarch64-unknown-linux-gnu --rtlib=platform --unwindlib=platform \ +-// RUN: --gcc-toolchain=%S/Inputs/multilib_loongarch_linux_sdk \ +-// RUN: --sysroot=%S/Inputs/multilib_loongarch_linux_sdk/sysroot 2>&1 \ +-// RUN: | FileCheck --check-prefix=LA64 %s +- +-// LA64: "{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/../../../../loongarch64-unknown-linux-gnu/bin/ld" +-// LA64-SAME: {{^}} "--sysroot={{.*}}/Inputs/multilib_loongarch_linux_sdk/sysroot" +-// LA64-SAME: "-m" "elf64loongarch" +-// LA64-SAME: "-dynamic-linker" "/lib64/ld-linux-loongarch-lp64d.so.1" +-// LA64-SAME: "{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/crtbegin.o" +-// LA64-SAME: "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0" +-// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/../../../../loongarch64-unknown-linux-gnu/lib/../lib64" +-// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib/../lib64" +-// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/../../../../loongarch64-unknown-linux-gnu/lib" +-// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib" ++// RUN: %clang -### -g --target=loongarch64-linux %s 2>&1 | FileCheck -check-prefix=LINKER_X_OPT %s ++// LINKER_X_OPT: "-X" +diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c +index 1eb6ae16f..d8a41b0f5 100644 +--- a/clang/test/Driver/mcmodel.c ++++ b/clang/test/Driver/mcmodel.c +@@ -15,14 +15,6 @@ + // RUN: not %clang -### -c --target=aarch64 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=ERR-MEDIUM %s + // RUN: not %clang 
-### -c --target=aarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=ERR-KERNEL %s + // RUN: not %clang --target=aarch64_32-linux -### -S -mcmodel=small %s 2>&1 | FileCheck --check-prefix=ERR-AARCH64_32 %s +-// RUN: %clang --target=loongarch64 -### -S -mcmodel=normal %s 2>&1 | FileCheck --check-prefix=SMALL %s +-// RUN: %clang --target=loongarch64 -### -S -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s +-// RUN: %clang --target=loongarch64 -### -S -mcmodel=extreme %s 2>&1 | FileCheck --check-prefix=LARGE %s +-// RUN: not %clang --target=loongarch64 -### -S -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=ERR-TINY %s +-// RUN: not %clang --target=loongarch64 -### -S -mcmodel=small %s 2>&1 | FileCheck --check-prefix=ERR-SMALL %s +-// RUN: not %clang --target=loongarch64 -### -S -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=ERR-KERNEL %s +-// RUN: not %clang --target=loongarch64 -### -S -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LARGE %s +-// RUN: not %clang --target=loongarch64 -### -S -mcmodel=extreme -fplt %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-PLT-EXTREME %s + + // TINY: "-mcmodel=tiny" + // SMALL: "-mcmodel=small" +@@ -33,14 +25,9 @@ + + // INVALID: error: unsupported argument 'lager' to option '-mcmodel=' for target '{{.*}}' + +-// ERR-TINY: error: unsupported argument 'tiny' to option '-mcmodel=' for target '{{.*}}' +-// ERR-SMALL: error: unsupported argument 'small' to option '-mcmodel=' for target '{{.*}}' + // ERR-MEDIUM: error: unsupported argument 'medium' to option '-mcmodel=' for target '{{.*}}' + // ERR-KERNEL: error: unsupported argument 'kernel' to option '-mcmodel=' for target '{{.*}}' + // ERR-LARGE: error: unsupported argument 'large' to option '-mcmodel=' for target '{{.*}}' + + // AARCH64-PIC-LARGE: error: invalid argument '-mcmodel=large' only allowed with '-fno-pic' + // ERR-AARCH64_32: error: unsupported argument 'small' to option '-mcmodel=' for target 'aarch64_32-unknown-linux' +- +-// ERR-LOONGARCH64-PLT-LARGE: error: invalid argument '-mcmodel=large' not allowed with '-fplt' +-// ERR-LOONGARCH64-PLT-EXTREME: error: invalid argument '-mcmodel=extreme' not allowed with '-fplt' +diff --git a/clang/test/Driver/munaligned-access-unused.c b/clang/test/Driver/munaligned-access-unused.c +deleted file mode 100644 +index 1d86edb79..000000000 +--- a/clang/test/Driver/munaligned-access-unused.c ++++ /dev/null +@@ -1,8 +0,0 @@ +-/// Check -m[no-]unaligned-access and -m[no-]strict-align are warned unused on a target that does not support them. 
+- +-// RUN: not %clang --target=x86_64 -munaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=unaligned-access +-// RUN: not %clang --target=x86_64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-unaligned-access +-// RUN: not %clang --target=x86_64 -mstrict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=strict-align +-// RUN: not %clang --target=x86_64 -mno-strict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-strict-align +- +-// CHECK: error: unsupported option '-m{{(no-)?}}unaligned-access' for target '{{.*}}' +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 10a4cc7e6..a32853d17 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -1,854 +1,10 @@ +-// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch32 /dev/null \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32 %s +-// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch32-unknown-linux /dev/null \ +-// RUN: | FileCheck --match-full-lines --check-prefixes=LA32,LA32-LINUX %s +-// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch32 \ +-// RUN: -fforce-enable-int128 /dev/null | FileCheck --match-full-lines \ +-// RUN: --check-prefixes=LA32,LA32-INT128 %s +- +-// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch64 /dev/null \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64 %s +-// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch64-unknown-linux /dev/null \ +-// RUN: | FileCheck --match-full-lines --check-prefixes=LA64,LA64-LINUX %s +-// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch64 \ +-// RUN: -fforce-enable-int128 /dev/null | FileCheck --match-full-lines \ +-// RUN: --check-prefixes=LA64,LA64-INT128 %s +- +-//// Note that common macros are tested in init.c, such as __VERSION__. So they're not listed here. 
+- +-// LA32: #define _ILP32 1 +-// LA32: #define __ATOMIC_ACQUIRE 2 +-// LA32-NEXT: #define __ATOMIC_ACQ_REL 4 +-// LA32-NEXT: #define __ATOMIC_CONSUME 1 +-// LA32-NEXT: #define __ATOMIC_RELAXED 0 +-// LA32-NEXT: #define __ATOMIC_RELEASE 3 +-// LA32-NEXT: #define __ATOMIC_SEQ_CST 5 +-// LA32: #define __BIGGEST_ALIGNMENT__ 16 +-// LA32: #define __BITINT_MAXWIDTH__ 128 +-// LA32: #define __BOOL_WIDTH__ 8 +-// LA32: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +-// LA32: #define __CHAR16_TYPE__ unsigned short +-// LA32: #define __CHAR32_TYPE__ unsigned int +-// LA32: #define __CHAR_BIT__ 8 +-// LA32: #define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_INT_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_LLONG_LOCK_FREE 1 +-// LA32: #define __CLANG_ATOMIC_LONG_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 +-// LA32: #define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 +-// LA32: #define __DBL_DECIMAL_DIG__ 17 +-// LA32: #define __DBL_DENORM_MIN__ 4.9406564584124654e-324 +-// LA32: #define __DBL_DIG__ 15 +-// LA32: #define __DBL_EPSILON__ 2.2204460492503131e-16 +-// LA32: #define __DBL_HAS_DENORM__ 1 +-// LA32: #define __DBL_HAS_INFINITY__ 1 +-// LA32: #define __DBL_HAS_QUIET_NAN__ 1 +-// LA32: #define __DBL_MANT_DIG__ 53 +-// LA32: #define __DBL_MAX_10_EXP__ 308 +-// LA32: #define __DBL_MAX_EXP__ 1024 +-// LA32: #define __DBL_MAX__ 1.7976931348623157e+308 +-// LA32: #define __DBL_MIN_10_EXP__ (-307) +-// LA32: #define __DBL_MIN_EXP__ (-1021) +-// LA32: #define __DBL_MIN__ 2.2250738585072014e-308 +-// LA32: #define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ +-// LA32: #define __FLT_DECIMAL_DIG__ 9 +-// LA32: #define __FLT_DENORM_MIN__ 1.40129846e-45F +-// LA32: #define __FLT_DIG__ 6 +-// LA32: #define __FLT_EPSILON__ 1.19209290e-7F +-// LA32: #define __FLT_HAS_DENORM__ 1 +-// LA32: #define __FLT_HAS_INFINITY__ 1 +-// LA32: #define __FLT_HAS_QUIET_NAN__ 1 +-// LA32: #define __FLT_MANT_DIG__ 24 +-// LA32: #define __FLT_MAX_10_EXP__ 38 +-// LA32: #define __FLT_MAX_EXP__ 128 +-// LA32: #define __FLT_MAX__ 3.40282347e+38F +-// LA32: #define __FLT_MIN_10_EXP__ (-37) +-// LA32: #define __FLT_MIN_EXP__ (-125) +-// LA32: #define __FLT_MIN__ 1.17549435e-38F +-// LA32: #define __FLT_RADIX__ 2 +-// LA32: #define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_INT_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_LLONG_LOCK_FREE 1 +-// LA32: #define __GCC_ATOMIC_LONG_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +-// LA32: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +-// LA32: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +-// LA32: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +-// LA32: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +-// LA32: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +-// LA32: #define __ILP32__ 1 +-// LA32: #define __INT16_C_SUFFIX__ +-// LA32: #define __INT16_FMTd__ "hd" +-// LA32: #define __INT16_FMTi__ "hi" +-// LA32: #define __INT16_MAX__ 32767 +-// LA32: #define __INT16_TYPE__ short +-// LA32: #define __INT32_C_SUFFIX__ +-// LA32: #define __INT32_FMTd__ "d" +-// LA32: #define __INT32_FMTi__ "i" +-// LA32: #define 
__INT32_MAX__ 2147483647 +-// LA32: #define __INT32_TYPE__ int +-// LA32: #define __INT64_C_SUFFIX__ LL +-// LA32: #define __INT64_FMTd__ "lld" +-// LA32: #define __INT64_FMTi__ "lli" +-// LA32: #define __INT64_MAX__ 9223372036854775807LL +-// LA32: #define __INT64_TYPE__ long long int +-// LA32: #define __INT8_C_SUFFIX__ +-// LA32: #define __INT8_FMTd__ "hhd" +-// LA32: #define __INT8_FMTi__ "hhi" +-// LA32: #define __INT8_MAX__ 127 +-// LA32: #define __INT8_TYPE__ signed char +-// LA32: #define __INTMAX_C_SUFFIX__ LL +-// LA32: #define __INTMAX_FMTd__ "lld" +-// LA32: #define __INTMAX_FMTi__ "lli" +-// LA32: #define __INTMAX_MAX__ 9223372036854775807LL +-// LA32: #define __INTMAX_TYPE__ long long int +-// LA32: #define __INTMAX_WIDTH__ 64 +-// LA32: #define __INTPTR_FMTd__ "d" +-// LA32: #define __INTPTR_FMTi__ "i" +-// LA32: #define __INTPTR_MAX__ 2147483647 +-// LA32: #define __INTPTR_TYPE__ int +-// LA32: #define __INTPTR_WIDTH__ 32 +-// LA32: #define __INT_FAST16_FMTd__ "hd" +-// LA32: #define __INT_FAST16_FMTi__ "hi" +-// LA32: #define __INT_FAST16_MAX__ 32767 +-// LA32: #define __INT_FAST16_TYPE__ short +-// LA32: #define __INT_FAST16_WIDTH__ 16 +-// LA32: #define __INT_FAST32_FMTd__ "d" +-// LA32: #define __INT_FAST32_FMTi__ "i" +-// LA32: #define __INT_FAST32_MAX__ 2147483647 +-// LA32: #define __INT_FAST32_TYPE__ int +-// LA32: #define __INT_FAST32_WIDTH__ 32 +-// LA32: #define __INT_FAST64_FMTd__ "lld" +-// LA32: #define __INT_FAST64_FMTi__ "lli" +-// LA32: #define __INT_FAST64_MAX__ 9223372036854775807LL +-// LA32: #define __INT_FAST64_TYPE__ long long int +-// LA32: #define __INT_FAST64_WIDTH__ 64 +-// LA32: #define __INT_FAST8_FMTd__ "hhd" +-// LA32: #define __INT_FAST8_FMTi__ "hhi" +-// LA32: #define __INT_FAST8_MAX__ 127 +-// LA32: #define __INT_FAST8_TYPE__ signed char +-// LA32: #define __INT_FAST8_WIDTH__ 8 +-// LA32: #define __INT_LEAST16_FMTd__ "hd" +-// LA32: #define __INT_LEAST16_FMTi__ "hi" +-// LA32: #define __INT_LEAST16_MAX__ 32767 +-// LA32: #define __INT_LEAST16_TYPE__ short +-// LA32: #define __INT_LEAST16_WIDTH__ 16 +-// LA32: #define __INT_LEAST32_FMTd__ "d" +-// LA32: #define __INT_LEAST32_FMTi__ "i" +-// LA32: #define __INT_LEAST32_MAX__ 2147483647 +-// LA32: #define __INT_LEAST32_TYPE__ int +-// LA32: #define __INT_LEAST32_WIDTH__ 32 +-// LA32: #define __INT_LEAST64_FMTd__ "lld" +-// LA32: #define __INT_LEAST64_FMTi__ "lli" +-// LA32: #define __INT_LEAST64_MAX__ 9223372036854775807LL +-// LA32: #define __INT_LEAST64_TYPE__ long long int +-// LA32: #define __INT_LEAST64_WIDTH__ 64 +-// LA32: #define __INT_LEAST8_FMTd__ "hhd" +-// LA32: #define __INT_LEAST8_FMTi__ "hhi" +-// LA32: #define __INT_LEAST8_MAX__ 127 +-// LA32: #define __INT_LEAST8_TYPE__ signed char +-// LA32: #define __INT_LEAST8_WIDTH__ 8 +-// LA32: #define __INT_MAX__ 2147483647 +-// LA32: #define __INT_WIDTH__ 32 +-// LA32: #define __LDBL_DECIMAL_DIG__ 36 +-// LA32: #define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L +-// LA32: #define __LDBL_DIG__ 33 +-// LA32: #define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L +-// LA32: #define __LDBL_HAS_DENORM__ 1 +-// LA32: #define __LDBL_HAS_INFINITY__ 1 +-// LA32: #define __LDBL_HAS_QUIET_NAN__ 1 +-// LA32: #define __LDBL_MANT_DIG__ 113 +-// LA32: #define __LDBL_MAX_10_EXP__ 4932 +-// LA32: #define __LDBL_MAX_EXP__ 16384 +-// LA32: #define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L +-// LA32: #define __LDBL_MIN_10_EXP__ (-4931) +-// LA32: #define __LDBL_MIN_EXP__ (-16381) +-// LA32: #define 
__LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L +-// LA32: #define __LITTLE_ENDIAN__ 1 +-// LA32: #define __LLONG_WIDTH__ 64 +-// LA32: #define __LONG_LONG_MAX__ 9223372036854775807LL +-// LA32: #define __LONG_MAX__ 2147483647L +-// LA32: #define __LONG_WIDTH__ 32 +-// LA32: #define __MEMORY_SCOPE_DEVICE 1 +-// LA32: #define __MEMORY_SCOPE_SINGLE 4 +-// LA32: #define __MEMORY_SCOPE_SYSTEM 0 +-// LA32: #define __MEMORY_SCOPE_WRKGRP 2 +-// LA32: #define __MEMORY_SCOPE_WVFRNT 3 +-// LA32: #define __NO_INLINE__ 1 +-// LA32: #define __NO_MATH_ERRNO__ 1 +-// LA32: #define __OBJC_BOOL_IS_BOOL 0 +-// LA32: #define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 +-// LA32: #define __OPENCL_MEMORY_SCOPE_DEVICE 2 +-// LA32: #define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 +-// LA32: #define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 +-// LA32: #define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 +-// LA32: #define __POINTER_WIDTH__ 32 +-// LA32: #define __PRAGMA_REDEFINE_EXTNAME 1 +-// LA32: #define __PTRDIFF_FMTd__ "d" +-// LA32: #define __PTRDIFF_FMTi__ "i" +-// LA32: #define __PTRDIFF_MAX__ 2147483647 +-// LA32: #define __PTRDIFF_TYPE__ int +-// LA32: #define __PTRDIFF_WIDTH__ 32 +-// LA32: #define __SCHAR_MAX__ 127 +-// LA32: #define __SHRT_MAX__ 32767 +-// LA32: #define __SHRT_WIDTH__ 16 +-// LA32: #define __SIG_ATOMIC_MAX__ 2147483647 +-// LA32: #define __SIG_ATOMIC_WIDTH__ 32 +-// LA32: #define __SIZEOF_DOUBLE__ 8 +-// LA32: #define __SIZEOF_FLOAT__ 4 +-// LA32-INT128: #define __SIZEOF_INT128__ 16 +-// LA32: #define __SIZEOF_INT__ 4 +-// LA32: #define __SIZEOF_LONG_DOUBLE__ 16 +-// LA32: #define __SIZEOF_LONG_LONG__ 8 +-// LA32: #define __SIZEOF_LONG__ 4 +-// LA32: #define __SIZEOF_POINTER__ 4 +-// LA32: #define __SIZEOF_PTRDIFF_T__ 4 +-// LA32: #define __SIZEOF_SHORT__ 2 +-// LA32: #define __SIZEOF_SIZE_T__ 4 +-// LA32: #define __SIZEOF_WCHAR_T__ 4 +-// LA32: #define __SIZEOF_WINT_T__ 4 +-// LA32: #define __SIZE_FMTX__ "X" +-// LA32: #define __SIZE_FMTo__ "o" +-// LA32: #define __SIZE_FMTu__ "u" +-// LA32: #define __SIZE_FMTx__ "x" +-// LA32: #define __SIZE_MAX__ 4294967295U +-// LA32: #define __SIZE_TYPE__ unsigned int +-// LA32: #define __SIZE_WIDTH__ 32 +-// LA32: #define __STDC_HOSTED__ 0 +-// LA32: #define __STDC_UTF_16__ 1 +-// LA32: #define __STDC_UTF_32__ 1 +-// LA32: #define __STDC_VERSION__ 201710L +-// LA32: #define __STDC__ 1 +-// LA32: #define __UINT16_C_SUFFIX__ +-// LA32: #define __UINT16_FMTX__ "hX" +-// LA32: #define __UINT16_FMTo__ "ho" +-// LA32: #define __UINT16_FMTu__ "hu" +-// LA32: #define __UINT16_FMTx__ "hx" +-// LA32: #define __UINT16_MAX__ 65535 +-// LA32: #define __UINT16_TYPE__ unsigned short +-// LA32: #define __UINT32_C_SUFFIX__ U +-// LA32: #define __UINT32_FMTX__ "X" +-// LA32: #define __UINT32_FMTo__ "o" +-// LA32: #define __UINT32_FMTu__ "u" +-// LA32: #define __UINT32_FMTx__ "x" +-// LA32: #define __UINT32_MAX__ 4294967295U +-// LA32: #define __UINT32_TYPE__ unsigned int +-// LA32: #define __UINT64_C_SUFFIX__ ULL +-// LA32: #define __UINT64_FMTX__ "llX" +-// LA32: #define __UINT64_FMTo__ "llo" +-// LA32: #define __UINT64_FMTu__ "llu" +-// LA32: #define __UINT64_FMTx__ "llx" +-// LA32: #define __UINT64_MAX__ 18446744073709551615ULL +-// LA32: #define __UINT64_TYPE__ long long unsigned int +-// LA32: #define __UINT8_C_SUFFIX__ +-// LA32: #define __UINT8_FMTX__ "hhX" +-// LA32: #define __UINT8_FMTo__ "hho" +-// LA32: #define __UINT8_FMTu__ "hhu" +-// LA32: #define __UINT8_FMTx__ "hhx" +-// LA32: #define __UINT8_MAX__ 255 +-// LA32: #define __UINT8_TYPE__ unsigned char +-// LA32: 
#define __UINTMAX_C_SUFFIX__ ULL +-// LA32: #define __UINTMAX_FMTX__ "llX" +-// LA32: #define __UINTMAX_FMTo__ "llo" +-// LA32: #define __UINTMAX_FMTu__ "llu" +-// LA32: #define __UINTMAX_FMTx__ "llx" +-// LA32: #define __UINTMAX_MAX__ 18446744073709551615ULL +-// LA32: #define __UINTMAX_TYPE__ long long unsigned int +-// LA32: #define __UINTMAX_WIDTH__ 64 +-// LA32: #define __UINTPTR_FMTX__ "X" +-// LA32: #define __UINTPTR_FMTo__ "o" +-// LA32: #define __UINTPTR_FMTu__ "u" +-// LA32: #define __UINTPTR_FMTx__ "x" +-// LA32: #define __UINTPTR_MAX__ 4294967295U +-// LA32: #define __UINTPTR_TYPE__ unsigned int +-// LA32: #define __UINTPTR_WIDTH__ 32 +-// LA32: #define __UINT_FAST16_FMTX__ "hX" +-// LA32: #define __UINT_FAST16_FMTo__ "ho" +-// LA32: #define __UINT_FAST16_FMTu__ "hu" +-// LA32: #define __UINT_FAST16_FMTx__ "hx" +-// LA32: #define __UINT_FAST16_MAX__ 65535 +-// TODO: LoongArch GCC defines UINT_FAST16 to be long unsigned int +-// LA32: #define __UINT_FAST16_TYPE__ unsigned short +-// LA32: #define __UINT_FAST32_FMTX__ "X" +-// LA32: #define __UINT_FAST32_FMTo__ "o" +-// LA32: #define __UINT_FAST32_FMTu__ "u" +-// LA32: #define __UINT_FAST32_FMTx__ "x" +-// LA32: #define __UINT_FAST32_MAX__ 4294967295U +-// LA32: #define __UINT_FAST32_TYPE__ unsigned int +-// LA32: #define __UINT_FAST64_FMTX__ "llX" +-// LA32: #define __UINT_FAST64_FMTo__ "llo" +-// LA32: #define __UINT_FAST64_FMTu__ "llu" +-// LA32: #define __UINT_FAST64_FMTx__ "llx" +-// LA32: #define __UINT_FAST64_MAX__ 18446744073709551615ULL +-// LA32: #define __UINT_FAST64_TYPE__ long long unsigned int +-// LA32: #define __UINT_FAST8_FMTX__ "hhX" +-// LA32: #define __UINT_FAST8_FMTo__ "hho" +-// LA32: #define __UINT_FAST8_FMTu__ "hhu" +-// LA32: #define __UINT_FAST8_FMTx__ "hhx" +-// LA32: #define __UINT_FAST8_MAX__ 255 +-// LA32: #define __UINT_FAST8_TYPE__ unsigned char +-// LA32: #define __UINT_LEAST16_FMTX__ "hX" +-// LA32: #define __UINT_LEAST16_FMTo__ "ho" +-// LA32: #define __UINT_LEAST16_FMTu__ "hu" +-// LA32: #define __UINT_LEAST16_FMTx__ "hx" +-// LA32: #define __UINT_LEAST16_MAX__ 65535 +-// LA32: #define __UINT_LEAST16_TYPE__ unsigned short +-// LA32: #define __UINT_LEAST32_FMTX__ "X" +-// LA32: #define __UINT_LEAST32_FMTo__ "o" +-// LA32: #define __UINT_LEAST32_FMTu__ "u" +-// LA32: #define __UINT_LEAST32_FMTx__ "x" +-// LA32: #define __UINT_LEAST32_MAX__ 4294967295U +-// LA32: #define __UINT_LEAST32_TYPE__ unsigned int +-// LA32: #define __UINT_LEAST64_FMTX__ "llX" +-// LA32: #define __UINT_LEAST64_FMTo__ "llo" +-// LA32: #define __UINT_LEAST64_FMTu__ "llu" +-// LA32: #define __UINT_LEAST64_FMTx__ "llx" +-// LA32: #define __UINT_LEAST64_MAX__ 18446744073709551615ULL +-// LA32: #define __UINT_LEAST64_TYPE__ long long unsigned int +-// LA32: #define __UINT_LEAST8_FMTX__ "hhX" +-// LA32: #define __UINT_LEAST8_FMTo__ "hho" +-// LA32: #define __UINT_LEAST8_FMTu__ "hhu" +-// LA32: #define __UINT_LEAST8_FMTx__ "hhx" +-// LA32: #define __UINT_LEAST8_MAX__ 255 +-// LA32: #define __UINT_LEAST8_TYPE__ unsigned char +-// LA32: #define __USER_LABEL_PREFIX__ +-// LA32: #define __WCHAR_MAX__ 2147483647 +-// LA32: #define __WCHAR_TYPE__ int +-// LA32: #define __WCHAR_WIDTH__ 32 +-// LA32: #define __WINT_MAX__ 4294967295U +-// LA32: #define __WINT_TYPE__ unsigned int +-// LA32: #define __WINT_UNSIGNED__ 1 +-// LA32: #define __WINT_WIDTH__ 32 +-// LA32-LINUX: #define __gnu_linux__ 1 +-// LA32-LINUX: #define __linux 1 +-// LA32-LINUX: #define __linux__ 1 +-// LA32-NOT: #define __loongarch64 1 +-// LA32: #define __loongarch__ 1 
+-// LA32-LINUX: #define __unix 1 +-// LA32-LINUX: #define __unix__ 1 +-// LA32-LINUX: #define linux 1 +-// LA32-LINUX: #define unix 1 +- +-// LA64: #define _LP64 1 +-// LA64: #define __ATOMIC_ACQUIRE 2 +-// LA64-NEXT: #define __ATOMIC_ACQ_REL 4 +-// LA64-NEXT: #define __ATOMIC_CONSUME 1 +-// LA64-NEXT: #define __ATOMIC_RELAXED 0 +-// LA64-NEXT: #define __ATOMIC_RELEASE 3 +-// LA64-NEXT: #define __ATOMIC_SEQ_CST 5 +-// LA64: #define __BIGGEST_ALIGNMENT__ 16 +-// LA64: #define __BITINT_MAXWIDTH__ 128 +-// LA64: #define __BOOL_WIDTH__ 8 +-// LA64: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +-// LA64: #define __CHAR16_TYPE__ unsigned short +-// LA64: #define __CHAR32_TYPE__ unsigned int +-// LA64: #define __CHAR_BIT__ 8 +-// LA64: #define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_INT_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_LLONG_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_LONG_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 +-// LA64: #define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 +-// LA64: #define __DBL_DECIMAL_DIG__ 17 +-// LA64: #define __DBL_DENORM_MIN__ 4.9406564584124654e-324 +-// LA64: #define __DBL_DIG__ 15 +-// LA64: #define __DBL_EPSILON__ 2.2204460492503131e-16 +-// LA64: #define __DBL_HAS_DENORM__ 1 +-// LA64: #define __DBL_HAS_INFINITY__ 1 +-// LA64: #define __DBL_HAS_QUIET_NAN__ 1 +-// LA64: #define __DBL_MANT_DIG__ 53 +-// LA64: #define __DBL_MAX_10_EXP__ 308 +-// LA64: #define __DBL_MAX_EXP__ 1024 +-// LA64: #define __DBL_MAX__ 1.7976931348623157e+308 +-// LA64: #define __DBL_MIN_10_EXP__ (-307) +-// LA64: #define __DBL_MIN_EXP__ (-1021) +-// LA64: #define __DBL_MIN__ 2.2250738585072014e-308 +-// LA64: #define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ +-// LA64: #define __FLT_DECIMAL_DIG__ 9 +-// LA64: #define __FLT_DENORM_MIN__ 1.40129846e-45F +-// LA64: #define __FLT_DIG__ 6 +-// LA64: #define __FLT_EPSILON__ 1.19209290e-7F +-// LA64: #define __FLT_HAS_DENORM__ 1 +-// LA64: #define __FLT_HAS_INFINITY__ 1 +-// LA64: #define __FLT_HAS_QUIET_NAN__ 1 +-// LA64: #define __FLT_MANT_DIG__ 24 +-// LA64: #define __FLT_MAX_10_EXP__ 38 +-// LA64: #define __FLT_MAX_EXP__ 128 +-// LA64: #define __FLT_MAX__ 3.40282347e+38F +-// LA64: #define __FLT_MIN_10_EXP__ (-37) +-// LA64: #define __FLT_MIN_EXP__ (-125) +-// LA64: #define __FLT_MIN__ 1.17549435e-38F +-// LA64: #define __FLT_RADIX__ 2 +-// LA64: #define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_INT_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_LONG_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +-// LA64: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +-// LA64: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +-// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +-// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +-// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +-// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 +-// LA64: #define __INT16_C_SUFFIX__ +-// LA64: #define __INT16_FMTd__ "hd" +-// LA64: #define __INT16_FMTi__ "hi" +-// LA64: #define __INT16_MAX__ 32767 +-// LA64: #define 
__INT16_TYPE__ short +-// LA64: #define __INT32_C_SUFFIX__ +-// LA64: #define __INT32_FMTd__ "d" +-// LA64: #define __INT32_FMTi__ "i" +-// LA64: #define __INT32_MAX__ 2147483647 +-// LA64: #define __INT32_TYPE__ int +-// LA64: #define __INT64_C_SUFFIX__ L +-// LA64: #define __INT64_FMTd__ "ld" +-// LA64: #define __INT64_FMTi__ "li" +-// LA64: #define __INT64_MAX__ 9223372036854775807L +-// LA64: #define __INT64_TYPE__ long int +-// LA64: #define __INT8_C_SUFFIX__ +-// LA64: #define __INT8_FMTd__ "hhd" +-// LA64: #define __INT8_FMTi__ "hhi" +-// LA64: #define __INT8_MAX__ 127 +-// LA64: #define __INT8_TYPE__ signed char +-// LA64: #define __INTMAX_C_SUFFIX__ L +-// LA64: #define __INTMAX_FMTd__ "ld" +-// LA64: #define __INTMAX_FMTi__ "li" +-// LA64: #define __INTMAX_MAX__ 9223372036854775807L +-// LA64: #define __INTMAX_TYPE__ long int +-// LA64: #define __INTMAX_WIDTH__ 64 +-// LA64: #define __INTPTR_FMTd__ "ld" +-// LA64: #define __INTPTR_FMTi__ "li" +-// LA64: #define __INTPTR_MAX__ 9223372036854775807L +-// LA64: #define __INTPTR_TYPE__ long int +-// LA64: #define __INTPTR_WIDTH__ 64 +-// LA64: #define __INT_FAST16_FMTd__ "hd" +-// LA64: #define __INT_FAST16_FMTi__ "hi" +-// LA64: #define __INT_FAST16_MAX__ 32767 +-// LA64: #define __INT_FAST16_TYPE__ short +-// LA64: #define __INT_FAST16_WIDTH__ 16 +-// LA64: #define __INT_FAST32_FMTd__ "d" +-// LA64: #define __INT_FAST32_FMTi__ "i" +-// LA64: #define __INT_FAST32_MAX__ 2147483647 +-// LA64: #define __INT_FAST32_TYPE__ int +-// LA64: #define __INT_FAST32_WIDTH__ 32 +-// LA64: #define __INT_FAST64_FMTd__ "ld" +-// LA64: #define __INT_FAST64_FMTi__ "li" +-// LA64: #define __INT_FAST64_MAX__ 9223372036854775807L +-// LA64: #define __INT_FAST64_TYPE__ long int +-// LA64: #define __INT_FAST64_WIDTH__ 64 +-// LA64: #define __INT_FAST8_FMTd__ "hhd" +-// LA64: #define __INT_FAST8_FMTi__ "hhi" +-// LA64: #define __INT_FAST8_MAX__ 127 +-// LA64: #define __INT_FAST8_TYPE__ signed char +-// LA64: #define __INT_FAST8_WIDTH__ 8 +-// LA64: #define __INT_LEAST16_FMTd__ "hd" +-// LA64: #define __INT_LEAST16_FMTi__ "hi" +-// LA64: #define __INT_LEAST16_MAX__ 32767 +-// LA64: #define __INT_LEAST16_TYPE__ short +-// LA64: #define __INT_LEAST16_WIDTH__ 16 +-// LA64: #define __INT_LEAST32_FMTd__ "d" +-// LA64: #define __INT_LEAST32_FMTi__ "i" +-// LA64: #define __INT_LEAST32_MAX__ 2147483647 +-// LA64: #define __INT_LEAST32_TYPE__ int +-// LA64: #define __INT_LEAST32_WIDTH__ 32 +-// LA64: #define __INT_LEAST64_FMTd__ "ld" +-// LA64: #define __INT_LEAST64_FMTi__ "li" +-// LA64: #define __INT_LEAST64_MAX__ 9223372036854775807L +-// LA64: #define __INT_LEAST64_TYPE__ long int +-// LA64: #define __INT_LEAST64_WIDTH__ 64 +-// LA64: #define __INT_LEAST8_FMTd__ "hhd" +-// LA64: #define __INT_LEAST8_FMTi__ "hhi" +-// LA64: #define __INT_LEAST8_MAX__ 127 +-// LA64: #define __INT_LEAST8_TYPE__ signed char +-// LA64: #define __INT_LEAST8_WIDTH__ 8 +-// LA64: #define __INT_MAX__ 2147483647 +-// LA64: #define __INT_WIDTH__ 32 +-// LA64: #define __LDBL_DECIMAL_DIG__ 36 +-// LA64: #define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L +-// LA64: #define __LDBL_DIG__ 33 +-// LA64: #define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L +-// LA64: #define __LDBL_HAS_DENORM__ 1 +-// LA64: #define __LDBL_HAS_INFINITY__ 1 +-// LA64: #define __LDBL_HAS_QUIET_NAN__ 1 +-// LA64: #define __LDBL_MANT_DIG__ 113 +-// LA64: #define __LDBL_MAX_10_EXP__ 4932 +-// LA64: #define __LDBL_MAX_EXP__ 16384 +-// LA64: #define __LDBL_MAX__ 
1.18973149535723176508575932662800702e+4932L +-// LA64: #define __LDBL_MIN_10_EXP__ (-4931) +-// LA64: #define __LDBL_MIN_EXP__ (-16381) +-// LA64: #define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L +-// LA64: #define __LITTLE_ENDIAN__ 1 +-// LA64: #define __LLONG_WIDTH__ 64 +-// LA64: #define __LONG_LONG_MAX__ 9223372036854775807LL +-// LA64: #define __LONG_MAX__ 9223372036854775807L +-// LA64: #define __LONG_WIDTH__ 64 +-// LA64: #define __LP64__ 1 +-// LA64: #define __MEMORY_SCOPE_DEVICE 1 +-// LA64: #define __MEMORY_SCOPE_SINGLE 4 +-// LA64: #define __MEMORY_SCOPE_SYSTEM 0 +-// LA64: #define __MEMORY_SCOPE_WRKGRP 2 +-// LA64: #define __MEMORY_SCOPE_WVFRNT 3 +-// LA64: #define __NO_INLINE__ 1 +-// LA64: #define __NO_MATH_ERRNO__ 1 +-// LA64: #define __OBJC_BOOL_IS_BOOL 0 +-// LA64: #define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 +-// LA64: #define __OPENCL_MEMORY_SCOPE_DEVICE 2 +-// LA64: #define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 +-// LA64: #define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 +-// LA64: #define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 +-// LA64: #define __POINTER_WIDTH__ 64 +-// LA64: #define __PRAGMA_REDEFINE_EXTNAME 1 +-// LA64: #define __PTRDIFF_FMTd__ "ld" +-// LA64: #define __PTRDIFF_FMTi__ "li" +-// LA64: #define __PTRDIFF_MAX__ 9223372036854775807L +-// LA64: #define __PTRDIFF_TYPE__ long int +-// LA64: #define __PTRDIFF_WIDTH__ 64 +-// LA64: #define __SCHAR_MAX__ 127 +-// LA64: #define __SHRT_MAX__ 32767 +-// LA64: #define __SHRT_WIDTH__ 16 +-// LA64: #define __SIG_ATOMIC_MAX__ 2147483647 +-// LA64: #define __SIG_ATOMIC_WIDTH__ 32 +-// LA64: #define __SIZEOF_DOUBLE__ 8 +-// LA64: #define __SIZEOF_FLOAT__ 4 +-// LA64-INT128: #define __SIZEOF_INT128__ 16 +-// LA64: #define __SIZEOF_INT__ 4 +-// LA64: #define __SIZEOF_LONG_DOUBLE__ 16 +-// LA64: #define __SIZEOF_LONG_LONG__ 8 +-// LA64: #define __SIZEOF_LONG__ 8 +-// LA64: #define __SIZEOF_POINTER__ 8 +-// LA64: #define __SIZEOF_PTRDIFF_T__ 8 +-// LA64: #define __SIZEOF_SHORT__ 2 +-// LA64: #define __SIZEOF_SIZE_T__ 8 +-// LA64: #define __SIZEOF_WCHAR_T__ 4 +-// LA64: #define __SIZEOF_WINT_T__ 4 +-// LA64: #define __SIZE_FMTX__ "lX" +-// LA64: #define __SIZE_FMTo__ "lo" +-// LA64: #define __SIZE_FMTu__ "lu" +-// LA64: #define __SIZE_FMTx__ "lx" +-// LA64: #define __SIZE_MAX__ 18446744073709551615UL +-// LA64: #define __SIZE_TYPE__ long unsigned int +-// LA64: #define __SIZE_WIDTH__ 64 +-// LA64: #define __STDC_HOSTED__ 0 +-// LA64: #define __STDC_UTF_16__ 1 +-// LA64: #define __STDC_UTF_32__ 1 +-// LA64: #define __STDC_VERSION__ 201710L +-// LA64: #define __STDC__ 1 +-// LA64: #define __UINT16_C_SUFFIX__ +-// LA64: #define __UINT16_FMTX__ "hX" +-// LA64: #define __UINT16_FMTo__ "ho" +-// LA64: #define __UINT16_FMTu__ "hu" +-// LA64: #define __UINT16_FMTx__ "hx" +-// LA64: #define __UINT16_MAX__ 65535 +-// LA64: #define __UINT16_TYPE__ unsigned short +-// LA64: #define __UINT32_C_SUFFIX__ U +-// LA64: #define __UINT32_FMTX__ "X" +-// LA64: #define __UINT32_FMTo__ "o" +-// LA64: #define __UINT32_FMTu__ "u" +-// LA64: #define __UINT32_FMTx__ "x" +-// LA64: #define __UINT32_MAX__ 4294967295U +-// LA64: #define __UINT32_TYPE__ unsigned int +-// LA64: #define __UINT64_C_SUFFIX__ UL +-// LA64: #define __UINT64_FMTX__ "lX" +-// LA64: #define __UINT64_FMTo__ "lo" +-// LA64: #define __UINT64_FMTu__ "lu" +-// LA64: #define __UINT64_FMTx__ "lx" +-// LA64: #define __UINT64_MAX__ 18446744073709551615UL +-// LA64: #define __UINT64_TYPE__ long unsigned int +-// LA64: #define __UINT8_C_SUFFIX__ +-// LA64: #define __UINT8_FMTX__ "hhX" 
+-// LA64: #define __UINT8_FMTo__ "hho" +-// LA64: #define __UINT8_FMTu__ "hhu" +-// LA64: #define __UINT8_FMTx__ "hhx" +-// LA64: #define __UINT8_MAX__ 255 +-// LA64: #define __UINT8_TYPE__ unsigned char +-// LA64: #define __UINTMAX_C_SUFFIX__ UL +-// LA64: #define __UINTMAX_FMTX__ "lX" +-// LA64: #define __UINTMAX_FMTo__ "lo" +-// LA64: #define __UINTMAX_FMTu__ "lu" +-// LA64: #define __UINTMAX_FMTx__ "lx" +-// LA64: #define __UINTMAX_MAX__ 18446744073709551615UL +-// LA64: #define __UINTMAX_TYPE__ long unsigned int +-// LA64: #define __UINTMAX_WIDTH__ 64 +-// LA64: #define __UINTPTR_FMTX__ "lX" +-// LA64: #define __UINTPTR_FMTo__ "lo" +-// LA64: #define __UINTPTR_FMTu__ "lu" +-// LA64: #define __UINTPTR_FMTx__ "lx" +-// LA64: #define __UINTPTR_MAX__ 18446744073709551615UL +-// LA64: #define __UINTPTR_TYPE__ long unsigned int +-// LA64: #define __UINTPTR_WIDTH__ 64 +-// LA64: #define __UINT_FAST16_FMTX__ "hX" +-// LA64: #define __UINT_FAST16_FMTo__ "ho" +-// LA64: #define __UINT_FAST16_FMTu__ "hu" +-// LA64: #define __UINT_FAST16_FMTx__ "hx" +-// LA64: #define __UINT_FAST16_MAX__ 65535 +-// TODO: LoongArch GCC defines UINT_FAST16 to be long unsigned int +-// LA64: #define __UINT_FAST16_TYPE__ unsigned short +-// LA64: #define __UINT_FAST32_FMTX__ "X" +-// LA64: #define __UINT_FAST32_FMTo__ "o" +-// LA64: #define __UINT_FAST32_FMTu__ "u" +-// LA64: #define __UINT_FAST32_FMTx__ "x" +-// LA64: #define __UINT_FAST32_MAX__ 4294967295U +-// LA64: #define __UINT_FAST32_TYPE__ unsigned int +-// LA64: #define __UINT_FAST64_FMTX__ "lX" +-// LA64: #define __UINT_FAST64_FMTo__ "lo" +-// LA64: #define __UINT_FAST64_FMTu__ "lu" +-// LA64: #define __UINT_FAST64_FMTx__ "lx" +-// LA64: #define __UINT_FAST64_MAX__ 18446744073709551615UL +-// LA64: #define __UINT_FAST64_TYPE__ long unsigned int +-// LA64: #define __UINT_FAST8_FMTX__ "hhX" +-// LA64: #define __UINT_FAST8_FMTo__ "hho" +-// LA64: #define __UINT_FAST8_FMTu__ "hhu" +-// LA64: #define __UINT_FAST8_FMTx__ "hhx" +-// LA64: #define __UINT_FAST8_MAX__ 255 +-// LA64: #define __UINT_FAST8_TYPE__ unsigned char +-// LA64: #define __UINT_LEAST16_FMTX__ "hX" +-// LA64: #define __UINT_LEAST16_FMTo__ "ho" +-// LA64: #define __UINT_LEAST16_FMTu__ "hu" +-// LA64: #define __UINT_LEAST16_FMTx__ "hx" +-// LA64: #define __UINT_LEAST16_MAX__ 65535 +-// LA64: #define __UINT_LEAST16_TYPE__ unsigned short +-// LA64: #define __UINT_LEAST32_FMTX__ "X" +-// LA64: #define __UINT_LEAST32_FMTo__ "o" +-// LA64: #define __UINT_LEAST32_FMTu__ "u" +-// LA64: #define __UINT_LEAST32_FMTx__ "x" +-// LA64: #define __UINT_LEAST32_MAX__ 4294967295U +-// LA64: #define __UINT_LEAST32_TYPE__ unsigned int +-// LA64: #define __UINT_LEAST64_FMTX__ "lX" +-// LA64: #define __UINT_LEAST64_FMTo__ "lo" +-// LA64: #define __UINT_LEAST64_FMTu__ "lu" +-// LA64: #define __UINT_LEAST64_FMTx__ "lx" +-// LA64: #define __UINT_LEAST64_MAX__ 18446744073709551615UL +-// LA64: #define __UINT_LEAST64_TYPE__ long unsigned int +-// LA64: #define __UINT_LEAST8_FMTX__ "hhX" +-// LA64: #define __UINT_LEAST8_FMTo__ "hho" +-// LA64: #define __UINT_LEAST8_FMTu__ "hhu" +-// LA64: #define __UINT_LEAST8_FMTx__ "hhx" +-// LA64: #define __UINT_LEAST8_MAX__ 255 +-// LA64: #define __UINT_LEAST8_TYPE__ unsigned char +-// LA64: #define __USER_LABEL_PREFIX__ +-// LA64: #define __WCHAR_MAX__ 2147483647 +-// LA64: #define __WCHAR_TYPE__ int +-// LA64: #define __WCHAR_WIDTH__ 32 +-// LA64: #define __WINT_MAX__ 4294967295U +-// LA64: #define __WINT_TYPE__ unsigned int +-// LA64: #define __WINT_UNSIGNED__ 1 +-// LA64: #define 
__WINT_WIDTH__ 32 +-// LA64-LINUX: #define __gnu_linux__ 1 +-// LA64-LINUX: #define __linux 1 +-// LA64-LINUX: #define __linux__ 1 +-// LA64: #define __loongarch64 1 +-// LA64: #define __loongarch__ 1 +-// LA64-LINUX: #define __unix 1 +-// LA64-LINUX: #define __unix__ 1 +-// LA64-LINUX: #define linux 1 +-// LA64-LINUX: #define unix 1 +- +- +-/// Check __loongarch_{double,single,hard,soft}_float, __loongarch_{gr,fr}len, __loongarch_lp64. +- +-// RUN: %clang --target=loongarch32 -mfpu=64 -mabi=ilp32d -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32D %s +-// RUN: %clang --target=loongarch32 -mdouble-float -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32D %s +-// LA32-FPU64-ILP32D: #define __loongarch_double_float 1 +-// LA32-FPU64-ILP32D: #define __loongarch_frlen 64 +-// LA32-FPU64-ILP32D: #define __loongarch_grlen 32 +-// LA32-FPU64-ILP32D: #define __loongarch_hard_float 1 +-// LA32-FPU64-ILP32D-NOT: #define __loongarch_lp64 +-// LA32-FPU64-ILP32D-NOT: #define __loongarch_single_float +-// LA32-FPU64-ILP32D-NOT: #define __loongarch_soft_float +- +-// RUN: %clang --target=loongarch32 -mfpu=64 -mabi=ilp32f -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32F %s +-// LA32-FPU64-ILP32F-NOT: #define __loongarch_double_float +-// LA32-FPU64-ILP32F: #define __loongarch_frlen 64 +-// LA32-FPU64-ILP32F: #define __loongarch_grlen 32 +-// LA32-FPU64-ILP32F: #define __loongarch_hard_float 1 +-// LA32-FPU64-ILP32F-NOT: #define __loongarch_lp64 +-// LA32-FPU64-ILP32F: #define __loongarch_single_float 1 +-// LA32-FPU64-ILP32F-NOT: #define __loongarch_soft_float +- +-// RUN: %clang --target=loongarch32 -mfpu=64 -mabi=ilp32s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32S %s +-// LA32-FPU64-ILP32S-NOT: #define __loongarch_double_float +-// LA32-FPU64-ILP32S: #define __loongarch_frlen 64 +-// LA32-FPU64-ILP32S: #define __loongarch_grlen 32 +-// LA32-FPU64-ILP32S-NOT: #define __loongarch_hard_float +-// LA32-FPU64-ILP32S-NOT: #define __loongarch_lp64 +-// LA32-FPU64-ILP32S-NOT: #define __loongarch_single_float +-// LA32-FPU64-ILP32S: #define __loongarch_soft_float 1 +- +-// RUN: %clang --target=loongarch32 -mfpu=32 -mabi=ilp32f -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU32-ILP32F %s +-// RUN: %clang --target=loongarch32 -msingle-float -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU32-ILP32F %s +-// LA32-FPU32-ILP32F-NOT: #define __loongarch_double_float +-// LA32-FPU32-ILP32F: #define __loongarch_frlen 32 +-// LA32-FPU32-ILP32F: #define __loongarch_grlen 32 +-// LA32-FPU32-ILP32F: #define __loongarch_hard_float 1 +-// LA32-FPU32-ILP32F-NOT: #define __loongarch_lp64 +-// LA32-FPU32-ILP32F: #define __loongarch_single_float 1 +-// LA32-FPU32-ILP32F-NOT: #define __loongarch_soft_float +- +-// RUN: %clang --target=loongarch32 -mfpu=32 -mabi=ilp32s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU32-ILP32S %s +-// LA32-FPU32-ILP32S-NOT: #define __loongarch_double_float +-// LA32-FPU32-ILP32S: #define __loongarch_frlen 32 +-// LA32-FPU32-ILP32S: #define __loongarch_grlen 32 +-// LA32-FPU32-ILP32S-NOT: #define __loongarch_hard_float +-// LA32-FPU32-ILP32S-NOT: #define __loongarch_lp64 +-// LA32-FPU32-ILP32S-NOT: #define __loongarch_single_float +-// LA32-FPU32-ILP32S: #define __loongarch_soft_float 1 +- +-// RUN: %clang 
--target=loongarch32 -mfpu=0 -mabi=ilp32s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU0-ILP32S %s +-// RUN: %clang --target=loongarch32 -mfpu=none -mabi=ilp32s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU0-ILP32S %s +-// RUN: %clang --target=loongarch32 -msoft-float -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU0-ILP32S %s +-// LA32-FPU0-ILP32S-NOT: #define __loongarch_double_float +-// LA32-FPU0-ILP32S: #define __loongarch_frlen 0 +-// LA32-FPU0-ILP32S: #define __loongarch_grlen 32 +-// LA32-FPU0-ILP32S-NOT: #define __loongarch_hard_float +-// LA32-FPU0-ILP32S-NOT: #define __loongarch_lp64 +-// LA32-FPU0-ILP32S-NOT: #define __loongarch_single_float +-// LA32-FPU0-ILP32S: #define __loongarch_soft_float 1 +- +-// RUN: %clang --target=loongarch64 -mfpu=64 -mabi=lp64d -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64D %s +-// RUN: %clang --target=loongarch64 -mdouble-float -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64D %s +-// LA64-FPU64-LP64D: #define __loongarch_double_float 1 +-// LA64-FPU64-LP64D: #define __loongarch_frlen 64 +-// LA64-FPU64-LP64D: #define __loongarch_grlen 64 +-// LA64-FPU64-LP64D: #define __loongarch_hard_float 1 +-// LA64-FPU64-LP64D: #define __loongarch_lp64 1 +-// LA64-FPU64-LP64D-NOT: #define __loongarch_single_float +-// LA64-FPU64-LP64D-NOT: #define __loongarch_soft_float +- +-// RUN: %clang --target=loongarch64 -mfpu=64 -mabi=lp64f -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64F %s +-// LA64-FPU64-LP64F-NOT: #define __loongarch_double_float +-// LA64-FPU64-LP64F: #define __loongarch_frlen 64 +-// LA64-FPU64-LP64F: #define __loongarch_grlen 64 +-// LA64-FPU64-LP64F: #define __loongarch_hard_float 1 +-// LA64-FPU64-LP64F: #define __loongarch_lp64 1 +-// LA64-FPU64-LP64F: #define __loongarch_single_float 1 +-// LA64-FPU64-LP64F-NOT: #define __loongarch_soft_float +- +-// RUN: %clang --target=loongarch64 -mfpu=64 -mabi=lp64s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64S %s +-// LA64-FPU64-LP64S-NOT: #define __loongarch_double_float +-// LA64-FPU64-LP64S: #define __loongarch_frlen 64 +-// LA64-FPU64-LP64S: #define __loongarch_grlen 64 +-// LA64-FPU64-LP64S-NOT: #define __loongarch_hard_float +-// LA64-FPU64-LP64S: #define __loongarch_lp64 1 +-// LA64-FPU64-LP64S-NOT: #define __loongarch_single_float +-// LA64-FPU64-LP64S: #define __loongarch_soft_float 1 +- +-// RUN: %clang --target=loongarch64 -mfpu=32 -mabi=lp64f -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU32-LP64F %s +-// RUN: %clang --target=loongarch64 -msingle-float -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU32-LP64F %s +-// LA64-FPU32-LP64F-NOT: #define __loongarch_double_float +-// LA64-FPU32-LP64F: #define __loongarch_frlen 32 +-// LA64-FPU32-LP64F: #define __loongarch_grlen 64 +-// LA64-FPU32-LP64F: #define __loongarch_hard_float 1 +-// LA64-FPU32-LP64F: #define __loongarch_lp64 1 +-// LA64-FPU32-LP64F: #define __loongarch_single_float 1 +-// LA64-FPU32-LP64F-NOT: #define __loongarch_soft_float +- +-// RUN: %clang --target=loongarch64 -mfpu=32 -mabi=lp64s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU32-LP64S %s +-// LA64-FPU32-LP64S-NOT: #define __loongarch_double_float +-// 
LA64-FPU32-LP64S: #define __loongarch_frlen 32 +-// LA64-FPU32-LP64S: #define __loongarch_grlen 64 +-// LA64-FPU32-LP64S-NOT: #define __loongarch_hard_float +-// LA64-FPU32-LP64S: #define __loongarch_lp64 1 +-// LA64-FPU32-LP64S-NOT: #define __loongarch_single_float +-// LA64-FPU32-LP64S: #define __loongarch_soft_float 1 +- +-// RUN: %clang --target=loongarch64 -mfpu=0 -mabi=lp64s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU0-LP64S %s +-// RUN: %clang --target=loongarch64 -mfpu=none -mabi=lp64s -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU0-LP64S %s +-// RUN: %clang --target=loongarch64 -msoft-float -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU0-LP64S %s +-// LA64-FPU0-LP64S-NOT: #define __loongarch_double_float +-// LA64-FPU0-LP64S: #define __loongarch_frlen 0 +-// LA64-FPU0-LP64S: #define __loongarch_grlen 64 +-// LA64-FPU0-LP64S-NOT: #define __loongarch_hard_float +-// LA64-FPU0-LP64S: #define __loongarch_lp64 1 +-// LA64-FPU0-LP64S-NOT: #define __loongarch_single_float +-// LA64-FPU0-LP64S: #define __loongarch_soft_float 1 +- +-/// Check __loongarch_arch and __loongarch_tune. +- +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=la464 %s +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=loongarch64 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la464 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la464 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s +-// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 -mtune=loongarch64 | \ +-// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=loongarch64 %s +- +-// ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" +-// ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" +- +-// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +-// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +-// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +-// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +-// MLSX-NOT: #define __loongarch_asx +-// MLSX: #define __loongarch_sx 1 +- +-// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +-// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +-// RUN: %clang --target=loongarch64 -mlsx -mlasx 
-x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +-// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +-// MLASX: #define __loongarch_asx 1 +-// MLASX: #define __loongarch_sx 1 +- +-// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +-// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +-// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +-// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +-// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ +-// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +-// MNO-LSX-NOT: #define __loongarch_asx +-// MNO-LSX-NOT: #define __loongarch_sx ++// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | grep loongarch | FileCheck %s ++ ++// CHECK: #define __loongarch64 1 ++// CHECK-NEXT: #define __loongarch__ 1 ++// CHECK-NEXT: #define __loongarch_double_float 1 ++// CHECK-NEXT: #define __loongarch_fpr 64 ++// CHECK-NEXT: #define __loongarch_frlen 64 ++// CHECK-NEXT: #define __loongarch_grlen 64 ++// CHECK-NEXT: #define __loongarch_hard_float 1 ++// CHECK-NEXT: #define __loongarch_lp64 1 +diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c +index 27c7b4a27..92b846ea9 100644 +--- a/clang/test/Preprocessor/predefined-arch-macros.c ++++ b/clang/test/Preprocessor/predefined-arch-macros.c +@@ -4384,20 +4384,3 @@ + // CHECK_WASM_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 + // CHECK_WASM_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 + // CHECK_WASM_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 +- +-// Begin LoongArch tests ---------------- +- +-// RUN: %clang -E -dM %s -o - 2>&1 \ +-// RUN: --target=loongarch32-unknown-linux-gnu \ +-// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_LA32_ATOMICS +-// CHECK_LA32_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +-// CHECK_LA32_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +-// CHECK_LA32_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +- +-// RUN: %clang -E -dM %s -o - 2>&1 \ +-// RUN: --target=loongarch64-unknown-linux-gnu \ +-// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_LA64_ATOMICS +-// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +-// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +-// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +-// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 +diff --git a/clang/test/Preprocessor/predefined-macros-no-warnings.c b/clang/test/Preprocessor/predefined-macros-no-warnings.c +index e0617f8de..84e23f8ce 100644 +--- a/clang/test/Preprocessor/predefined-macros-no-warnings.c ++++ b/clang/test/Preprocessor/predefined-macros-no-warnings.c +@@ -185,7 +185,3 @@ + // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple ve + // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple csky + // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple csky-linux +-// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple loongarch32 +-// RUN: %clang_cc1 %s -Eonly 
-Wsystem-headers -Werror -triple loongarch32-linux +-// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple loongarch64 +-// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple loongarch64-linux +diff --git a/clang/test/Sema/attr-model.cpp b/clang/test/Sema/attr-model.cpp +deleted file mode 100644 +index 898cc0393..000000000 +--- a/clang/test/Sema/attr-model.cpp ++++ /dev/null +@@ -1,64 +0,0 @@ +-// RUN: %clang_cc1 -triple aarch64 -verify=expected,aarch64 -fsyntax-only %s +-// RUN: %clang_cc1 -triple loongarch64 -verify=expected,loongarch64 -fsyntax-only %s +-// RUN: %clang_cc1 -triple mips64 -verify=expected,mips64 -fsyntax-only %s +-// RUN: %clang_cc1 -triple powerpc64 -verify=expected,powerpc64 -fsyntax-only %s +-// RUN: %clang_cc1 -triple riscv64 -verify=expected,riscv64 -fsyntax-only %s +-// RUN: %clang_cc1 -triple x86_64 -verify=expected,x86_64 -fsyntax-only %s +- +-#if defined(__loongarch__) && !__has_attribute(model) +-#error "Should support model attribute" +-#endif +- +-int a __attribute((model("tiny"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // loongarch64-error {{code model 'tiny' is not supported on this target}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +-int b __attribute((model("small"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // loongarch64-error {{code model 'small' is not supported on this target}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +-int c __attribute((model("normal"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +-int d __attribute((model("kernel"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // loongarch64-error {{code model 'kernel' is not supported on this target}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +-int e __attribute((model("medium"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +-int f __attribute((model("large"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // loongarch64-error {{code model 'large' is not supported on this target}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +-int g __attribute((model("extreme"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // mips64-warning {{unknown attribute 'model' 
ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +- +-void __attribute((model("extreme"))) h() {} // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // loongarch64-error {{'model' attribute only applies to non-TLS global variables}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +- +-thread_local int i __attribute((model("extreme"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ +- // loongarch64-error {{'model' attribute only applies to non-TLS global variables}} \ +- // mips64-warning {{unknown attribute 'model' ignored}} \ +- // powerpc64-warning {{unknown attribute 'model' ignored}} \ +- // riscv64-warning {{unknown attribute 'model' ignored}} \ +- // x86_64-warning {{unknown attribute 'model' ignored}} +diff --git a/clang/test/Sema/patchable-function-entry-attr.cpp b/clang/test/Sema/patchable-function-entry-attr.cpp +index 9134c851d..3dd050498 100644 +--- a/clang/test/Sema/patchable-function-entry-attr.cpp ++++ b/clang/test/Sema/patchable-function-entry-attr.cpp +@@ -2,8 +2,6 @@ + // RUN: %clang_cc1 -triple aarch64_be -fsyntax-only -verify=silence %s + // RUN: %clang_cc1 -triple i386 -fsyntax-only -verify=silence %s + // RUN: %clang_cc1 -triple x86_64 -fsyntax-only -verify=silence %s +-// RUN: %clang_cc1 -triple loongarch32 -fsyntax-only -verify=silence %s +-// RUN: %clang_cc1 -triple loongarch64 -fsyntax-only -verify=silence %s + // RUN: %clang_cc1 -triple riscv32 -fsyntax-only -verify=silence %s + // RUN: %clang_cc1 -triple riscv64 -fsyntax-only -verify=silence %s + // RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify %s +diff --git a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +index 2f1c4efb3..dc6bb50f8 100644 +--- a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp ++++ b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +@@ -113,6 +113,10 @@ extern "C" int throw_exception() { + if (Triple.isOSAIX()) + GTEST_SKIP(); + ++ // FIXME: LoongArch64 reports: terminate called after throwing an instance of 'custom_exception' ++ if (Triple.isLoongArch64()) ++ GTEST_SKIP(); ++ + // FIXME: ARM fails due to `Not implemented relocation type!` + if (Triple.isARM()) + GTEST_SKIP(); diff --git a/clang.spec b/clang.spec index 67d11d35e318a61c4f94f1933affbc6dbc9b6cef..9586164ec17048a10d66167284b1aa9b2ee0d22b 100644 --- a/clang.spec +++ b/clang.spec @@ -1,3 +1,4 @@ +%define anolis_release .0.1 %bcond_with snapshot_build %if %{with snapshot_build} @@ -80,7 +81,7 @@ Name: %pkg_name Version: %{clang_version}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}} -Release: 1%{?dist} +Release: 1%{anolis_release}%{?dist} Summary: A C language family front-end for LLVM License: NCSA @@ -110,6 +111,7 @@ Patch2: 0003-PATCH-clang-Don-t-install-static-libraries.patch # Workaround a bug in ORC on ppc64le. 
# More info is available here: https://reviews.llvm.org/D159115#4641826 Patch5: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch +Patch6: 0006-Support-LoongArch.patch # RHEL specific patches # Avoid unwanted dependency on python-myst-parser @@ -220,8 +222,10 @@ libomp-devel to enable -fopenmp. %package libs Summary: Runtime library for clang Requires: %{name}-resource-filesystem = %{version} +%ifnarch loongarch64 # RHEL specific: Use libstdc++ from gcc13 by default. rhbz#2178804 Requires: gcc-toolset-13-gcc-c++ +%endif Recommends: compiler-rt%{?_isa} = %{version} # libomp-devel is required, so clang can find the omp.h header when compiling # with -fopenmp. @@ -355,7 +359,7 @@ find -name '*.md' | while read md; do sed -r -e 's/^( )*\* /\n\1\* /' ${md} | pa %global _lto_cflags %nil %endif -%ifarch s390 s390x aarch64 %ix86 ppc64le +%ifarch s390 s390x aarch64 %ix86 ppc64le loongarch64 # Decrease debuginfo verbosity to reduce memory consumption during final library linking %global optflags %(echo %{optflags} | sed 's/-g /-g1 /') %endif @@ -373,8 +377,13 @@ find -name '*.md' | while read md; do sed -r -e 's/^( )*\* /\n\1\* /' ${md} | pa %set_build_flags +%ifnarch loongarch64 CXXFLAGS="$CXXFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized" CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized" +%else +CXXFLAGS="$CXXFLAGS -Wno-address -Wno-nonnull" +CFLAGS="$CFLAGS -Wno-address -Wno-nonnull" +%endif # We set CLANG_DEFAULT_PIE_ON_LINUX=OFF and PPC_LINUX_DEFAULT_IEEELONGDOUBLE=ON to match the # defaults used by Fedora's GCC. @@ -382,13 +391,17 @@ CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized" -DCLANG_DEFAULT_PIE_ON_LINUX=OFF \ %if 0%{?fedora} || 0%{?rhel} > 9 -DPPC_LINUX_DEFAULT_IEEELONGDOUBLE=ON \ +%endif +%ifarch loongarch64 + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ %endif -DLLVM_PARALLEL_LINK_JOBS=1 \ -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DPYTHON_EXECUTABLE=%{__python3} \ -DCMAKE_SKIP_RPATH:BOOL=ON \ -%ifarch s390 s390x %ix86 ppc64le aarch64 +%ifarch s390 s390x %ix86 ppc64le aarch64 loongarch64 -DCMAKE_C_FLAGS_RELWITHDEBINFO="%{optflags} -DNDEBUG" \ -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="%{optflags} -DNDEBUG" \ %endif @@ -435,7 +448,9 @@ CFLAGS="$CFLAGS -Wno-address -Wno-nonnull -Wno-maybe-uninitialized" -DCLANG_BUILD_EXAMPLES:BOOL=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DCLANG_REPOSITORY_STRING="%{?fedora:Fedora}%{?rhel:Red Hat} %{version}-%{release}" \ +%ifnarch loongarch64 -DGCC_INSTALL_PREFIX=/opt/rh/gcc-toolset-13/root/usr \ +%endif -DCLANG_RESOURCE_DIR=../lib/clang/%{maj_ver} \ -DCLANG_CONFIG_FILE_SYSTEM_DIR=%{_sysconfdir}/%{name}/ \ %ifarch %{arm} @@ -742,6 +757,9 @@ SOURCE_DATE_EPOCH=1629181597 LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ %endif %changelog +* Fri Nov 29 2024 Chen Li - 18.1.8-1.0.1 +- Add support for LoongArch + * Tue Jul 09 2024 Tom Stellard - 18.1.8-1 - 18.1.8 Release