// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s


#include <x86intrin.h>
#include "builtin_test_helpers.h"

// NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll

// Codegen test for _mm_maccs_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacssww taking three <8 x i16> operands and returning
// <8 x i16>.
__m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccs_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_maccs_epi16(a, b, c);
}

// Codegen test for _mm_macc_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacsww taking three <8 x i16> operands and returning
// <8 x i16>.
__m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macc_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_macc_epi16(a, b, c);
}

// Codegen test for _mm_maccsd_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacsswd with two <8 x i16> operands and a <4 x i32>
// third operand, returning <4 x i32>.
__m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccsd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccsd_epi16(a, b, c);
}

// Codegen test for _mm_maccd_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacswd with two <8 x i16> operands and a <4 x i32>
// third operand, returning <4 x i32>.
__m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccd_epi16(a, b, c);
}

// Codegen test for _mm_maccs_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacssdd taking three <4 x i32> operands and returning
// <4 x i32>.
__m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccs_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccs_epi32(a, b, c);
}

// Codegen test for _mm_macc_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacsdd taking three <4 x i32> operands and returning
// <4 x i32>.
__m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macc_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_macc_epi32(a, b, c);
}

// Codegen test for _mm_maccslo_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacssdql with two <4 x i32> operands and a <2 x i64>
// third operand, returning <2 x i64>. The wildcard ahead of the return type
// tolerates extra text after `call` (presumably return attributes that differ
// between configurations -- confirm).
__m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccslo_epi32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_maccslo_epi32(a, b, c);
}

// Codegen test for _mm_macclo_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacsdql with two <4 x i32> operands and a <2 x i64>
// third operand, returning <2 x i64>. The wildcard ahead of the return type
// tolerates extra text after `call` (presumably return attributes that differ
// between configurations -- confirm).
__m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macclo_epi32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_macclo_epi32(a, b, c);
}

// Codegen test for _mm_maccshi_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacssdqh with two <4 x i32> operands and a <2 x i64>
// third operand, returning <2 x i64>. The wildcard ahead of the return type
// tolerates extra text after `call` (presumably return attributes that differ
// between configurations -- confirm).
__m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccshi_epi32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_maccshi_epi32(a, b, c);
}

// Codegen test for _mm_macchi_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpmacsdqh with two <4 x i32> operands and a <2 x i64>
// third operand, returning <2 x i64>. The wildcard ahead of the return type
// tolerates extra text after `call` (presumably return attributes that differ
// between configurations -- confirm).
__m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macchi_epi32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_macchi_epi32(a, b, c);
}

// Codegen test for _mm_maddsd_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpmadcsswd with two <8 x i16> operands and a <4 x i32>
// third operand, returning <4 x i32>.
__m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maddsd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maddsd_epi16(a, b, c);
}

// Codegen test for _mm_maddd_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpmadcswd with two <8 x i16> operands and a <4 x i32>
// third operand, returning <4 x i32>.
__m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maddd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maddd_epi16(a, b, c);
}

// Codegen test for _mm_haddw_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddbw taking one <16 x i8> operand and returning <8 x i16>.
__m128i test_mm_haddw_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddw_epi8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}})
  return _mm_haddw_epi8(a);
}

// Codegen test for _mm_haddd_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddbd taking one <16 x i8> operand and returning <4 x i32>.
__m128i test_mm_haddd_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epi8
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}})
  return _mm_haddd_epi8(a);
}

// Codegen test for _mm_haddq_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddbq taking one <16 x i8> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_haddq_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi8
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %{{.*}})
  return _mm_haddq_epi8(a);
}

// Codegen test for _mm_haddd_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddwd taking one <8 x i16> operand and returning <4 x i32>.
__m128i test_mm_haddd_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %{{.*}})
  return _mm_haddd_epi16(a);
}

// Codegen test for _mm_haddq_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddwq taking one <8 x i16> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_haddq_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi16
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %{{.*}})
  return _mm_haddq_epi16(a);
}

// Codegen test for _mm_haddq_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vphadddq taking one <4 x i32> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_haddq_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %{{.*}})
  return _mm_haddq_epi32(a);
}

// Codegen test for _mm_haddw_epu8: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddubw taking one <16 x i8> operand and returning <8 x i16>.
__m128i test_mm_haddw_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddw_epu8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %{{.*}})
  return _mm_haddw_epu8(a);
}

// Codegen test for _mm_haddd_epu8: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddubd taking one <16 x i8> operand and returning <4 x i32>.
__m128i test_mm_haddd_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epu8
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %{{.*}})
  return _mm_haddd_epu8(a);
}

// Codegen test for _mm_haddq_epu8: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddubq taking one <16 x i8> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_haddq_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu8
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %{{.*}})
  return _mm_haddq_epu8(a);
}

// Codegen test for _mm_haddd_epu16: the emitted IR must contain a call to
// @llvm.x86.xop.vphadduwd taking one <8 x i16> operand and returning <4 x i32>.
__m128i test_mm_haddd_epu16(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epu16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %{{.*}})
  return _mm_haddd_epu16(a);
}

// Codegen test for _mm_haddq_epu16: the emitted IR must contain a call to
// @llvm.x86.xop.vphadduwq taking one <8 x i16> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_haddq_epu16(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu16
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %{{.*}})
  return _mm_haddq_epu16(a);
}

// Codegen test for _mm_haddq_epu32: the emitted IR must contain a call to
// @llvm.x86.xop.vphaddudq taking one <4 x i32> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_haddq_epu32(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %{{.*}})
  return _mm_haddq_epu32(a);
}

// Codegen test for _mm_hsubw_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vphsubbw taking one <16 x i8> operand and returning <8 x i16>.
__m128i test_mm_hsubw_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_hsubw_epi8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %{{.*}})
  return _mm_hsubw_epi8(a);
}

// Codegen test for _mm_hsubd_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vphsubwd taking one <8 x i16> operand and returning <4 x i32>.
__m128i test_mm_hsubd_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_hsubd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %{{.*}})
  return _mm_hsubd_epi16(a);
}

// Codegen test for _mm_hsubq_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vphsubdq taking one <4 x i32> operand and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_hsubq_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_hsubq_epi32
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %{{.*}})
  return _mm_hsubq_epi32(a);
}

// Codegen test for _mm_cmov_si128: the patterns match plain <2 x i64> vector
// IR rather than an intrinsic call -- an `and`, then an `xor` with an all-ones
// splat, then (on the immediately following IR lines) a second `and` that
// consumes the xor result and an `or` joining the two `and` results.
__m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_cmov_si128
  // CHECK: [[AND:%.*]] = and <2 x i64> %{{.*}}, %{{.*}}
  // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK-NEXT: [[ANDN:%.*]] = and <2 x i64> %{{.*}}, [[NEG]]
  // CHECK-NEXT: %{{.*}} = or <2 x i64> [[AND]], [[ANDN]]
  return _mm_cmov_si128(a, b, c);
}
// Constant-evaluation check (TEST_CONSTEXPR and match_v4si are defined outside
// this view, presumably in builtin_test_helpers.h). With these inputs, lanes
// whose mask is -1 yield the first operand's lane and lanes whose mask is 0
// yield the second operand's lane.
TEST_CONSTEXPR(match_v4si(_mm_cmov_si128((__m128i)(__v4si){+1,+2,+3,+4}, (__m128i)(__v4si){-4,-3,-2,-1}, (__m128i)(__v4si){-1,0,0,-1}), +1, -3, -2, +4));

// Codegen test for _mm256_cmov_si256: the patterns match plain <4 x i64>
// vector IR rather than an intrinsic call -- an `and`, then an `xor` with an
// all-ones splat, then (on the immediately following IR lines) a second `and`
// that consumes the xor result and an `or` joining the two `and` results.
__m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
  // CHECK-LABEL: test_mm256_cmov_si256
  // CHECK: [[AND:%.*]] = and <4 x i64> %{{.*}}, %{{.*}}
  // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, splat (i64 -1)
  // CHECK-NEXT: [[ANDN:%.*]] = and <4 x i64> %{{.*}}, [[NEG]]
  // CHECK-NEXT: %{{.*}} = or <4 x i64> [[AND]], [[ANDN]]
  return _mm256_cmov_si256(a, b, c);
}
// Constant-evaluation check (TEST_CONSTEXPR and match_v4di are defined outside
// this view, presumably in builtin_test_helpers.h). With these inputs, lanes
// whose mask is -1 yield the first operand's lane and lanes whose mask is 0
// yield the second operand's lane.
TEST_CONSTEXPR(match_v4di(_mm256_cmov_si256((__m256i)(__v4di){+1,+2,+3,+4}, (__m256i)(__v4di){-4,-3,-2,-1}, (__m256i)(__v4di){0,-1,0,-1}), -4, +2, -2, +4));

// Codegen test for _mm_perm_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vpperm taking three <16 x i8> operands and returning
// <16 x i8>.
__m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_perm_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_perm_epi8(a, b, c);
}

// Codegen test for _mm_rot_epi8: the emitted IR is expected to use the generic
// @llvm.fshl.v16i8 intrinsic (three <16 x i8> operands) rather than an
// x86.xop-specific one.
__m128i test_mm_rot_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi8
  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_rot_epi8(a, b);
}
// Constant-evaluation check with mixed positive and negative per-lane counts
// (TEST_CONSTEXPR and match_v16qi are defined outside this view, presumably in
// builtin_test_helpers.h). The expected lanes are consistent with rotating
// each 8-bit lane left by its count modulo 8, e.g. 6 by 9 gives 12 and 8 by 7
// gives 4.
TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0));

// Codegen test for _mm_rot_epi16: the emitted IR is expected to use the
// generic @llvm.fshl.v8i16 intrinsic (three <8 x i16> operands) rather than an
// x86.xop-specific one.
__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi16
  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_rot_epi16(a, b);
}
// Constant-evaluation check with mixed positive and negative per-lane counts
// (TEST_CONSTEXPR and match_v8hi are defined outside this view, presumably in
// builtin_test_helpers.h). The expected lanes are consistent with rotating
// each 16-bit lane left by its count modulo 16, e.g. 5 by -2 gives 16385 and
// 1 by -6 gives 1024.
TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0));

// Codegen test for _mm_rot_epi32: the emitted IR is expected to use the
// generic @llvm.fshl.v4i32 intrinsic (three <4 x i32> operands) rather than an
// x86.xop-specific one.
__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi32
  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_rot_epi32(a, b);
}
// Constant-evaluation check with mixed positive and negative per-lane counts
// (TEST_CONSTEXPR and match_v4si are defined outside this view, presumably in
// builtin_test_helpers.h). The expected lanes are consistent with rotating
// each 32-bit lane left by its count modulo 32, e.g. 1 by -2 gives 1073741824.
TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0));

// Codegen test for _mm_rot_epi64: the emitted IR is expected to use the
// generic @llvm.fshl.v2i64 intrinsic (three <2 x i64> operands) rather than an
// x86.xop-specific one. The wildcard ahead of the return type tolerates extra
// text after `call`.
__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi64
  // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_rot_epi64(a, b);
}
// Constant-evaluation check with one positive and one negative count
// (TEST_CONSTEXPR and match_v2di are defined outside this view, presumably in
// builtin_test_helpers.h). The expected lanes are consistent with rotating
// each 64-bit lane left by its count modulo 64, e.g. 99 by 1 gives 198.
TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL));

// Codegen test for _mm_roti_epi8 with immediate 1: the emitted IR is expected
// to call the generic @llvm.fshl.v16i8 intrinsic with the immediate appearing
// as a splat (i8 1) third operand.
__m128i test_mm_roti_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi8
  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1))
  return _mm_roti_epi8(a, 1);
}
// Constant-evaluation check using a different immediate (3) than the codegen
// test above (TEST_CONSTEXPR and match_v16qi are defined outside this view,
// presumably in builtin_test_helpers.h). The expected lanes are consistent
// with rotating each 8-bit lane left by 3, e.g. 1 gives 8 and -2 gives -9.
TEST_CONSTEXPR(match_v16qi(_mm_roti_epi8(((__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 3), 0, 8, -9, 24, -25, 40, -41, 56, -57, 72, -73, 88, -89, 104, -105, 120));

// Codegen test for _mm_roti_epi16 with immediate 50: the emitted IR is
// expected to call the generic @llvm.fshl.v8i16 intrinsic with the immediate
// appearing unreduced as splat (i16 50), even though 50 exceeds the 16-bit
// lane width.
__m128i test_mm_roti_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi16
  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50))
  return _mm_roti_epi16(a, 50);
}
// Constant-evaluation check using a different immediate (1) than the codegen
// test above (TEST_CONSTEXPR and match_v8hi are defined outside this view,
// presumably in builtin_test_helpers.h). The expected lanes are consistent
// with rotating each 16-bit lane left by 1, e.g. 2 gives 4 and -3 gives -5.
TEST_CONSTEXPR(match_v8hi(_mm_roti_epi16(((__m128i)(__v8hi){2, -3, 4, -5, 6, -7, 8, -9}), 1), 4, -5, 8, -9, 12, -13, 16, -17));

// Codegen test for _mm_roti_epi32 with immediate -30: the emitted IR is
// expected to call the generic @llvm.fshl.v4i32 intrinsic with a
// splat (i32 226) third operand. 226 equals -30 modulo 256, so the immediate
// is presumably treated as an unsigned 8-bit value before widening -- confirm
// against the intrinsic's definition.
__m128i test_mm_roti_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi32
  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226))
  return _mm_roti_epi32(a, -30);
}
// Constant-evaluation check using a different immediate (5) than the codegen
// test above (TEST_CONSTEXPR and match_v4si are defined outside this view,
// presumably in builtin_test_helpers.h). The expected lanes are consistent
// with rotating each 32-bit lane left by 5, e.g. 1 gives 32 and -2 gives -33.
TEST_CONSTEXPR(match_v4si(_mm_roti_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));

// Codegen test for _mm_roti_epi64 with immediate 100: the emitted IR is
// expected to call the generic @llvm.fshl.v2i64 intrinsic with the immediate
// appearing unreduced as splat (i64 100), even though 100 exceeds the 64-bit
// lane width. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m128i test_mm_roti_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi64
  // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100))
  return _mm_roti_epi64(a, 100);
}
// Constant-evaluation check using a different immediate (19) than the codegen
// test above (TEST_CONSTEXPR and match_v2di are defined outside this view,
// presumably in builtin_test_helpers.h). The expected lanes are consistent
// with rotating each 64-bit lane left by 19, e.g. 99 gives 51904512.
TEST_CONSTEXPR(match_v2di(_mm_roti_epi64(((__m128i)(__v2di){99, -55}), 19), 51904512, -28311553));

// Codegen test for _mm_shl_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vpshlb taking two <16 x i8> operands and returning <16 x i8>.
__m128i test_mm_shl_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_shl_epi8(a, b);
}

// Codegen test for _mm_shl_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpshlw taking two <8 x i16> operands and returning <8 x i16>.
__m128i test_mm_shl_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_shl_epi16(a, b);
}

// Codegen test for _mm_shl_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpshld taking two <4 x i32> operands and returning <4 x i32>.
__m128i test_mm_shl_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_shl_epi32(a, b);
}

// Codegen test for _mm_shl_epi64: the emitted IR must contain a call to
// @llvm.x86.xop.vpshlq taking two <2 x i64> operands and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_shl_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi64
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_shl_epi64(a, b);
}

// Codegen test for _mm_sha_epi8: the emitted IR must contain a call to
// @llvm.x86.xop.vpshab taking two <16 x i8> operands and returning <16 x i8>.
__m128i test_mm_sha_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sha_epi8(a, b);
}

// Codegen test for _mm_sha_epi16: the emitted IR must contain a call to
// @llvm.x86.xop.vpshaw taking two <8 x i16> operands and returning <8 x i16>.
__m128i test_mm_sha_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sha_epi16(a, b);
}

// Codegen test for _mm_sha_epi32: the emitted IR must contain a call to
// @llvm.x86.xop.vpshad taking two <4 x i32> operands and returning <4 x i32>.
__m128i test_mm_sha_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sha_epi32(a, b);
}

// Codegen test for _mm_sha_epi64: the emitted IR must contain a call to
// @llvm.x86.xop.vpshaq taking two <2 x i64> operands and returning <2 x i64>.
// The wildcard ahead of the return type tolerates extra text after `call`.
__m128i test_mm_sha_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi64
  // CHECK: call {{.*}}<2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sha_epi64(a, b);
}

// Codegen test for _mm_com_epu8 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp ult` on <16 x i8> followed by a sign
// extension of the <16 x i1> result back to <16 x i8>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epu8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epu8
  // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}}
  // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
  return _mm_com_epu8(a, b, 0);
}

// Codegen test for _mm_com_epu16 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp ult` on <8 x i16> followed by a sign
// extension of the <8 x i1> result back to <8 x i16>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epu16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epu16
  // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
  // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
  return _mm_com_epu16(a, b, 0);
}

// Codegen test for _mm_com_epu32 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp ult` on <4 x i32> followed by a sign
// extension of the <4 x i1> result back to <4 x i32>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epu32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epu32
  // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
  // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
  return _mm_com_epu32(a, b, 0);
}

// Codegen test for _mm_com_epu64 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp ult` on <2 x i64> followed by a sign
// extension of the <2 x i1> result back to <2 x i64>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epu64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epu64
  // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}}
  // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
  return _mm_com_epu64(a, b, 0);
}

// Codegen test for _mm_com_epi8 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp slt` on <16 x i8> followed by a sign
// extension of the <16 x i1> result back to <16 x i8>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epi8
  // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}}
  // CHECK: sext <16 x i1> %{{.*}} to <16 x i8>
  return _mm_com_epi8(a, b, 0);
}

// Codegen test for _mm_com_epi16 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp slt` on <8 x i16> followed by a sign
// extension of the <8 x i1> result back to <8 x i16>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epi16
  // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}}
  // CHECK: sext <8 x i1> %{{.*}} to <8 x i16>
  return _mm_com_epi16(a, b, 0);
}

// Codegen test for _mm_com_epi32 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp slt` on <4 x i32> followed by a sign
// extension of the <4 x i1> result back to <4 x i32>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epi32
  // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
  // CHECK: sext <4 x i1> %{{.*}} to <4 x i32>
  return _mm_com_epi32(a, b, 0);
}

// Codegen test for _mm_com_epi64 with comparison immediate 0: the emitted IR
// is expected to be a plain `icmp slt` on <2 x i64> followed by a sign
// extension of the <2 x i1> result back to <2 x i64>, not an intrinsic call.
// Only immediate 0 is exercised here.
__m128i test_mm_com_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_com_epi64
  // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}}
  // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
  return _mm_com_epi64(a, b, 0);
}

// Codegen test for _mm_permute2_pd with immediate 0: the emitted IR must
// contain a call to @llvm.x86.xop.vpermil2pd with two <2 x double> operands,
// a <2 x i64> selector operand and the immediate passed through as `i8 0`.
__m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) {
  // CHECK-LABEL: test_mm_permute2_pd
  // CHECK: call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
  return _mm_permute2_pd(a, b, c, 0);
}

// Codegen test for _mm256_permute2_pd with immediate 0: the emitted IR must
// contain a call to @llvm.x86.xop.vpermil2pd.256 with two <4 x double>
// operands, a <4 x i64> selector operand and the immediate passed through as
// `i8 0`.
__m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) {
  // CHECK-LABEL: test_mm256_permute2_pd
  // CHECK: call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i64> %{{.*}}, i8 0)
  return _mm256_permute2_pd(a, b, c, 0);
}

// Codegen test for _mm_permute2_ps with immediate 0: the emitted IR must
// contain a call to @llvm.x86.xop.vpermil2ps with two <4 x float> operands,
// a <4 x i32> selector operand and the immediate passed through as `i8 0`.
__m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) {
  // CHECK-LABEL: test_mm_permute2_ps
  // CHECK: call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
  return _mm_permute2_ps(a, b, c, 0);
}

// Codegen test for _mm256_permute2_ps with immediate 0: the emitted IR must
// contain a call to @llvm.x86.xop.vpermil2ps.256 with two <8 x float>
// operands, an <8 x i32> selector operand and the immediate passed through as
// `i8 0`.
__m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) {
  // CHECK-LABEL: test_mm256_permute2_ps
  // CHECK: call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> %{{.*}}, i8 0)
  return _mm256_permute2_ps(a, b, c, 0);
}

// Codegen test for _mm_frcz_ss: the emitted IR must contain a call to
// @llvm.x86.xop.vfrcz.ss taking one <4 x float> operand and returning
// <4 x float>. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m128 test_mm_frcz_ss(__m128 a) {
  // CHECK-LABEL: test_mm_frcz_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %{{.*}})
  return _mm_frcz_ss(a);
}

// Codegen test for _mm_frcz_sd: the emitted IR must contain a call to
// @llvm.x86.xop.vfrcz.sd taking one <2 x double> operand and returning
// <2 x double>. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m128d test_mm_frcz_sd(__m128d a) {
  // CHECK-LABEL: test_mm_frcz_sd
  // CHECK: call {{.*}}<2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %{{.*}})
  return _mm_frcz_sd(a);
}

// Codegen test for _mm_frcz_ps: the emitted IR must contain a call to
// @llvm.x86.xop.vfrcz.ps taking one <4 x float> operand and returning
// <4 x float>. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m128 test_mm_frcz_ps(__m128 a) {
  // CHECK-LABEL: test_mm_frcz_ps
  // CHECK: call {{.*}}<4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %{{.*}})
  return _mm_frcz_ps(a);
}

// Codegen test for _mm_frcz_pd: the emitted IR must contain a call to
// @llvm.x86.xop.vfrcz.pd taking one <2 x double> operand and returning
// <2 x double>. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m128d test_mm_frcz_pd(__m128d a) {
  // CHECK-LABEL: test_mm_frcz_pd
  // CHECK: call {{.*}}<2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %{{.*}})
  return _mm_frcz_pd(a);
}

// Codegen test for _mm256_frcz_ps: the emitted IR must contain a call to
// @llvm.x86.xop.vfrcz.ps.256 taking one <8 x float> operand and returning
// <8 x float>. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m256 test_mm256_frcz_ps(__m256 a) {
  // CHECK-LABEL: test_mm256_frcz_ps
  // CHECK: call {{.*}}<8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %{{.*}})
  return _mm256_frcz_ps(a);
}

// Codegen test for _mm256_frcz_pd: the emitted IR must contain a call to
// @llvm.x86.xop.vfrcz.pd.256 taking one <4 x double> operand and returning
// <4 x double>. The wildcard ahead of the return type tolerates extra text
// after `call`.
__m256d test_mm256_frcz_pd(__m256d a) {
  // CHECK-LABEL: test_mm256_frcz_pd
  // CHECK: call {{.*}}<4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %{{.*}})
  return _mm256_frcz_pd(a);
}
