From aa8fa6ccf020d90b28fd14d10ea88ae0785b1ee3 Mon Sep 17 00:00:00 2001 From: AmrDeveloper Date: Thu, 26 Dec 2024 00:18:15 +0100 Subject: [PATCH 1/2] [CIR][CIRGen][Builtin][Neon] Lower `vaddh_f16`, `vsubh_f16`, `vmulh_f16` and `vdivh_f16` --- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 8 + .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 24 +- clang/test/CIR/CodeGen/AArch64/neon-fp16.c | 685 ++++++++++++++++++ 3 files changed, 709 insertions(+), 8 deletions(-) create mode 100644 clang/test/CIR/CodeGen/AArch64/neon-fp16.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 28be733f62d7..46b001531795 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -675,6 +675,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { assert(!cir::MissingFeatures::foldBinOpFMF()); return create<cir::BinOp>(lhs.getLoc(), cir::BinOpKind::Mul, lhs, rhs); } + mlir::Value createFDiv(mlir::Value lhs, mlir::Value rhs) { + assert(!cir::MissingFeatures::metaDataNode()); + if (IsFPConstrained) + llvm_unreachable("Constrained FP NYI"); + + assert(!cir::MissingFeatures::foldBinOpFMF()); + return create<cir::BinOp>(lhs.getLoc(), cir::BinOpKind::Div, lhs, rhs); + } mlir::Value createDynCast(mlir::Location loc, mlir::Value src, cir::PointerType destType, bool isRefCast, diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 83952d1dfae2..1957f640a62d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -3766,14 +3766,22 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, Ops[0], cir::VectorType::get(&getMLIRContext(), DoubleTy, 2)); return builder.create<cir::VecExtractOp>(getLoc(E->getExprLoc()), Ops[0], emitScalarExpr(E->getArg(1))); - case NEON::BI__builtin_neon_vaddh_f16: - llvm_unreachable("NEON::BI__builtin_neon_vaddh_f16 NYI"); - case NEON::BI__builtin_neon_vsubh_f16: -
llvm_unreachable("NEON::BI__builtin_neon_vsubh_f16 NYI"); - case NEON::BI__builtin_neon_vmulh_f16: - llvm_unreachable("NEON::BI__builtin_neon_vmulh_f16 NYI"); - case NEON::BI__builtin_neon_vdivh_f16: - llvm_unreachable("NEON::BI__builtin_neon_vdivh_f16 NYI"); + case NEON::BI__builtin_neon_vaddh_f16: { + Ops.push_back(emitScalarExpr(E->getArg(1))); + return builder.createFAdd(Ops[0], Ops[1]); + } + case NEON::BI__builtin_neon_vsubh_f16: { + Ops.push_back(emitScalarExpr(E->getArg(1))); + return builder.createFSub(Ops[0], Ops[1]); + } + case NEON::BI__builtin_neon_vmulh_f16: { + Ops.push_back(emitScalarExpr(E->getArg(1))); + return builder.createFMul(Ops[0], Ops[1]); + } + case NEON::BI__builtin_neon_vdivh_f16: { + Ops.push_back(emitScalarExpr(E->getArg(1))); + return builder.createFDiv(Ops[0], Ops[1]); + } case NEON::BI__builtin_neon_vfmah_f16: // NEON intrinsic puts accumulator first, unlike the LLVM fma. llvm_unreachable("NEON::BI__builtin_neon_vfmah_f16 NYI"); diff --git a/clang/test/CIR/CodeGen/AArch64/neon-fp16.c b/clang/test/CIR/CodeGen/AArch64/neon-fp16.c new file mode 100644 index 000000000000..cc9e2631e10c --- /dev/null +++ b/clang/test/CIR/CodeGen/AArch64/neon-fp16.c @@ -0,0 +1,685 @@ +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \ +// RUN: -fclangir -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-cir -o %t.cir %s +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \ +// RUN: -fclangir -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \ +// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// REQUIRES: aarch64-registered-target || arm-registered-target + +// This test mimics clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c, which eventually +// CIR shall be able to support fully. 
Since this is going to take some time to converge, +// the unsupported/NYI code is commented out, so that we can incrementally improve this. +// The NYI filecheck used contains the LLVM output from OG codegen that should guide the +// correct result when implementing this into the CIR pipeline. + +#include <arm_fp16.h> + +// NYI-LABEL: test_vabsh_f16 +// NYI: [[ABS:%.*]] = call half @llvm.fabs.f16(half %a) +// NYI: ret half [[ABS]] +// float16_t test_vabsh_f16(float16_t a) { +// return vabsh_f16(a); +// } + +// NYI-LABEL: test_vceqzh_f16 +// NYI: [[TMP1:%.*]] = fcmp oeq half %a, 0xH0000 +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vceqzh_f16(float16_t a) { +// return vceqzh_f16(a); +// } + +// NYI-LABEL: test_vcgezh_f16 +// NYI: [[TMP1:%.*]] = fcmp oge half %a, 0xH0000 +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vcgezh_f16(float16_t a) { +// return vcgezh_f16(a); +// } + +// NYI-LABEL: test_vcgtzh_f16 +// NYI: [[TMP1:%.*]] = fcmp ogt half %a, 0xH0000 +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vcgtzh_f16(float16_t a) { +// return vcgtzh_f16(a); +// } + +// NYI-LABEL: test_vclezh_f16 +// NYI: [[TMP1:%.*]] = fcmp ole half %a, 0xH0000 +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vclezh_f16(float16_t a) { +// return vclezh_f16(a); +// } + +// NYI-LABEL: test_vcltzh_f16 +// NYI: [[TMP1:%.*]] = fcmp olt half %a, 0xH0000 +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vcltzh_f16(float16_t a) { +// return vcltzh_f16(a); +// } + +// NYI-LABEL: test_vcvth_f16_s16 +// NYI: [[VCVT:%.*]] = sitofp i16 %a to half +// NYI: ret half [[VCVT]] +// float16_t test_vcvth_f16_s16 (int16_t a) { +// return vcvth_f16_s16(a); +// } + +// NYI-LABEL: test_vcvth_f16_s32 +// NYI: [[VCVT:%.*]] = sitofp i32 %a to half +// NYI: ret half [[VCVT]] +// float16_t test_vcvth_f16_s32
(int32_t a) { +// return vcvth_f16_s32(a); +// } + +// NYI-LABEL: test_vcvth_f16_s64 +// NYI: [[VCVT:%.*]] = sitofp i64 %a to half +// NYI: ret half [[VCVT]] +// float16_t test_vcvth_f16_s64 (int64_t a) { +// return vcvth_f16_s64(a); +// } + +// NYI-LABEL: test_vcvth_f16_u16 +// NYI: [[VCVT:%.*]] = uitofp i16 %a to half +// NYI: ret half [[VCVT]] +// float16_t test_vcvth_f16_u16 (uint16_t a) { +// return vcvth_f16_u16(a); +// } + +// NYI-LABEL: test_vcvth_f16_u32 +// NYI: [[VCVT:%.*]] = uitofp i32 %a to half +// NYI: ret half [[VCVT]] +// float16_t test_vcvth_f16_u32 (uint32_t a) { +// return vcvth_f16_u32(a); +// } + +// NYI-LABEL: test_vcvth_f16_u64 +// NYI: [[VCVT:%.*]] = uitofp i64 %a to half +// NYI: ret half [[VCVT]] +// float16_t test_vcvth_f16_u64 (uint64_t a) { +// return vcvth_f16_u64(a); +// } + +// NYI-LABEL: test_vcvth_s16_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a) +// NYI: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16 +// NYI: ret i16 [[TRUNC]] +// int16_t test_vcvth_s16_f16 (float16_t a) { +// return vcvth_s16_f16(a); +// } + +// NYI-LABEL: test_vcvth_s32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// int32_t test_vcvth_s32_f16 (float16_t a) { +// return vcvth_s32_f16(a); +// } + +// NYI-LABEL: test_vcvth_s64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// int64_t test_vcvth_s64_f16 (float16_t a) { +// return vcvth_s64_f16(a); +// } + +// NYI-LABEL: test_vcvth_u16_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a) +// NYI: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16 +// NYI: ret i16 [[TRUNC]] +// uint16_t test_vcvth_u16_f16 (float16_t a) { +// return vcvth_u16_f16(a); +// } + +// NYI-LABEL: test_vcvth_u32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// uint32_t test_vcvth_u32_f16 (float16_t a) { +// return 
vcvth_u32_f16(a); +// } + +// NYI-LABEL: test_vcvth_u64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// uint64_t test_vcvth_u64_f16 (float16_t a) { +// return vcvth_u64_f16(a); +// } + +// NYI-LABEL: test_vcvtah_s16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// int16_t test_vcvtah_s16_f16 (float16_t a) { +// return vcvtah_s16_f16(a); +// } + +// NYI-LABEL: test_vcvtah_s32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// int32_t test_vcvtah_s32_f16 (float16_t a) { +// return vcvtah_s32_f16(a); +// } + +// NYI-LABEL: test_vcvtah_s64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// int64_t test_vcvtah_s64_f16 (float16_t a) { +// return vcvtah_s64_f16(a); +// } + +// NYI-LABEL: test_vcvtah_u16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcvtah_u16_f16 (float16_t a) { +// return vcvtah_u16_f16(a); +// } + +// NYI-LABEL: test_vcvtah_u32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// uint32_t test_vcvtah_u32_f16 (float16_t a) { +// return vcvtah_u32_f16(a); +// } + +// NYI-LABEL: test_vcvtah_u64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// uint64_t test_vcvtah_u64_f16 (float16_t a) { +// return vcvtah_u64_f16(a); +// } + +// NYI-LABEL: test_vcvtmh_s16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// int16_t test_vcvtmh_s16_f16 (float16_t a) { +// return vcvtmh_s16_f16(a); +// } + +// NYI-LABEL: test_vcvtmh_s32_f16 +// NYI: 
[[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// int32_t test_vcvtmh_s32_f16 (float16_t a) { +// return vcvtmh_s32_f16(a); +// } + +// NYI-LABEL: test_vcvtmh_s64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// int64_t test_vcvtmh_s64_f16 (float16_t a) { +// return vcvtmh_s64_f16(a); +// } + +// NYI-LABEL: test_vcvtmh_u16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcvtmh_u16_f16 (float16_t a) { +// return vcvtmh_u16_f16(a); +// } + +// NYI-LABEL: test_vcvtmh_u32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// uint32_t test_vcvtmh_u32_f16 (float16_t a) { +// return vcvtmh_u32_f16(a); +// } + +// NYI-LABEL: test_vcvtmh_u64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// uint64_t test_vcvtmh_u64_f16 (float16_t a) { +// return vcvtmh_u64_f16(a); +// } + +// NYI-LABEL: test_vcvtnh_s16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// int16_t test_vcvtnh_s16_f16 (float16_t a) { +// return vcvtnh_s16_f16(a); +// } + +// NYI-LABEL: test_vcvtnh_s32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// int32_t test_vcvtnh_s32_f16 (float16_t a) { +// return vcvtnh_s32_f16(a); +// } + +// NYI-LABEL: test_vcvtnh_s64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// int64_t test_vcvtnh_s64_f16 (float16_t a) { +// return vcvtnh_s64_f16(a); +// } + +// NYI-LABEL: test_vcvtnh_u16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 
+// NYI: ret i16 [[RET]] +// uint16_t test_vcvtnh_u16_f16 (float16_t a) { +// return vcvtnh_u16_f16(a); +// } + +// NYI-LABEL: test_vcvtnh_u32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// uint32_t test_vcvtnh_u32_f16 (float16_t a) { +// return vcvtnh_u32_f16(a); +// } + +// NYI-LABEL: test_vcvtnh_u64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// uint64_t test_vcvtnh_u64_f16 (float16_t a) { +// return vcvtnh_u64_f16(a); +// } + +// NYI-LABEL: test_vcvtph_s16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// int16_t test_vcvtph_s16_f16 (float16_t a) { +// return vcvtph_s16_f16(a); +// } + +// NYI-LABEL: test_vcvtph_s32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// int32_t test_vcvtph_s32_f16 (float16_t a) { +// return vcvtph_s32_f16(a); +// } + +// NYI-LABEL: test_vcvtph_s64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// int64_t test_vcvtph_s64_f16 (float16_t a) { +// return vcvtph_s64_f16(a); +// } + +// NYI-LABEL: test_vcvtph_u16_f16 +// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcvtph_u16_f16 (float16_t a) { +// return vcvtph_u16_f16(a); +// } + +// NYI-LABEL: test_vcvtph_u32_f16 +// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a) +// NYI: ret i32 [[VCVT]] +// uint32_t test_vcvtph_u32_f16 (float16_t a) { +// return vcvtph_u32_f16(a); +// } + +// NYI-LABEL: test_vcvtph_u64_f16 +// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a) +// NYI: ret i64 [[VCVT]] +// uint64_t test_vcvtph_u64_f16 (float16_t a) { +// return vcvtph_u64_f16(a); +// } + 
+// NYI-LABEL: test_vnegh_f16 +// NYI: [[NEG:%.*]] = fneg half %a +// NYI: ret half [[NEG]] +// float16_t test_vnegh_f16(float16_t a) { +// return vnegh_f16(a); +// } + +// NYI-LABEL: test_vrecpeh_f16 +// NYI: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a) +// NYI: ret half [[VREC]] +// float16_t test_vrecpeh_f16(float16_t a) { +// return vrecpeh_f16(a); +// } + +// NYI-LABEL: test_vrecpxh_f16 +// NYI: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half %a) +// NYI: ret half [[VREC]] +// float16_t test_vrecpxh_f16(float16_t a) { +// return vrecpxh_f16(a); +// } + +// NYI-LABEL: test_vrndh_f16 +// NYI: [[RND:%.*]] = call half @llvm.trunc.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndh_f16(float16_t a) { +// return vrndh_f16(a); +// } + +// NYI-LABEL: test_vrndah_f16 +// NYI: [[RND:%.*]] = call half @llvm.round.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndah_f16(float16_t a) { +// return vrndah_f16(a); +// } + +// NYI-LABEL: test_vrndih_f16 +// NYI: [[RND:%.*]] = call half @llvm.nearbyint.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndih_f16(float16_t a) { +// return vrndih_f16(a); +// } + +// NYI-LABEL: test_vrndmh_f16 +// NYI: [[RND:%.*]] = call half @llvm.floor.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndmh_f16(float16_t a) { +// return vrndmh_f16(a); +// } + +// NYI-LABEL: test_vrndnh_f16 +// NYI: [[RND:%.*]] = call half @llvm.roundeven.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndnh_f16(float16_t a) { +// return vrndnh_f16(a); +// } + +// NYI-LABEL: test_vrndph_f16 +// NYI: [[RND:%.*]] = call half @llvm.ceil.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndph_f16(float16_t a) { +// return vrndph_f16(a); +// } + +// NYI-LABEL: test_vrndxh_f16 +// NYI: [[RND:%.*]] = call half @llvm.rint.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrndxh_f16(float16_t a) { +// return vrndxh_f16(a); +// } + +// NYI-LABEL: test_vrsqrteh_f16 +// NYI: 
[[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half %a) +// NYI: ret half [[RND]] +// float16_t test_vrsqrteh_f16(float16_t a) { +// return vrsqrteh_f16(a); +// } + +// NYI-LABEL: test_vsqrth_f16 +// NYI: [[SQR:%.*]] = call half @llvm.sqrt.f16(half %a) +// NYI: ret half [[SQR]] +// float16_t test_vsqrth_f16(float16_t a) { +// return vsqrth_f16(a); +// } + +// CIR-LABEL: vaddh_f16 +// CIR: [[v3:%.*]] = cir.binop(add, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// +// LLVM-LABEL: test_vaddh_f16 +// LLVM: [[ADD:%.*]] = fadd half [[A:%.*]], [[B:%.*]] +// LLVM: ret half [[ADD]] +float16_t test_vaddh_f16(float16_t a, float16_t b) { + return vaddh_f16(a, b); +} + +// NYI-LABEL: test_vabdh_f16 +// NYI: [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b) +// NYI: ret half [[ABD]] +// float16_t test_vabdh_f16(float16_t a, float16_t b) { +// return vabdh_f16(a, b); +// } + +// NYI-LABEL: test_vcageh_f16 +// NYI: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b) +// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcageh_f16(float16_t a, float16_t b) { +// return vcageh_f16(a, b); +// } + +// NYI-LABEL: test_vcagth_f16 +// NYI: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %a, half %b) +// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcagth_f16(float16_t a, float16_t b) { +// return vcagth_f16(a, b); +// } + +// NYI-LABEL: test_vcaleh_f16 +// NYI: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %b, half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcaleh_f16(float16_t a, float16_t b) { +// return vcaleh_f16(a, b); +// } + +// NYI-LABEL: test_vcalth_f16 +// NYI: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %b, half %a) +// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16 +// NYI: ret i16 [[RET]] +// uint16_t test_vcalth_f16(float16_t a, float16_t b) { +// return 
vcalth_f16(a, b); +// } + +// NYI-LABEL: test_vceqh_f16 +// NYI: [[TMP1:%.*]] = fcmp oeq half %a, %b +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vceqh_f16(float16_t a, float16_t b) { +// return vceqh_f16(a, b); +// } + +// NYI-LABEL: test_vcgeh_f16 +// NYI: [[TMP1:%.*]] = fcmp oge half %a, %b +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vcgeh_f16(float16_t a, float16_t b) { +// return vcgeh_f16(a, b); +// } + +// NYI-LABEL: test_vcgth_f16 +//NYI: [[TMP1:%.*]] = fcmp ogt half %a, %b +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vcgth_f16(float16_t a, float16_t b) { +// return vcgth_f16(a, b); +// } + +// NYI-LABEL: test_vcleh_f16 +// NYI: [[TMP1:%.*]] = fcmp ole half %a, %b +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vcleh_f16(float16_t a, float16_t b) { +// return vcleh_f16(a, b); +// } + +// NYI-LABEL: test_vclth_f16 +// NYI: [[TMP1:%.*]] = fcmp olt half %a, %b +// NYI: [[TMP2:%.*]] = sext i1 [[TMP1]] to i16 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vclth_f16(float16_t a, float16_t b) { +// return vclth_f16(a, b); +// } + +// NYI-LABEL: test_vcvth_n_f16_s16 +// NYI: [[SEXT:%.*]] = sext i16 %a to i32 +// NYI: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 1) +// NYI: ret half [[CVT]] +// float16_t test_vcvth_n_f16_s16(int16_t a) { +// return vcvth_n_f16_s16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_f16_s32 +// NYI: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1) +// NYI: ret half [[CVT]] +// float16_t test_vcvth_n_f16_s32(int32_t a) { +// return vcvth_n_f16_s32(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_f16_s64 +// NYI: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1) +// NYI: ret half [[CVT]] +// float16_t test_vcvth_n_f16_s64(int64_t a) { +// return vcvth_n_f16_s64(a, 1); +// } + +// 
NYI-LABEL: test_vcvth_n_s16_f16 +// NYI: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1) +// NYI: [[RET:%.*]] = trunc i32 [[CVT]] to i16 +// NYI: ret i16 [[RET]] +// int16_t test_vcvth_n_s16_f16(float16_t a) { +// return vcvth_n_s16_f16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_s32_f16 +// NYI: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1) +// NYI: ret i32 [[CVT]] +// int32_t test_vcvth_n_s32_f16(float16_t a) { +// return vcvth_n_s32_f16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_s64_f16 +// NYI: [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1) +// NYI: ret i64 [[CVT]] +// int64_t test_vcvth_n_s64_f16(float16_t a) { +// return vcvth_n_s64_f16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_f16_u16 +// NYI: [[SEXT:%.*]] = zext i16 %a to i32 +// NYI: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 [[SEXT]], i32 1) +// NYI: ret half [[CVT]] +// float16_t test_vcvth_n_f16_u16(int16_t a) { +// return vcvth_n_f16_u16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_f16_u32 +// NYI: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1) +// NYI: ret half [[CVT]] +// float16_t test_vcvth_n_f16_u32(int32_t a) { +// return vcvth_n_f16_u32(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_f16_u64 +// NYI: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i64(i64 %a, i32 1) +// NYI: ret half [[CVT]] +// float16_t test_vcvth_n_f16_u64(int64_t a) { +// return vcvth_n_f16_u64(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_u16_f16 +// NYI: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1) +// NYI: [[RET:%.*]] = trunc i32 [[CVT]] to i16 +// NYI: ret i16 [[RET]] +// int16_t test_vcvth_n_u16_f16(float16_t a) { +// return vcvth_n_u16_f16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_u32_f16 +// NYI: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1) +// NYI: ret i32 [[CVT]] +// int32_t test_vcvth_n_u32_f16(float16_t a) { 
+// return vcvth_n_u32_f16(a, 1); +// } + +// NYI-LABEL: test_vcvth_n_u64_f16 +// NYI: [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f16(half %a, i32 1) +// NYI: ret i64 [[CVT]] +// int64_t test_vcvth_n_u64_f16(float16_t a) { +// return vcvth_n_u64_f16(a, 1); +// } + +// CIR-LABEL: vdivh_f16 +// CIR: [[v3:%.*]] = cir.binop(div, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// +// LLVM-LABEL: test_vdivh_f16 +// LLVM: [[DIV:%.*]] = fdiv half [[A:%.*]], [[B:%.*]] +// LLVM: ret half [[DIV]] +float16_t test_vdivh_f16(float16_t a, float16_t b) { + return vdivh_f16(a, b); +} + +// NYI-LABEL: test_vmaxh_f16 +// NYI: [[MAX:%.*]] = call half @llvm.aarch64.neon.fmax.f16(half %a, half %b) +// NYI: ret half [[MAX]] +// float16_t test_vmaxh_f16(float16_t a, float16_t b) { +// return vmaxh_f16(a, b); +// } + +// NYI-LABEL: test_vmaxnmh_f16 +// NYI: [[MAX:%.*]] = call half @llvm.aarch64.neon.fmaxnm.f16(half %a, half %b) +// NYI: ret half [[MAX]] +// float16_t test_vmaxnmh_f16(float16_t a, float16_t b) { +// return vmaxnmh_f16(a, b); +// } + +// NYI-LABEL: test_vminh_f16 +// NYI: [[MIN:%.*]] = call half @llvm.aarch64.neon.fmin.f16(half %a, half %b) +// NYI: ret half [[MIN]] +// float16_t test_vminh_f16(float16_t a, float16_t b) { +// return vminh_f16(a, b); +// } + +// NYI-LABEL: test_vminnmh_f16 +// NYI: [[MIN:%.*]] = call half @llvm.aarch64.neon.fminnm.f16(half %a, half %b) +// NYI: ret half [[MIN]] +// float16_t test_vminnmh_f16(float16_t a, float16_t b) { +// return vminnmh_f16(a, b); +// } + +// CIR-LABEL: vmulh_f16 +// CIR: [[v3:%.*]] = cir.binop(mul, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// +// LLVM-LABEL: test_vmulh_f16 +// LLVM: [[MUL:%.*]] = fmul half [[A:%.*]], [[B:%.*]] +// LLVM: ret half [[MUL]] +float16_t test_vmulh_f16(float16_t a, float16_t b) { + return vmulh_f16(a, b); +} + +// NYI-LABEL: test_vmulxh_f16 +// NYI: [[MUL:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b) +// NYI: ret half [[MUL]] +// float16_t test_vmulxh_f16(float16_t a, float16_t b) { 
+// return vmulxh_f16(a, b); +// } + +// NYI-LABEL: test_vrecpsh_f16 +// NYI: [[RECPS:%.*]] = call half @llvm.aarch64.neon.frecps.f16(half %a, half %b) +// NYI: ret half [[RECPS]] +// float16_t test_vrecpsh_f16(float16_t a, float16_t b) { +// return vrecpsh_f16(a, b); +// } + +// NYI-LABEL: test_vrsqrtsh_f16 +// NYI: [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b) +// NYI: ret half [[RSQRTS]] +// float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) { +// return vrsqrtsh_f16(a, b); +// } + +// CIR-LABEL: vsubh_f16 +// CIR: [[v3:%.*]] = cir.binop(sub, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// +// LLVM-LABEL: test_vsubh_f16 +// LLVM: [[SUB:%.*]] = fsub half [[A:%.*]], [[B:%.*]] +// LLVM: ret half [[SUB]] +float16_t test_vsubh_f16(float16_t a, float16_t b) { + return vsubh_f16(a, b); +} + +// NYI-LABEL: test_vfmah_f16 +// NYI: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a) +// NYI: ret half [[FMA]] +// float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) { +// return vfmah_f16(a, b, c); +// } + +// NYI-LABEL: test_vfmsh_f16 +// NYI: [[SUB:%.*]] = fneg half %b +// NYI: [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a) +// NYI: ret half [[ADD]] +// float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) { +// return vfmsh_f16(a, b, c); +// } + From 9e59e77d8cbdb63f1667221ecf6e63c0ac72b6ee Mon Sep 17 00:00:00 2001 From: AmrDeveloper Date: Thu, 2 Jan 2025 18:17:35 +0100 Subject: [PATCH 2/2] Improve testing --- clang/test/CIR/CodeGen/AArch64/neon-fp16.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/clang/test/CIR/CodeGen/AArch64/neon-fp16.c b/clang/test/CIR/CodeGen/AArch64/neon-fp16.c index cc9e2631e10c..3d3d4c439f43 100644 --- a/clang/test/CIR/CodeGen/AArch64/neon-fp16.c +++ b/clang/test/CIR/CodeGen/AArch64/neon-fp16.c @@ -413,10 +413,11 @@ // } // CIR-LABEL: vaddh_f16 -// CIR: [[v3:%.*]] = cir.binop(add, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// CIR: 
{{%.*}} = cir.binop(add, {{%.*}}, {{%.*}}) : !cir.f16 // -// LLVM-LABEL: test_vaddh_f16 -// LLVM: [[ADD:%.*]] = fadd half [[A:%.*]], [[B:%.*]] +// LLVM-LABEL: test_vaddh_f16 +// LLVM-SAME: (half [[a:%.]], half [[b:%.]]) +// LLVM: [[ADD:%.*]] = fadd half [[a]], [[b]] // LLVM: ret half [[ADD]] float16_t test_vaddh_f16(float16_t a, float16_t b) { return vaddh_f16(a, b); @@ -590,10 +591,11 @@ float16_t test_vaddh_f16(float16_t a, float16_t b) { // } // CIR-LABEL: vdivh_f16 -// CIR: [[v3:%.*]] = cir.binop(div, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// CIR: {{%.*}} = cir.binop(div, {{%.*}}, {{%.*}}) : !cir.f16 // // LLVM-LABEL: test_vdivh_f16 -// LLVM: [[DIV:%.*]] = fdiv half [[A:%.*]], [[B:%.*]] +// LLVM-SAME: (half [[a:%.]], half [[b:%.]]) +// LLVM: [[DIV:%.*]] = fdiv half [[a]], [[b]] // LLVM: ret half [[DIV]] float16_t test_vdivh_f16(float16_t a, float16_t b) { return vdivh_f16(a, b); @@ -628,10 +630,11 @@ float16_t test_vdivh_f16(float16_t a, float16_t b) { // } // CIR-LABEL: vmulh_f16 -// CIR: [[v3:%.*]] = cir.binop(mul, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// CIR: {{%.*}} = cir.binop(mul, {{%.*}}, {{%.*}}) : !cir.f16 // // LLVM-LABEL: test_vmulh_f16 -// LLVM: [[MUL:%.*]] = fmul half [[A:%.*]], [[B:%.*]] +// LLVM-SAME: (half [[a:%.]], half [[b:%.]]) +// LLVM: [[MUL:%.*]] = fmul half [[a]], [[b]] // LLVM: ret half [[MUL]] float16_t test_vmulh_f16(float16_t a, float16_t b) { return vmulh_f16(a, b); @@ -659,10 +662,11 @@ float16_t test_vmulh_f16(float16_t a, float16_t b) { // } // CIR-LABEL: vsubh_f16 -// CIR: [[v3:%.*]] = cir.binop(sub, [[v1:%.*]], [[v2:%.*]]) : !cir.f16 +// CIR: {{%.*}} = cir.binop(sub, {{%.*}}, {{%.*}}) : !cir.f16 // // LLVM-LABEL: test_vsubh_f16 -// LLVM: [[SUB:%.*]] = fsub half [[A:%.*]], [[B:%.*]] +// LLVM-SAME: (half [[a:%.]], half [[b:%.]]) +// LLVM: [[SUB:%.*]] = fsub half [[a]], [[b]] // LLVM: ret half [[SUB]] float16_t test_vsubh_f16(float16_t a, float16_t b) { return vsubh_f16(a, b);