Skip to content

Commit

Permalink
[SYCLomatic][PTX] Refine migration of PTX asm instruction "lop3.b32" (#…
Browse files Browse the repository at this point in the history
…2592)


Signed-off-by: chenwei.sun <chenwei.sun@intel.com>
  • Loading branch information
tomflinda authored Jan 9, 2025
1 parent 7e8a364 commit 821800f
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 23 deletions.
50 changes: 27 additions & 23 deletions clang/lib/DPCT/RulesAsm/AsmMigration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,24 +943,27 @@ class SYCLGen : public SYCLGenBase {
return SYCLGenError();
OS() << " = ";

std::string Op[3];
for (auto Idx : llvm::seq(0, 3)) {
std::string Op[4];
for (auto Idx : llvm::seq(0, 4)) {
if (tryEmitStmt(Op[Idx], I->getInputOperand(Idx)))
return SYCLGenError();
}

if (!isa<InlineAsmIntegerLiteral>(I->getInputOperand(3)))
return SYCLGenError();
unsigned Imm = dyn_cast<InlineAsmIntegerLiteral>(I->getInputOperand(3))
->getValue()
.getZExtValue();
if (!isa<InlineAsmIntegerLiteral>(I->getInputOperand(3))) {
OS() << MapNames::getDpctNamespace() << "ternary_logic_op(" << Op[0]
<< ", " << Op[1] << ", " << Op[2] << ", " << Op[3] << ")";

} else {
unsigned Imm = dyn_cast<InlineAsmIntegerLiteral>(I->getInputOperand(3))
->getValue()
.getZExtValue();

#define EMPTY nullptr
#define EMPTY4 EMPTY, EMPTY, EMPTY, EMPTY
#define EMPTY16 EMPTY4, EMPTY4, EMPTY4, EMPTY4
constexpr const char *FastMap[256] = {
/*0x00*/ "0",
// clang-format off
constexpr const char *FastMap[256] = {
/*0x00*/ "0",
// clang-format off
EMPTY16, EMPTY4, EMPTY4, EMPTY,
/*0x1a*/ "({0} & {1} | {2}) ^ {0}",
EMPTY, EMPTY, EMPTY,
Expand Down Expand Up @@ -988,12 +991,12 @@ class SYCLGen : public SYCLGenBase {
EMPTY16, EMPTY, EMPTY, EMPTY,
/*0xfe*/ "{0} | {1} | {2}",
/*0xff*/ "uint32_t(-1)"};
// clang-format on
// clang-format on

#undef EMPTY16
#undef EMPTY4
#undef EMPTY
// clang-format off
// clang-format off
constexpr const char *SlowMap[8] = {
/* 0x01*/ "(~{0} & ~{1} & ~{2})",
/* 0x02*/ "(~{0} & ~{1} & {2})",
Expand All @@ -1004,20 +1007,21 @@ class SYCLGen : public SYCLGenBase {
/* 0x40*/ "({0} & {1} & ~{2})",
/* 0x80*/ "({0} & {1} & {2})"
};
// clang-format on
// clang-format on

if (FastMap[Imm]) {
OS() << llvm::formatv(FastMap[Imm], Op[0], Op[1], Op[2]);
} else {
SmallVector<std::string, 8> Templates;
for (auto Bit : llvm::seq(0, 8)) {
if (Imm & (1U << Bit)) {
Templates.push_back(
llvm::formatv(SlowMap[Bit], Op[0], Op[1], Op[2]).str());
if (FastMap[Imm]) {
OS() << llvm::formatv(FastMap[Imm], Op[0], Op[1], Op[2]);
} else {
SmallVector<std::string, 8> Templates;
for (auto Bit : llvm::seq(0, 8)) {
if (Imm & (1U << Bit)) {
Templates.push_back(
llvm::formatv(SlowMap[Bit], Op[0], Op[1], Op[2]).str());
}
}
}

OS() << llvm::join(Templates, " | ");
OS() << llvm::join(Templates, " | ");
}
}

endstmt();
Expand Down
100 changes: 100 additions & 0 deletions clang/runtime/dpct-rt/include/dpct/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,106 @@ template <typename Func, std::size_t N> struct nth_argument_type {
using type = decltype(helper(std::declval<Func>()));
};

/// \brief Performs an arbitrary bitwise logical operation on three input
/// values \p a, \p b and \p c, selected by the 8-bit truth table \p lut, and
/// returns the result.
///
/// For every bit position, the three input bits form the index
/// (a << 2) | (b << 1) | c, and the result bit is the bit of \p lut at that
/// index. For example, lut = 0x80 yields a & b & c and lut = 0x96 yields
/// a ^ b ^ c.
///
/// \param [in] a Input value
/// \param [in] b Input value
/// \param [in] c Input value
/// \param [in] lut Truth table used for the lookup
/// \returns The result of applying the truth table to each bit position
inline uint32_t ternary_logic_op(uint32_t a, uint32_t b, uint32_t c,
                                 uint8_t lut) {
  uint32_t result = 0;

  switch (lut) {
  // Single-minterm tables and a few common tables have compact closed forms.
  case 0x0:
    result = 0;
    break;
  case 0x1:
    result = ~a & ~b & ~c;
    break;
  case 0x2:
    result = ~a & ~b & c;
    // This break is required: without it control falls through to case 0x4
    // and the result is overwritten with ~a & b & ~c.
    break;
  case 0x4:
    result = ~a & b & ~c;
    break;
  case 0x8:
    result = ~a & b & c;
    break;
  case 0x10:
    result = a & ~b & ~c;
    break;
  case 0x20:
    result = a & ~b & c;
    break;
  case 0x40:
    result = a & b & ~c;
    break;
  case 0x80:
    result = a & b & c;
    break;
  case 0x1a:
    result = ((a & b) | c) ^ a;
    break;
  case 0x1e:
    result = a ^ (b | c);
    break;
  case 0x2d:
    result = ~a ^ (~b & c);
    break;
  case 0x78:
    result = a ^ (b & c);
    break;
  case 0x96:
    result = a ^ b ^ c;
    break;
  case 0xb4:
    result = a ^ (b & ~c);
    break;
  case 0xb8:
    result = a ^ (b & (c ^ a));
    break;
  case 0xd2:
    result = a ^ (~b & c);
    break;
  case 0xe8:
    result = (a & (b | c)) | (b & c);
    break;
  case 0xea:
    result = (a & b) | c;
    break;
  case 0xfe:
    result = a | b | c;
    break;
  case 0xff:
    result = ~uint32_t{0};
    break;
  default: {
    // General case: OR together the minterm of every bit set in the table.
    if (lut & 0x01)
      result |= ~a & ~b & ~c;
    if (lut & 0x02)
      result |= ~a & ~b & c;
    if (lut & 0x04)
      result |= ~a & b & ~c;
    if (lut & 0x08)
      result |= ~a & b & c;
    if (lut & 0x10)
      result |= a & ~b & ~c;
    if (lut & 0x20)
      result |= a & ~b & c;
    if (lut & 0x40)
      result |= a & b & ~c;
    if (lut & 0x80)
      result |= a & b & c;
    break;
  }
  }

  return result;
}

#ifdef _WIN32
#define DPCT_EXPORT __declspec(dllexport)
#else
Expand Down
13 changes: 13 additions & 0 deletions clang/test/dpct/asm/lop3.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,17 @@ __device__ int hard(int a) {
asm("lop3.b32 %0, %1, %2, 3, 0x1C;" : "=r"(d4) : "r"(a + B), "r"(B));
return d4;
}

// CHECK: template <int lut, typename T> inline T lop3(T a, T b, T c) {
// CHECK-NEXT: T res;
// CHECK-NEXT: res = dpct::ternary_logic_op(a, b, c, lut);
// CHECK-NEXT: return res;
// CHECK-NEXT:}
// Wrapper around the PTX "lop3.b32" instruction: a, b and c are passed as
// register operands and the template argument lut as an immediate ("n")
// operand, so the truth table is an asm operand (%4) rather than an integer
// literal in the asm string. The CHECK lines above expect this form to be
// migrated to a dpct::ternary_logic_op call.
template <int lut, typename T> __device__ inline T lop3(T a, T b, T c) {
T res;
asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n"
: "=r"(res)
: "r"(a), "r"(b), "r"(c), "n"(lut));
return res;
}
// clang-format on

0 comments on commit 821800f

Please sign in to comment.