Skip to content

Commit

Permalink
[spirv] Push GPU target conversion to before SPIR-V conversion (iree-…
Browse files Browse the repository at this point in the history
…org#17816)

This commit moves the `SPIRVConvertGPUTargetPass` to right before the
`ConvertToSPIRVPass` in the pipeline. This makes sure we use the same
`#iree_gpu.target` in the majority of the configuration and lowering
passes in the CodeGen flow, and scopes the SPIR-V target environment to
only the final SPIR-V conversion. With this, we are able to unify and
simplify lots of SPIR-V tests.

Progress towards iree-org#16341

ci-extra:
test_nvidia_gpu,test_nvidia_a100,test_amd_mi250,test_amd_w7900,build_test_all_macos_arm64,build_and_test_android

---------

Signed-off-by: Lei Zhang <antiagainst@gmail.com>
  • Loading branch information
antiagainst authored Jul 13, 2024
1 parent 2ed3f92 commit 9d6b425
Show file tree
Hide file tree
Showing 71 changed files with 1,102 additions and 1,732 deletions.
36 changes: 34 additions & 2 deletions compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -362,15 +362,47 @@ def IREEGPU_TargetAttr : AttrDef<IREEGPU_Dialect, "Target"> {
let assemblyFormat = "`<` struct(params) `>`";

let extraClassDeclaration = [{
int getPreferredSubgroupSize() const {
return getWgp().getSubgroupSizeChoices().asArrayRef().front();
// Subgroup size related APIs

// Returns the smallest entry among the subgroup size choices listed on this
// target's WGP description.
// NOTE(review): assumes the choice list is non-empty; dereferencing the
// result for an empty list would be invalid -- confirm this is verified
// elsewhere.
int getMinSubgroupSize() const {
  auto sizeChoices = getWgp().getSubgroupSizeChoices().asArrayRef();
  return *llvm::min_element(sizeChoices);
}
// Returns the largest entry among the subgroup size choices listed on this
// target's WGP description.
// NOTE(review): assumes the choice list is non-empty; dereferencing the
// result for an empty list would be invalid -- confirm this is verified
// elsewhere.
int getMaxSubgroupSize() const {
  auto sizeChoices = getWgp().getSubgroupSizeChoices().asArrayRef();
  return *llvm::max_element(sizeChoices);
}
// Returns the subgroup size to prefer for this target. When the target
// supports more than one subgroup size, `pickLargest` selects the largest
// choice; otherwise the smallest choice is returned.
//
// AMD RDNA GPUs support multiple subgroup sizes, and the preferred one
// differs by API: HIP prefers 32 while Vulkan prefers 64.
// TODO: We should be able to force the Vulkan side to use 32 consistently
// too with subgroup size control; it might have perf implications though.
int getPreferredSubgroupSize(bool pickLargest=false) const {
  return pickLargest ? getMaxSubgroupSize() : getMinSubgroupSize();
}

// Hardware feature related APIs

// Returns true if the subgroup operation bits on this target's WGP
// description include SubgroupOps::Shuffle.
bool supportsSubgroupShuffle() const {
  auto subgroupOpBits = getWgp().getSubgroup().getValue();
  return bitEnumContainsAll(subgroupOpBits, SubgroupOps::Shuffle);
}

// Vendor querying APIs

// Returns true if the architecture string starts with "gfx" or "rdna".
bool isAMD() const {
  const auto arch = getArch();
  if (arch.starts_with("gfx")) {
    return true;
  }
  return arch.starts_with("rdna");
}
// Returns true if the architecture string starts with "apple".
bool isApple() const {
  const auto arch = getArch();
  return arch.starts_with("apple");
}
// Returns true if the architecture string starts with "valhall".
bool isARM() const {
  const auto arch = getArch();
  return arch.starts_with("valhall");
}
// Returns true if the architecture string starts with "sm_".
bool isNVIDIA() const {
  const auto arch = getArch();
  return arch.starts_with("sm_");
}
// Returns true if the architecture string starts with "adreno".
bool isQualcomm() const {
  const auto arch = getArch();
  return arch.starts_with("adreno");
}

// CUDA specific querying APIs

std::optional<int> getCUDAComputeCapability() const;
// Returns true if this target supports TensorCore MMA ops with TF32
// input types.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ std::optional<TargetDetails> getAndroidProfileDetails(StringRef target) {
//===----------------------------------------------------------------------===//

TargetAttr getMetalTargetDetails(MLIRContext *context) {
return createTargetAttr(*getAppleTargetDetails(), /*arch=*/"",
return createTargetAttr(*getAppleTargetDetails(), /*arch=*/"apple",
/*features=*/"spirv:v1.3,cap:Shader", context);
}

Expand Down Expand Up @@ -603,6 +603,8 @@ TargetAttr getVulkanTargetDetails(llvm::StringRef target,
// SPIR-V 1.4. For non-mobile GPUs we target Vulkan 1.3, which accepts
// SPIR-V 1.6 as the maximum.

// TODO: Add feature bits for physical storage buffer.

if (std::optional<TargetDetails> details = getAMDGPUTargetDetails(target)) {
return createTargetAttr(*details, normalizeAMDGPUTarget(target),
/*features=*/"spirv:v1.6,cap:Shader", context);
Expand Down Expand Up @@ -654,7 +656,8 @@ TargetAttr getFullTarget(StringRef targetAPI, StringRef aliasTarget,
StringRef features, MLIRContext *context) {
return llvm::StringSwitch<TargetAttr>(targetAPI)
.Case("cuda", getCUDATargetDetails(aliasTarget, features, context))
.Case("rocm", getHIPTargetDetails(aliasTarget, features, context))
.Case("hip", getHIPTargetDetails(aliasTarget, features, context))
.Case("vulkan", getVulkanTargetDetails(aliasTarget, context))
.Default(nullptr);
}

Expand Down
21 changes: 7 additions & 14 deletions compiler/src/iree/compiler/Codegen/SPIRV/AMDConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,10 @@
//
//===----------------------------------------------------------------------===//

#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
#include "iree/compiler/Codegen/Utils/Utils.h"
#include "iree/compiler/Dialect/Util/IR/UtilTypes.h"
#include "llvm/Support/Debug.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinOps.h"

#define DEBUG_TYPE "iree-spirv-amd-config"

Expand All @@ -35,15 +30,14 @@ constexpr unsigned AMDNumSubgroupsPerWorkgroup = 4;
constexpr unsigned AMDNumMNTilesPerSubgroup = 8;

static LogicalResult setAMDMatmulConfig(linalg::LinalgOp op,
const spirv::TargetEnv &targetEnv) {
IREE::GPU::TargetAttr target) {
if (succeeded(setCooperativeMatrixConfig(
targetEnv, op, AMDNumSubgroupsPerWorkgroup, AMDNumMNTilesPerSubgroup,
target, op, AMDNumSubgroupsPerWorkgroup, AMDNumMNTilesPerSubgroup,
AMDCoopMatrixSoftwarePipelineDepth,
AMDCoopMatrixSoftwarePipelineStoreStage)))
return success();

spirv::ResourceLimitsAttr limits = targetEnv.getResourceLimits();
const int subgroupSize = limits.getSubgroupSize();
int subgroupSize = target.getPreferredSubgroupSize(/*pickLargest=*/true);
const std::array<int64_t, 2> workgroupXY = {subgroupSize / 2, 8};
std::array<int64_t, 3> threadMNK;
auto inputType =
Expand All @@ -53,7 +47,7 @@ static LogicalResult setAMDMatmulConfig(linalg::LinalgOp op,
} else {
threadMNK = {8, 4, 16};
}
return setMatmulOpConfig(limits, op, workgroupXY, threadMNK,
return setMatmulOpConfig(target, op, workgroupXY, threadMNK,
/*enablePromotion=*/true,
AMDSimtSoftwarePipelineDepth,
AMDSimtSoftwarePipelineStoreStage);
Expand All @@ -71,14 +65,13 @@ static LogicalResult setAMDMatmulConfig(linalg::LinalgOp op,
// * Max 20 waves per SIMD32
// * Max 64KB LDS per workgroup

LogicalResult setAMDCodeGenConfig(const spirv::TargetEnv &targetEnv,
LogicalResult setAMDCodeGenConfig(IREE::GPU::TargetAttr target,
Operation *rootOp) {
spirv::ResourceLimitsAttr limits = targetEnv.getResourceLimits();
int subgroupSize = limits.getSubgroupSize();
int subgroupSize = target.getPreferredSubgroupSize(/*pickLargest=*/true);

if (auto linalgOp = dyn_cast<linalg::LinalgOp>(rootOp)) {
if (isMatmulOrBatchMatmul(linalgOp))
return setAMDMatmulConfig(linalgOp, targetEnv);
return setAMDMatmulConfig(linalgOp, target);
}

if (auto convOp = dyn_cast<linalg::ConvolutionOpInterface>(rootOp)) {
Expand Down
15 changes: 6 additions & 9 deletions compiler/src/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,13 @@

#include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
#include "iree/compiler/Dialect/Util/IR/UtilTypes.h"
#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/BuiltinOps.h"

namespace mlir::iree_compiler::detail {

static LogicalResult setAdrenoMatmulConfig(linalg::LinalgOp op,
spirv::ResourceLimitsAttr limits) {
const int subgroupSize = limits.getSubgroupSize();
IREE::GPU::TargetAttr target) {
const int subgroupSize = target.getPreferredSubgroupSize();
const std::array<int64_t, 2> workgroupXY = {subgroupSize / 2, 2};
std::array<int64_t, 3> threadMNK;
auto inputType =
Expand All @@ -32,24 +30,23 @@ static LogicalResult setAdrenoMatmulConfig(linalg::LinalgOp op,
} else {
threadMNK = {16, 4, 4};
}
return setMatmulOpConfig(limits, op, workgroupXY, threadMNK);
return setMatmulOpConfig(target, op, workgroupXY, threadMNK);
}

//===----------------------------------------------------------------------===//
// Entry Point
//===----------------------------------------------------------------------===//

LogicalResult setAdrenoCodeGenConfig(const spirv::TargetEnv &targetEnv,
LogicalResult setAdrenoCodeGenConfig(IREE::GPU::TargetAttr target,
Operation *rootOp) {
spirv::ResourceLimitsAttr limits = targetEnv.getResourceLimits();
int subgroupSize = limits.getSubgroupSize();
int subgroupSize = target.getPreferredSubgroupSize();

if (!isa<linalg::LinalgOp>(rootOp))
return failure();

auto linalgOp = cast<linalg::LinalgOp>(rootOp);
if (isMatmulOrBatchMatmul(linalgOp))
return setAdrenoMatmulConfig(linalgOp, limits);
return setAdrenoMatmulConfig(linalgOp, target);

if (auto convOp = dyn_cast<linalg::ConvolutionOpInterface>(rootOp)) {
// Use the result type in case of larger bitwidth for accumulators.
Expand Down
14 changes: 5 additions & 9 deletions compiler/src/iree/compiler/Codegen/SPIRV/AppleConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,12 @@

#include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
#include "iree/compiler/Dialect/Util/IR/UtilTypes.h"
#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h"
#include "mlir/IR/BuiltinOps.h"

namespace mlir::iree_compiler::detail {

static LogicalResult setAppleMatmulConfig(linalg::LinalgOp op,
spirv::ResourceLimitsAttr limits) {
IREE::GPU::TargetAttr target) {
const std::array<int64_t, 2> workgroupXY = {256, 1};
std::array<int64_t, 3> threadMNK;
auto inputType =
Expand All @@ -32,21 +29,20 @@ static LogicalResult setAppleMatmulConfig(linalg::LinalgOp op,
} else {
threadMNK = {4, 4, 4};
}
return setMatmulOpConfig(limits, op, workgroupXY, threadMNK);
return setMatmulOpConfig(target, op, workgroupXY, threadMNK);
}

//===----------------------------------------------------------------------===//
// Entry Point
//===----------------------------------------------------------------------===//

LogicalResult setAppleCodeGenConfig(const spirv::TargetEnv &targetEnv,
LogicalResult setAppleCodeGenConfig(IREE::GPU::TargetAttr target,
Operation *rootOp) {
spirv::ResourceLimitsAttr limits = targetEnv.getResourceLimits();
int subgroupSize = limits.getSubgroupSize();
int subgroupSize = target.getPreferredSubgroupSize();

if (auto linalgOp = dyn_cast<linalg::LinalgOp>(rootOp)) {
if (isMatmulOrBatchMatmul(linalgOp))
return setAppleMatmulConfig(linalgOp, limits);
return setAppleMatmulConfig(linalgOp, target);
}

if (auto convOp = dyn_cast<linalg::ConvolutionOpInterface>(rootOp)) {
Expand Down
1 change: 1 addition & 0 deletions compiler/src/iree/compiler/Codegen/SPIRV/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ iree_compiler_cc_library(
"//compiler/src/iree/compiler/Codegen/Common/GPU:CommonGPUPasses",
"//compiler/src/iree/compiler/Codegen/Common/GPU:GPUHeuristics",
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
"//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect",
"//compiler/src/iree/compiler/Codegen/Interfaces:PartitionableLoopsInterface",
"//compiler/src/iree/compiler/Codegen/TransformStrategies/GPU",
"//compiler/src/iree/compiler/Codegen/Transforms",
Expand Down
1 change: 1 addition & 0 deletions compiler/src/iree/compiler/Codegen/SPIRV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ iree_cc_library(
iree::compiler::Codegen::Common::GPU::GPUHeuristics
iree::compiler::Codegen::Common::TransformDialectInterpreterPass
iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
iree::compiler::Codegen::Dialect::GPU::IR::IREEGPUDialect
iree::compiler::Codegen::Interfaces::PartitionableLoopsInterface
iree::compiler::Codegen::TransformStrategies::GPU
iree::compiler::Codegen::Transforms
Expand Down
22 changes: 9 additions & 13 deletions compiler/src/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,12 @@
#include <cstdint>
#include <tuple>

#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/SPIRV/PassDetail.h"
#include "iree/compiler/Codegen/SPIRV/Passes.h"
#include "iree/compiler/Codegen/SPIRV/Utils.h"
#include "iree/compiler/Codegen/Utils/MarkerUtils.h"
#include "iree/compiler/Codegen/Utils/Utils.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "iree/compiler/Dialect/HAL/IR/HALTypes.h"
#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FormatVariadic.h"
Expand All @@ -41,15 +37,11 @@
#include "mlir/Conversion/TensorToSPIRV/TensorToSPIRV.h"
#include "mlir/Conversion/VectorToSPIRV/VectorToSPIRV.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Math/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVTypes.h"
#include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"
Expand Down Expand Up @@ -596,17 +588,21 @@ void ConvertToSPIRVPass::runOnOperation() {
}
}

spirv::TargetEnvAttr targetAttr = getSPIRVTargetEnvAttr(moduleOp);
moduleOp->setAttr(spirv::getTargetEnvAttrName(), targetAttr);

if (indexBits != 32 && indexBits != 64) {
moduleOp.emitOpError(
"Only 32-bit or 64-bit indices are supported for SPIR-V");
"only 32-bit or 64-bit indices are supported for SPIR-V");
return signalPassFailure();
}

bool use64bitIndex = indexBits == 64;

auto targetAttr = moduleOp->getAttrOfType<spirv::TargetEnvAttr>(
spirv::getTargetEnvAttrName());
if (!targetAttr) {
moduleOp.emitOpError("should contain a spirv.target_env attribute");
return signalPassFailure();
}
spirv::TargetEnv targetEnv(targetAttr);

if (use64bitIndex && !targetEnv.allows(spirv::Capability::Int64)) {
moduleOp.emitOpError(
"64-bit indices are not supported for the specified target "
Expand Down
Loading

0 comments on commit 9d6b425

Please sign in to comment.