From 3509ead1d7b94b873d885f4387d98366fdaac4fd Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com>
Date: Tue, 17 Dec 2024 14:19:48 -0800
Subject: [PATCH] Cleanup `ConvertToStream` to accommodate llvm/llvm-project@3f136f7 (#19451)

The upstream change https://github.com/llvm/llvm-project/commit/3f136f7
allows `ConvertToStream` to better handle the 1:N type conversion,
specifically the conversion of a `tensor<...>` to `!stream.resource<*>,
index`. Instead of working around `builtin.unrealized_conversion_cast`s,
the conversion can now get the converted values directly through the
`OneToNOpAdaptor` and can also replace a `tensor<...>` directly with
multiple values using `ConversionPatternRewriter::replaceOpWithMultiple`.
These changes are required to drop the revert of
https://github.com/llvm/llvm-project/pull/116470 in the IREE ToM. This
change drops those reverts as well.

Fixes #19448

---------

Signed-off-by: MaheshRavishankar
---
 .../Conversion/FlowToStream/Patterns.cpp      | 341 ++++++++++--------
 .../Conversion/HALToStream/Patterns.cpp       |  72 ++--
 .../Stream/Conversion/PatternUtils.cpp        |  95 ++---
 .../Dialect/Stream/Conversion/PatternUtils.h  |  45 +--
 .../Conversion/StandardToStream/Patterns.cpp  | 132 ++++---
 .../Conversion/UtilToStream/Patterns.cpp      |  68 ++--
 .../UtilToStream/test/compiler_hints.mlir     |   4 +-
 .../Stream/Transforms/ConvertToStream.cpp     |  18 +-
 .../Transforms/test/convert_to_stream.mlir    |   3 +-
 third_party/llvm-project                      |   2 +-
 10 files changed, 418 insertions(+), 362 deletions(-)

diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
index 31d61516e3eb..44c8a4630ea0 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/FlowToStream/Patterns.cpp
@@ -13,6 +13,7 @@
 #include "iree/compiler/Dialect/Stream/IR/StreamOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/BuiltinDialect.h"
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 
@@ -20,6 +21,14 @@ namespace mlir::iree_compiler {
 
 namespace {
 
+static SmallVector flattenValues(ArrayRef values) {
+  SmallVector vec;
+  for (auto v : values) {
+    vec.append(v.begin(), v.end());
+  }
+  return vec;
+}
+
 // Inserts a sizeof calculation for the given tensor value type and dims.
 // This should only be used to produce sizes for values produced by an op; the
 // size of operands must be queried from the input resource.
@@ -39,7 +48,7 @@ struct ConvertTensorConstantOp
 public:
   using AffinityOpConversionPattern::AffinityOpConversionPattern;
   LogicalResult matchAndRewriteOnAffinity(
-      IREE::Flow::TensorConstantOp constantOp, OpAdaptor adaptor,
+      IREE::Flow::TensorConstantOp constantOp, OneToNOpAdaptor adaptor,
       IREE::Stream::AffinityAttr executionAffinityAttr,
       ConversionPatternRewriter &rewriter) const override {
     // Capture the tensor constant strongly typed with constant lifetime.
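For reference, every pattern touched in this patch follows the same 1:N shape. The sketch below is only an illustration of the post-3f136f7 dialect-conversion API, using hypothetical `MyTensorOp`/`MyResourceOp` ops (they are not ops from this patch); it is not part of the diff.

#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

// Hypothetical pattern: MyTensorOp has one tensor operand named `source` and
// one tensor result; MyResourceOp is a single-result op taking (resource, size).
struct ConvertMyTensorOp : public OpConversionPattern<MyTensorOp> {
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(MyTensorOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    // Under a 1:N type conversion each original operand arrives as a
    // ValueRange; a tensor<...> operand shows up as the pair {resource, size}.
    ValueRange source = adaptor.getSource();
    Value resource = source[0];
    Value resourceSize = source[1];
    // Build the replacement op against the converted values.
    Value replacement = rewriter.create<MyResourceOp>(
        op.getLoc(), resource.getType(), resource, resourceSize);
    // Replace the single tensor result with multiple values; the conversion
    // driver materializes casts for any consumers still expecting a tensor.
    rewriter.replaceOpWithMultiple(op, {{replacement, resourceSize}});
    return success();
  }
};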
@@ -55,10 +64,13 @@ struct ConvertTensorConstantOp auto unknownType = rewriter.getType(); auto constantSize = rewriter.createOrFold( constantOp.getLoc(), rewriter.getIndexType(), newOp.getResult()); - rewriter.replaceOpWithNewOp( - constantOp, unknownType, newOp.getResult(), constantSize, constantSize, + auto transferOp = rewriter.create( + constantOp.getLoc(), unknownType, newOp.getResult(), constantSize, + constantSize, /*source_affinity=*/executionAffinityAttr, /*result_affinity=*/executionAffinityAttr); + rewriter.replaceOpWithMultiple(constantOp, + {{transferOp.getResult(), constantSize}}); return success(); } }; @@ -68,7 +80,7 @@ struct ConvertTensorDynamicConstantOp public: using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorDynamicConstantOp constantOp, OpAdaptor adaptor, + IREE::Flow::TensorDynamicConstantOp constantOp, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto attrType = dyn_cast(constantOp.getValue().getType()); @@ -103,10 +115,12 @@ struct ConvertTensorDynamicConstantOp auto unknownType = rewriter.getType(); auto constantSize = rewriter.createOrFold( constantOp.getLoc(), rewriter.getIndexType(), newOp.getResult()); - rewriter.replaceOpWithNewOp( - constantOp, unknownType, newOp.getResult(), constantSize, constantSize, + auto transferOp = rewriter.create( + constantOp.getLoc(), unknownType, newOp.getResult(), constantSize, + constantSize, /*source_affinity=*/executionAffinityAttr, /*result_affinity=*/executionAffinityAttr); + rewriter.replaceOpWithMultiple(constantOp, {{transferOp, constantSize}}); return success(); } }; @@ -123,21 +137,23 @@ struct ConvertTensorCastLikeOp : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern< CastOpTy>::AffinityAwareConversionPattern; - LogicalResult - matchAndRewrite(CastOpTy op, typename CastOpTy::Adaptor adaptor, - ConversionPatternRewriter &rewriter) const override { + LogicalResult matchAndRewrite( + CastOpTy op, + typename OpConversionPattern::OneToNOpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { auto resultAffinityAttr = this->lookupResultAffinity(op.getResult()); - auto source = this->transferTensorOperand(op.getLoc(), op.getSource(), - adaptor.getSource(), - resultAffinityAttr, rewriter); + auto source = this->transferTensorOperands(op.getLoc(), op.getSource(), + adaptor.getSource(), + resultAffinityAttr, rewriter); auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(), op.getResultDims(), resultAffinityAttr, rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, source.resource, op.getSource().getType(), + Value cloneOp = rewriter.create( + op.getLoc(), unknownType, source.resource, op.getSource().getType(), op.getSourceDims(), source.resourceSize, op.getResult().getType(), - adaptor.getResultDims(), resultSize, resultAffinityAttr); + flattenValues(adaptor.getResultDims()), resultSize, resultAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{cloneOp, resultSize}}); return success(); } }; @@ -146,15 +162,16 @@ struct ConvertTensorAllocaOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorAllocaOp op, OpAdaptor adaptor, + IREE::Flow::TensorAllocaOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter 
&rewriter) const override { auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(), op.getResultDims(), executionAffinityAttr, rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, resultSize, executionAffinityAttr); + auto allocaOp = rewriter.create( + op.getLoc(), unknownType, resultSize, executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{allocaOp.getResult(), resultSize}}); return success(); } }; @@ -163,16 +180,18 @@ struct ConvertTensorEmptyOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorEmptyOp op, OpAdaptor adaptor, + IREE::Flow::TensorEmptyOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(), op.getResultDims(), executionAffinityAttr, rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, op.getResult().getType(), adaptor.getResultDims(), - resultSize, executionAffinityAttr); + auto emptyOp = rewriter.create( + op.getLoc(), unknownType, op.getResult().getType(), + flattenValues(adaptor.getResultDims()), resultSize, + executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{emptyOp.getResult(), resultSize}}); return success(); } }; @@ -181,16 +200,18 @@ struct ConvertTensorSplatOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorSplatOp op, OpAdaptor adaptor, + IREE::Flow::TensorSplatOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(), op.getResultDims(), executionAffinityAttr, rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, adaptor.getValue(), op.getResult().getType(), - adaptor.getResultDims(), resultSize, executionAffinityAttr); + auto splatOp = rewriter.create( + op.getLoc(), unknownType, adaptor.getValue().front(), + op.getResult().getType(), flattenValues(adaptor.getResultDims()), + resultSize, executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{splatOp, resultSize}}); return success(); } }; @@ -199,17 +220,19 @@ struct ConvertTensorCloneOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorCloneOp op, OpAdaptor adaptor, + IREE::Flow::TensorCloneOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { - auto operand = transferTensorOperand(op.getLoc(), op.getOperand(), - adaptor.getOperand(), - executionAffinityAttr, rewriter); + auto operand = transferTensorOperands(op.getLoc(), op.getOperand(), + adaptor.getOperand(), + executionAffinityAttr, rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, operand.resource, op.getOperand().getType(), + auto cloneOp = rewriter.create( + op.getLoc(), unknownType, operand.resource, op.getOperand().getType(), op.getArgumentDims(), operand.resourceSize, op.getResult().getType(), - adaptor.getArgumentDims(), operand.resourceSize, executionAffinityAttr); + flattenValues(adaptor.getArgumentDims()), 
operand.resourceSize, + executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{cloneOp, operand.resourceSize}}); return success(); } }; @@ -218,20 +241,21 @@ struct ConvertTensorTransferOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorTransferOp op, OpAdaptor adaptor, + IREE::Flow::TensorTransferOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { if (!executionAffinityAttr) { return rewriter.notifyMatchFailure(op, "invalid stream affinity attr"); } - auto operand = resolveTensorOperand(op.getLoc(), op.getOperand(), - adaptor.getOperand(), rewriter); + auto operand = resolveTensorOperands(op.getLoc(), op.getOperand(), + adaptor.getOperand(), rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, operand.resource, operand.resourceSize, + auto transferOp = rewriter.create( + op.getLoc(), unknownType, operand.resource, operand.resourceSize, operand.resourceSize, /*source_affinity=*/operand.affinity, /*result_affinity=*/executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{transferOp, operand.resourceSize}}); return success(); } }; @@ -240,21 +264,24 @@ struct ConvertTensorSliceOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorSliceOp op, OpAdaptor adaptor, + IREE::Flow::TensorSliceOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto source = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); auto resultSize = buildResultSizeOf(op.getLoc(), op.getResult(), op.getResultDims(), executionAffinityAttr, rewriter); auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, source.resource, op.getSource().getType(), - op.getSourceDims(), source.resourceSize, adaptor.getStartIndices(), - adaptor.getLengths(), op.getResult().getType(), adaptor.getResultDims(), - resultSize, executionAffinityAttr); + auto sliceOp = rewriter.create( + op.getLoc(), unknownType, source.resource, op.getSource().getType(), + op.getSourceDims(), source.resourceSize, + flattenValues(adaptor.getStartIndices()), + flattenValues(adaptor.getLengths()), op.getResult().getType(), + flattenValues(adaptor.getResultDims()), resultSize, + executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{sliceOp, resultSize}}); return success(); } }; @@ -263,20 +290,23 @@ struct ConvertTensorUpdateOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::TensorUpdateOp op, OpAdaptor adaptor, + IREE::Flow::TensorUpdateOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto target = - transferTensorOperand(op.getLoc(), op.getTarget(), adaptor.getTarget(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getTarget(), adaptor.getTarget(), + executionAffinityAttr, rewriter); auto update = - transferTensorOperand(op.getLoc(), op.getUpdate(), adaptor.getUpdate(), - 
executionAffinityAttr, rewriter); - rewriter.replaceOpWithNewOp( - op, target.resource.getType(), target.resource, - op.getTarget().getType(), adaptor.getTargetDims(), target.resourceSize, - adaptor.getStartIndices(), update.resource, op.getUpdate().getType(), - op.getUpdateDims(), update.resourceSize, executionAffinityAttr); + transferTensorOperands(op.getLoc(), op.getUpdate(), adaptor.getUpdate(), + executionAffinityAttr, rewriter); + auto updateOp = rewriter.create( + op.getLoc(), target.resource.getType(), target.resource, + op.getTarget().getType(), flattenValues(adaptor.getTargetDims()), + target.resourceSize, flattenValues(adaptor.getStartIndices()), + update.resource, op.getUpdate().getType(), op.getUpdateDims(), + update.resourceSize, executionAffinityAttr); + rewriter.replaceOpWithMultiple( + op, {{updateOp.getResult(), target.resourceSize}}); return success(); } }; @@ -296,10 +326,10 @@ struct ConvertTensorLoadOp : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(IREE::Flow::TensorLoadOp op, OpAdaptor adaptor, + matchAndRewrite(IREE::Flow::TensorLoadOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto source = resolveTensorOperand(op.getLoc(), op.getSource(), - adaptor.getSource(), rewriter); + auto source = resolveTensorOperands(op.getLoc(), op.getSource(), + adaptor.getSource(), rewriter); // If the source is not a staging resource then we need to transfer it to // a staging resource. We slice out just what is being loaded so that we @@ -311,10 +341,13 @@ struct ConvertTensorLoadOp auto stagingType = rewriter.getType( IREE::Stream::Lifetime::Staging); auto resultType = getTypeConverter()->convertType(op.getResult().getType()); + SmallVector convertedSourceDims = + flattenValues(adaptor.getSourceDims()); + SmallVector convertedIndices = flattenValues(adaptor.getIndices()); if (source.resource.getType() == stagingType) { rewriter.replaceOpWithNewOp( op, resultType, source.resource, op.getSource().getType(), - adaptor.getSourceDims(), source.resourceSize, adaptor.getIndices()); + convertedSourceDims, source.resourceSize, convertedIndices); return success(); } @@ -328,19 +361,18 @@ struct ConvertTensorLoadOp /*result_affinity=*/source.affinity); rewriter.replaceOpWithNewOp( op, resultType, transferOp.getResult(), sourceEncoding, - adaptor.getSourceDims(), transferOp.getResultSize(), - adaptor.getIndices()); + convertedSourceDims, transferOp.getResultSize(), convertedIndices); return success(); } // Slice out the individual element value. IndexSet indexSet(op.getLoc(), rewriter); - indexSet.populate(adaptor.getIndices()); + indexSet.populate(convertedIndices); SmallVector sliceIndices; SmallVector sliceLengths; SmallVector loadIndices; SmallVector resultDims; - for (auto index : adaptor.getIndices()) { + for (auto index : convertedIndices) { // TODO(benvanik): support larger buffer slices. 
sliceIndices.push_back(index); sliceLengths.push_back(indexSet.get(1)); @@ -354,9 +386,8 @@ struct ConvertTensorLoadOp op.getLoc(), resultEncoding, ValueRange{}, source.affinity); auto sliceOp = rewriter.create( op.getLoc(), source.resource.getType(), source.resource, sourceEncoding, - adaptor.getSourceDims(), source.resourceSize, sliceIndices, - sliceLengths, resultEncoding, ValueRange{}, resultSize, - source.affinity); + convertedSourceDims, source.resourceSize, sliceIndices, sliceLengths, + resultEncoding, ValueRange{}, resultSize, source.affinity); auto transferOp = rewriter.create( op.getLoc(), stagingType, sliceOp.getResult(), sliceOp.getResultSize(), sliceOp.getResultSize(), @@ -374,33 +405,37 @@ struct ConvertTensorStoreOp : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(IREE::Flow::TensorStoreOp op, OpAdaptor adaptor, + matchAndRewrite(IREE::Flow::TensorStoreOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto target = resolveTensorOperand(op.getLoc(), op.getTarget(), - adaptor.getTarget(), rewriter); + auto target = resolveTensorOperands(op.getLoc(), op.getTarget(), + adaptor.getTarget(), rewriter); // If the target is a staging resource then we can directly store into it // with a fast-path. Otherwise we need to stage an upload. auto stagingType = rewriter.getType( IREE::Stream::Lifetime::Staging); if (target.resource.getType() == stagingType) { - rewriter.replaceOpWithNewOp( - op, target.resource.getType(), target.resource, - op.getTarget().getType(), adaptor.getTargetDims(), - target.resourceSize, adaptor.getIndices(), adaptor.getValue()); + auto storeOp = rewriter.create( + op.getLoc(), target.resource.getType(), target.resource, + op.getTarget().getType(), flattenValues(adaptor.getTargetDims()), + target.resourceSize, flattenValues(adaptor.getIndices()), + adaptor.getValue().front()); + rewriter.replaceOpWithMultiple(op, {{storeOp, target.resourceSize}}); return success(); } // Use fill to store the value. // TODO(benvanik): support larger buffer slices (stage + update). 
IndexSet indexSet(op.getLoc(), rewriter); - indexSet.populate(adaptor.getIndices()); - SmallVector lengths(adaptor.getIndices().size(), indexSet.get(1)); + SmallVector convertedIndices = flattenValues(adaptor.getIndices()); + indexSet.populate(convertedIndices); + SmallVector lengths(convertedIndices.size(), indexSet.get(1)); auto targetEncoding = op.getTarget().getType(); - rewriter.replaceOpWithNewOp( - op, target.resource, targetEncoding, adaptor.getTargetDims(), - target.resourceSize, adaptor.getIndices(), lengths, adaptor.getValue(), - target.affinity); + auto fillOp = rewriter.create( + op.getLoc(), target.resource, targetEncoding, + flattenValues(adaptor.getTargetDims()), target.resourceSize, + convertedIndices, lengths, adaptor.getValue().front(), target.affinity); + rewriter.replaceOpWithMultiple(op, {{fillOp, target.resourceSize}}); return success(); } }; @@ -409,15 +444,15 @@ struct ConvertTensorTraceOp : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(IREE::Flow::TensorTraceOp op, OpAdaptor adaptor, + matchAndRewrite(IREE::Flow::TensorTraceOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { SmallVector resources; SmallVector resourceSizes; SmallVector resourceEncodings; for (auto [tensorOperand, resourceOperand] : llvm::zip_equal(op.getValues(), adaptor.getValues())) { - auto source = resolveTensorOperand(op.getLoc(), tensorOperand, - resourceOperand, rewriter); + auto source = resolveTensorOperands(op.getLoc(), tensorOperand, + resourceOperand, rewriter); auto stagingType = rewriter.getType( IREE::Stream::Lifetime::Staging); auto traceSource = source.resource; @@ -432,10 +467,10 @@ struct ConvertTensorTraceOp resourceSizes.push_back(source.resourceSize); resourceEncodings.push_back(TypeAttr::get(tensorOperand.getType())); } - rewriter.replaceOpWithNewOp( op, adaptor.getKey(), resources, resourceSizes, - rewriter.getArrayAttr(resourceEncodings), adaptor.getValueDims()); + rewriter.getArrayAttr(resourceEncodings), + flattenValues(adaptor.getValueDims())); return success(); } }; @@ -444,7 +479,7 @@ struct ConvertChannelDefaultOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::ChannelDefaultOp op, OpAdaptor adaptor, + IREE::Flow::ChannelDefaultOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp( @@ -497,7 +532,7 @@ struct ConvertAllGatherOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::CollectiveAllGatherOp op, OpAdaptor adaptor, + IREE::Flow::CollectiveAllGatherOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto collectiveAttr = rewriter.getAttr( @@ -509,14 +544,14 @@ struct ConvertAllGatherOp auto elementCount = rewriter.create( op.getLoc(), op.getType().getNumElements()); auto newTargetCast = - transferTensorOperand(op.getLoc(), op.getTarget(), adaptor.getTarget(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getTarget(), adaptor.getTarget(), + executionAffinityAttr, rewriter); auto newSourceCast = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); 
+ transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); - rewriter.replaceOpWithNewOp( - op, collectiveAttr, + auto collectiveOp = rewriter.create( + op.getLoc(), collectiveAttr, /*target=*/newTargetCast.resource, /*target_size=*/newTargetCast.resourceSize, /*target_offset=*/zeroOffset, @@ -528,8 +563,10 @@ struct ConvertAllGatherOp /*source_end=*/newSourceCast.resourceSize, /*source_length=*/newSourceCast.resourceSize, /*element_count=*/elementCount, - /*channel=*/adaptor.getChannel(), + /*channel=*/adaptor.getChannel().front(), /*param=*/mlir::Value(), executionAffinityAttr); + rewriter.replaceOpWithMultiple( + op, {{collectiveOp, newTargetCast.resourceSize}}); return success(); } }; @@ -538,7 +575,7 @@ struct ConvertAllReduceOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::CollectiveAllReduceOp op, OpAdaptor adaptor, + IREE::Flow::CollectiveAllReduceOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto collectiveAttr = rewriter.getAttr( @@ -550,14 +587,14 @@ struct ConvertAllReduceOp auto elementCount = rewriter.create( op.getLoc(), op.getType().getNumElements()); auto newTargetCast = - transferTensorOperand(op.getLoc(), op.getTarget(), adaptor.getTarget(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getTarget(), adaptor.getTarget(), + executionAffinityAttr, rewriter); auto newSourceCast = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); - rewriter.replaceOpWithNewOp( - op, collectiveAttr, + auto collectiveOp = rewriter.create( + op.getLoc(), collectiveAttr, /*target=*/newTargetCast.resource, /*target_size=*/newTargetCast.resourceSize, /*target_offset=*/zeroOffset, @@ -569,8 +606,10 @@ struct ConvertAllReduceOp /*source_end=*/newSourceCast.resourceSize, /*source_length=*/newSourceCast.resourceSize, /*element_count=*/elementCount, - /*channel=*/adaptor.getChannel(), + /*channel=*/adaptor.getChannel().front(), /*param=*/mlir::Value(), executionAffinityAttr); + rewriter.replaceOpWithMultiple( + op, {{collectiveOp, newTargetCast.resourceSize}}); return success(); } }; @@ -579,7 +618,7 @@ struct ConvertAllToAllOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::CollectiveAllToAllOp op, OpAdaptor adaptor, + IREE::Flow::CollectiveAllToAllOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto collectiveAttr = rewriter.getAttr( @@ -591,14 +630,14 @@ struct ConvertAllToAllOp auto elementCount = rewriter.create( op.getLoc(), op.getType().getNumElements()); auto newTargetCast = - transferTensorOperand(op.getLoc(), op.getTarget(), adaptor.getTarget(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getTarget(), adaptor.getTarget(), + executionAffinityAttr, rewriter); auto newSourceCast = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); - 
rewriter.replaceOpWithNewOp( - op, collectiveAttr, + auto collectiveOp = rewriter.create( + op.getLoc(), collectiveAttr, /*target=*/newTargetCast.resource, /*target_size=*/newTargetCast.resourceSize, /*target_offset=*/zeroOffset, @@ -610,8 +649,10 @@ struct ConvertAllToAllOp /*source_end=*/newSourceCast.resourceSize, /*source_length=*/newSourceCast.resourceSize, /*element_count=*/elementCount, - /*channel=*/adaptor.getChannel(), + /*channel=*/adaptor.getChannel().front(), /*param=*/mlir::Value(), executionAffinityAttr); + rewriter.replaceOpWithMultiple( + op, {{collectiveOp, newTargetCast.resourceSize}}); return success(); } }; @@ -620,7 +661,7 @@ struct ConvertReduceScatterOp : public AffinityOpConversionPattern< IREE::Flow::CollectiveReduceScatterOp> { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::CollectiveReduceScatterOp op, OpAdaptor adaptor, + IREE::Flow::CollectiveReduceScatterOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto collectiveAttr = rewriter.getAttr( @@ -632,14 +673,14 @@ struct ConvertReduceScatterOp : public AffinityOpConversionPattern< auto elementCount = rewriter.create( op.getLoc(), op.getType().getNumElements()); auto newTargetCast = - transferTensorOperand(op.getLoc(), op.getTarget(), adaptor.getTarget(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getTarget(), adaptor.getTarget(), + executionAffinityAttr, rewriter); auto newSourceCast = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); - rewriter.replaceOpWithNewOp( - op, collectiveAttr, + auto collectiveOp = rewriter.create( + op.getLoc(), collectiveAttr, /*target=*/newTargetCast.resource, /*target_size=*/newTargetCast.resourceSize, /*target_offset=*/zeroOffset, @@ -651,8 +692,10 @@ struct ConvertReduceScatterOp : public AffinityOpConversionPattern< /*source_end=*/newSourceCast.resourceSize, /*source_length=*/newSourceCast.resourceSize, /*element_count=*/elementCount, - /*channel=*/adaptor.getChannel(), + /*channel=*/adaptor.getChannel().front(), /*param=*/mlir::Value(), executionAffinityAttr); + rewriter.replaceOpWithMultiple( + op, {{collectiveOp, newTargetCast.resourceSize}}); return success(); } }; @@ -661,7 +704,7 @@ struct ConvertCollectiveSendRecvOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::CollectiveSendRecvOp op, OpAdaptor adaptor, + IREE::Flow::CollectiveSendRecvOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto collectiveAttr = rewriter.getAttr( @@ -673,11 +716,11 @@ struct ConvertCollectiveSendRecvOp auto elementCount = rewriter.create( op.getLoc(), op.getType().getNumElements()); auto newTargetCast = - transferTensorOperand(op.getLoc(), op.getTarget(), adaptor.getTarget(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getTarget(), adaptor.getTarget(), + executionAffinityAttr, rewriter); auto newSourceCast = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + 
executionAffinityAttr, rewriter); // Pack send, recv into param. The values are checked to be within the // 16-bit range during lowering to Flow dialect. @@ -693,8 +736,8 @@ struct ConvertCollectiveSendRecvOp rewriter.create(op.getLoc(), 16, 32)); auto param = rewriter.create(op.getLoc(), hi, lo); - rewriter.replaceOpWithNewOp( - op, collectiveAttr, + auto collectiveOp = rewriter.create( + op.getLoc(), collectiveAttr, /*target=*/newTargetCast.resource, /*target_size=*/newTargetCast.resourceSize, /*target_offset=*/zeroOffset, @@ -706,8 +749,10 @@ struct ConvertCollectiveSendRecvOp /*source_end=*/newSourceCast.resourceSize, /*source_length=*/newSourceCast.resourceSize, /*element_count=*/elementCount, - /*channel=*/adaptor.getChannel(), + /*channel=*/adaptor.getChannel().front(), /*param=*/param, executionAffinityAttr); + rewriter.replaceOpWithMultiple( + op, {{collectiveOp, newTargetCast.resourceSize}}); return success(); } }; @@ -716,7 +761,7 @@ struct ConvertDispatchOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::DispatchOp op, OpAdaptor adaptor, + IREE::Flow::DispatchOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { // Zero is going to be used for each operand to start. @@ -729,12 +774,14 @@ struct ConvertDispatchOp SmallVector dispatchOperandEnds; SmallVector dispatchOperandLengths; SmallVector operandSizes; - for (auto [oldOperand, newOperand] : + + for (auto [oldOperand, convertedOperands] : llvm::zip_equal(op.getArguments(), adaptor.getArguments())) { + Value newOperand; if (llvm::isa(oldOperand.getType())) { auto newOperandCast = - transferTensorOperand(op.getLoc(), oldOperand, newOperand, - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), oldOperand, convertedOperands, + executionAffinityAttr, rewriter); newOperand = newOperandCast.resource; dispatchOperandSizes.push_back(newOperandCast.resourceSize); operandSizes.push_back(newOperandCast.resourceSize); @@ -743,6 +790,7 @@ struct ConvertDispatchOp dispatchOperandLengths.push_back(newOperandCast.resourceSize); } else { operandSizes.push_back({}); + newOperand = convertedOperands.front(); } dispatchOperands.push_back(newOperand); } @@ -773,12 +821,19 @@ struct ConvertDispatchOp } } - auto newOp = rewriter.replaceOpWithNewOp( - op, resultTypes, adaptor.getWorkload(), adaptor.getEntryPointsAttr(), - dispatchOperands, dispatchOperandSizes, dispatchOperandOffsets, - dispatchOperandEnds, dispatchOperandLengths, resultSizes, - adaptor.getTiedOperandsAttr(), executionAffinityAttr); + auto newOp = rewriter.create( + op.getLoc(), resultTypes, flattenValues(adaptor.getWorkload()), + adaptor.getEntryPointsAttr(), dispatchOperands, dispatchOperandSizes, + dispatchOperandOffsets, dispatchOperandEnds, dispatchOperandLengths, + resultSizes, adaptor.getTiedOperandsAttr(), executionAffinityAttr); newOp->setDialectAttrs(op->getDialectAttrs()); + SmallVector> replacementsVec = llvm::map_to_vector( + llvm::zip_equal(newOp->getResults(), resultSizes), [](auto it) { + return SmallVector{std::get<0>(it), std::get<1>(it)}; + }); + SmallVector replacements = llvm::map_to_vector( + replacementsVec, [](ArrayRef v) -> ValueRange { return v; }); + rewriter.replaceOpWithMultiple(op, replacements); return success(); } }; @@ -821,7 +876,7 @@ struct ConvertFuncOp : public OpConversionPattern { struct ConvertCallOp : public 
AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::Flow::CallOp op, OpAdaptor adaptor, + IREE::Flow::CallOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { // Zero is going to be used for each operand to start. @@ -834,12 +889,13 @@ struct ConvertCallOp : public AffinityOpConversionPattern { SmallVector callOperandEnds; SmallVector callOperandLengths; SmallVector operandSizes; - for (auto [oldOperand, newOperand] : + for (auto [oldOperand, convertedOperand] : llvm::zip_equal(op.getArguments(), adaptor.getArguments())) { + Value newOperand; if (llvm::isa(oldOperand.getType())) { auto newOperandCast = - transferTensorOperand(op.getLoc(), oldOperand, newOperand, - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), oldOperand, convertedOperand, + executionAffinityAttr, rewriter); newOperand = newOperandCast.resource; callOperandSizes.push_back(newOperandCast.resourceSize); operandSizes.push_back(newOperandCast.resourceSize); @@ -847,6 +903,7 @@ struct ConvertCallOp : public AffinityOpConversionPattern { callOperandEnds.push_back(newOperandCast.resourceSize); callOperandLengths.push_back(newOperandCast.resourceSize); } else { + newOperand = convertedOperand.front(); operandSizes.push_back({}); } callOperands.push_back(newOperand); @@ -861,6 +918,7 @@ struct ConvertCallOp : public AffinityOpConversionPattern { auto oldResultType = result.value().getType(); if (!llvm::isa(oldResultType)) { resultTypes.push_back(getTypeConverter()->convertType(oldResultType)); + resultSizes.push_back(nullptr); continue; } auto tiedOperand = op.getTiedResultOperandIndex(result.index()); @@ -878,12 +936,13 @@ struct ConvertCallOp : public AffinityOpConversionPattern { } } - auto newOp = rewriter.replaceOpWithNewOp( - op, resultTypes, adaptor.getCalleeAttr(), callOperands, + auto newOp = rewriter.create( + op.getLoc(), resultTypes, adaptor.getCalleeAttr(), callOperands, callOperandSizes, callOperandOffsets, callOperandEnds, callOperandLengths, resultSizes, adaptor.getTiedOperandsAttr(), executionAffinityAttr); newOp->setDialectAttrs(op->getDialectAttrs()); + replaceOpWithMultiple(op, newOp->getResults(), resultSizes, rewriter); return success(); } }; diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp index 76eef8b8e56f..e597aaffba8f 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/HALToStream/Patterns.cpp @@ -16,6 +16,14 @@ namespace mlir::iree_compiler { namespace { +/// Flatten the given value ranges into a single vector of values. 
+static SmallVector flattenValues(ArrayRef values) { + SmallVector result; + for (const auto &vals : values) + llvm::append_range(result, vals); + return result; +} + // %1 = hal.tensor.import %0 : !hal.buffer_view -> tensor<4xf32> // -> // %1 = stream.tensor.import %0 : !hal.buffer_view -> @@ -24,7 +32,7 @@ struct ConvertTensorImportOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::HAL::TensorImportOp op, OpAdaptor adaptor, + IREE::HAL::TensorImportOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto sourceType = op.getSource().getType(); @@ -42,9 +50,9 @@ struct ConvertTensorImportOp // mistake and it's better to know of a shape mismatch than just buffer // byte length difference. if (auto tensorType = llvm::dyn_cast(targetType)) { - if (failed(buildEncodingAssertions(op.getLoc(), adaptor.getSource(), - op.getNameAttr(), tensorType, - op.getTargetDims(), rewriter))) { + if (failed(buildEncodingAssertions( + op.getLoc(), adaptor.getSource().front(), op.getNameAttr(), + tensorType, op.getTargetDims(), rewriter))) { return rewriter.notifyMatchFailure(op, "unsupported tensor type"); } } @@ -55,11 +63,12 @@ struct ConvertTensorImportOp IREE::Stream::Lifetime::External); Value resultSize = rewriter.create( op.getLoc(), rewriter.getIndexType(), - TypeAttr::get(op.getTarget().getType()), adaptor.getTargetDims(), - executionAffinityAttr); + TypeAttr::get(op.getTarget().getType()), + flattenValues(adaptor.getTargetDims()), executionAffinityAttr); Value resource = rewriter.create( - op.getLoc(), resultType, adaptor.getSource(), TypeAttr::get(targetType), - adaptor.getTargetDims(), resultSize, executionAffinityAttr); + op.getLoc(), resultType, adaptor.getSource().front(), + TypeAttr::get(targetType), flattenValues(adaptor.getTargetDims()), + resultSize, executionAffinityAttr); // Await the fence, if needed. When not specified the resource is assumed to // be immediately available. @@ -75,10 +84,11 @@ struct ConvertTensorImportOp } auto unknownType = rewriter.getType(); - rewriter.replaceOpWithNewOp( - op, unknownType, resource, resultSize, resultSize, + Value newImport = rewriter.create( + op.getLoc(), unknownType, resource, resultSize, resultSize, /*source_affinity=*/executionAffinityAttr, /*target_affinity=*/executionAffinityAttr); + rewriter.replaceOpWithMultiple(op, {{newImport, resultSize}}); return success(); } @@ -125,7 +135,7 @@ struct ConvertTensorExportOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::HAL::TensorExportOp op, OpAdaptor adaptor, + IREE::HAL::TensorExportOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto sourceType = op.getSourceEncoding(); @@ -136,12 +146,12 @@ struct ConvertTensorExportOp } auto source = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); // Exporting a produced value - transfer our source value to an externally // usable resource and directly export it. This will cause an allocation. 
- auto exportSource = adaptor.getSource(); + Value exportSource = adaptor.getSource().front(); auto externalType = rewriter.getType( IREE::Stream::Lifetime::External); if (source.resource.getType() != externalType) { @@ -154,7 +164,8 @@ struct ConvertTensorExportOp // Export (stream resource to buffer view). rewriter.replaceOpWithNewOp( op, targetType, exportSource, TypeAttr::get(sourceType), - adaptor.getSourceDims(), source.resourceSize, executionAffinityAttr); + flattenValues(adaptor.getSourceDims()), source.resourceSize, + executionAffinityAttr); return success(); } }; @@ -174,19 +185,21 @@ struct ConvertTensorAliasOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - IREE::HAL::TensorAliasOp op, OpAdaptor adaptor, + IREE::HAL::TensorAliasOp op, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { auto sourceType = op.getSource().getType(); auto source = - transferTensorOperand(op.getLoc(), op.getSource(), adaptor.getSource(), - executionAffinityAttr, rewriter); + transferTensorOperands(op.getLoc(), op.getSource(), adaptor.getSource(), + executionAffinityAttr, rewriter); // Query the target storage buffer length; we will only populate up to // what is required for the output. + SmallVector convertedSourceDims = + flattenValues(adaptor.getSourceDims()); Value storageSize = rewriter.create( op.getLoc(), rewriter.getIndexType(), - TypeAttr::get(op.getSource().getType()), adaptor.getSourceDims(), + TypeAttr::get(op.getSource().getType()), convertedSourceDims, executionAffinityAttr); // Import the target storage as a resource that we can use as an update @@ -195,8 +208,8 @@ struct ConvertTensorAliasOp auto externalType = rewriter.getType( IREE::Stream::Lifetime::External); auto importOp = rewriter.create( - op.getLoc(), externalType, adaptor.getStorage(), - TypeAttr::get(sourceType), adaptor.getSourceDims(), storageSize, + op.getLoc(), externalType, adaptor.getStorage().front(), + TypeAttr::get(sourceType), convertedSourceDims, storageSize, executionAffinityAttr); // Await the fence, if needed. 
When not specified the storage is assumed to @@ -235,7 +248,7 @@ struct ConvertTensorAliasOp op.getLoc(), source.resource.getType(), result, source.resourceSize, source.resourceSize, executionAffinityAttr, executionAffinityAttr); } - rewriter.replaceOp(op, result); + rewriter.replaceOpWithMultiple(op, {{result, source.resourceSize}}); return success(); } @@ -254,20 +267,22 @@ struct ConvertTensorBarrierOp : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(IREE::HAL::TensorBarrierOp op, OpAdaptor adaptor, + matchAndRewrite(IREE::HAL::TensorBarrierOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto timepointType = rewriter.getType(); IREE::Stream::AffinityAttr anyAffinityAttr; SmallVector signaledResources; + SmallVector signaledResourceSizes; SmallVector signaledTimepoints; for (auto [sourceTensor, sourceResource] : llvm::zip_equal(op.getSources(), adaptor.getSources())) { - auto source = resolveTensorOperand(op.getLoc(), sourceTensor, - sourceResource, rewriter); + auto source = resolveTensorOperands(op.getLoc(), sourceTensor, + sourceResource, rewriter); auto barrierOp = rewriter.create( - sourceResource.getLoc(), source.resource.getType(), timepointType, - source.resource, source.resourceSize, source.affinity); + sourceResource.front().getLoc(), source.resource.getType(), + timepointType, source.resource, source.resourceSize, source.affinity); signaledResources.push_back(barrierOp.getResult()); + signaledResourceSizes.push_back(source.resourceSize); signaledTimepoints.push_back(barrierOp.getResultTimepoint()); // When joining from multiple affinities we need to pick one to perform @@ -283,7 +298,8 @@ struct ConvertTensorBarrierOp rewriter.create( op.getLoc(), joinedTimepoint, ValueRange{adaptor.getSignalFence()}, anyAffinityAttr); - rewriter.replaceOp(op, signaledResources); + replaceOpWithMultiple(op, signaledResources, signaledResourceSizes, + rewriter); return success(); } }; diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp index fee06f2df4cb..45122452d64b 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.cpp @@ -44,73 +44,25 @@ tryLookupResultAffinity(Value value, return affinityAnalysis->lookupResourceAffinity(value); } -static std::pair -resolveTensorOperand(Location loc, Value convertedOperand, OpBuilder &builder) { - auto operandType = convertedOperand.getType(); - if (llvm::isa(operandType)) { - // Prior to https://reviews.llvm.org/D111620 this is the path we'd take; - // the tensor operands would be remapped into their new resource types. - // This is still possible during rewriting if we ourselves produce a new - // resource type, but the automatic materialization will go down the - // unrealized_conversion_cast path below. - return std::make_pair(convertedOperand, - builder.createOrFold( - loc, builder.getIndexType(), convertedOperand)); - } else if (auto castOp = - convertedOperand - .getDefiningOp()) { - // We only have a single tensor type conversion and it expands to (resource, - // size) so that's all we look for here. 
- assert(castOp.getNumOperands() == 2 && "expected (resource, size)"); - return std::make_pair(castOp.getOperand(0), castOp.getOperand(1)); - } - assert(false && - "unexpected operand; expected either a IREE::Stream::ResourceType or " - "the result of a mlir::UnrealizedConversionCastOp"); - return std::make_pair(Value{}, Value{}); -} - -void expandResourceOperand(Location loc, Value operand, - SmallVectorImpl &newOperands, - OpBuilder &builder) { - if (llvm::isa(operand.getType())) { - auto [resource, resourceSize] = resolveTensorOperand(loc, operand, builder); - newOperands.push_back(resource); - newOperands.push_back(resourceSize); - } else if (llvm::isa(operand.getType())) { - newOperands.push_back(operand); - newOperands.push_back( - builder.createOrFold(loc, operand)); - } else { - newOperands.push_back(operand); - } -} - -SmallVector expandResourceOperands(Location loc, ValueRange operands, - ConversionPatternRewriter &rewriter) { - SmallVector expandedOperands; - expandedOperands.reserve(operands.size()); - for (auto operand : operands) { - expandResourceOperand(loc, operand, expandedOperands, rewriter); - } - return expandedOperands; -} - -ConvertedTensor resolveTensorOperand( - Location loc, Value originalOperand, Value convertedOperand, +ConvertedTensor resolveTensorOperands( + Location loc, Value originalOperand, ValueRange convertedOperand, IREE::Stream::AffinityAnalysis *affinityAnalysis, OpBuilder &builder) { - auto [resource, resourceSize] = - resolveTensorOperand(loc, convertedOperand, builder); + assert(convertedOperand.size() == 2 && + "expected tensor operands to be converted to `!stream.resource<*>, " + "index`"); auto affinityAttr = affinityAnalysis->lookupResourceAffinity(originalOperand); - return {affinityAttr, resource, resourceSize}; + return {affinityAttr, convertedOperand[0], convertedOperand[1]}; } -ConvertedTensor transferTensorOperand( - Location loc, Value originalOperand, Value convertedOperand, +ConvertedTensor transferTensorOperands( + Location loc, Value originalOperand, ValueRange convertedOperand, IREE::Stream::AffinityAttr requiredAffinityAttr, IREE::Stream::AffinityAnalysis *affinityAnalysis, OpBuilder &builder) { - auto [resource, resourceSize] = - resolveTensorOperand(loc, convertedOperand, builder); + assert(convertedOperand.size() == 2 && + "expected tensor operands to be converted to `!stream.resource<*>, " + "index`"); + Value resource = convertedOperand[0]; + Value resourceSize = convertedOperand[1]; auto affinityAttr = affinityAnalysis->lookupResourceAffinity(originalOperand); if (affinityAttr != requiredAffinityAttr) { resource = builder.create( @@ -120,4 +72,25 @@ ConvertedTensor transferTensorOperand( return {requiredAffinityAttr, resource, resourceSize}; } +void replaceOpWithMultiple(Operation *op, + ArrayRef> replacements, + ConversionPatternRewriter &rewriter) { + auto r = llvm::map_to_vector( + replacements, [](ArrayRef v) -> ValueRange { return v; }); + rewriter.replaceOpWithMultiple(op, r); +} + +void replaceOpWithMultiple(Operation *op, ValueRange resources, + ValueRange sizes, + ConversionPatternRewriter &rewriter) { + SmallVector> replacements = llvm::map_to_vector( + llvm::zip_equal(resources, sizes), [](auto it) -> SmallVector { + if (std::get<1>(it)) { + return {std::get<0>(it), std::get<1>(it)}; + } + return {std::get<0>(it)}; + }); + replaceOpWithMultiple(op, replacements, rewriter); +} + } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h 
b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h index 43cfbb073494..774b7f65b9d2 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h +++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/PatternUtils.h @@ -42,18 +42,11 @@ struct ConvertedTensor { Value resourceSize; }; -void expandResourceOperand(Location loc, Value convertedOperand, - SmallVectorImpl &newOperands, - OpBuilder &builder); -SmallVector expandResourceOperands(Location loc, - ValueRange convertedOperands, - ConversionPatternRewriter &rewriter); - -ConvertedTensor resolveTensorOperand( - Location loc, Value originalOperand, Value convertedOperand, +ConvertedTensor resolveTensorOperands( + Location loc, Value originalOperand, ValueRange convertedOperand, IREE::Stream::AffinityAnalysis *affinityAnalysis, OpBuilder &builder); -ConvertedTensor transferTensorOperand( - Location loc, Value originalOperand, Value convertedOperand, +ConvertedTensor transferTensorOperands( + Location loc, Value originalOperand, ValueRange convertedOperand, IREE::Stream::AffinityAttr requiredAffinityAttr, IREE::Stream::AffinityAnalysis *affinityAnalysis, OpBuilder &builder); @@ -72,19 +65,19 @@ struct AffinityAwareConversionPattern : public OpConversionPattern { } protected: - ConvertedTensor resolveTensorOperand(Location loc, Value originalOperand, - Value convertedOperand, - OpBuilder &builder) const { - return mlir::iree_compiler::resolveTensorOperand( + ConvertedTensor resolveTensorOperands(Location loc, Value originalOperand, + ValueRange convertedOperand, + OpBuilder &builder) const { + return mlir::iree_compiler::resolveTensorOperands( loc, originalOperand, convertedOperand, affinityAnalysis, builder); } ConvertedTensor - transferTensorOperand(Location loc, Value originalOperand, - Value convertedOperand, - IREE::Stream::AffinityAttr requiredAffinityAttr, - OpBuilder &builder) const { - return mlir::iree_compiler::transferTensorOperand( + transferTensorOperands(Location loc, Value originalOperand, + ValueRange convertedOperand, + IREE::Stream::AffinityAttr requiredAffinityAttr, + OpBuilder &builder) const { + return mlir::iree_compiler::transferTensorOperands( loc, originalOperand, convertedOperand, requiredAffinityAttr, affinityAnalysis, builder); } @@ -110,13 +103,14 @@ struct AffinityOpConversionPattern protected: virtual LogicalResult matchAndRewriteOnAffinity( - OpT op, typename OpConversionPattern::OpAdaptor adaptor, + OpT op, typename OpConversionPattern::OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const = 0; private: LogicalResult - matchAndRewrite(OpT op, typename OpConversionPattern::OpAdaptor adaptor, + matchAndRewrite(OpT op, + typename OpConversionPattern::OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override final { auto executionAffinityAttr = tryLookupExecutionAffinity(op, this->getAffinityAnalysis()); @@ -125,6 +119,13 @@ struct AffinityOpConversionPattern } }; +void replaceOpWithMultiple(Operation *op, + ArrayRef> replacements, + ConversionPatternRewriter &rewriter); +void replaceOpWithMultiple(Operation *op, ValueRange resources, + ValueRange sizes, + ConversionPatternRewriter &rewriter); + } // namespace mlir::iree_compiler #endif // IREE_COMPILER_DIALECT_STREAM_CONVERSION_PATTERN_UTILS_H_ diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp index 
9924fd2edf1c..ce51aad16c06 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp +++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/StandardToStream/Patterns.cpp @@ -29,11 +29,19 @@ namespace mlir::iree_compiler { namespace { +/// Flatten the given value ranges into a single vector of values. +static SmallVector flattenValues(ArrayRef values) { + SmallVector result; + for (const auto &vals : values) + llvm::append_range(result, vals); + return result; +} + struct ConvertTensorConstantOp : public AffinityOpConversionPattern { using AffinityOpConversionPattern::AffinityOpConversionPattern; LogicalResult matchAndRewriteOnAffinity( - arith::ConstantOp constantOp, OpAdaptor adaptor, + arith::ConstantOp constantOp, OneToNOpAdaptor adaptor, IREE::Stream::AffinityAttr executionAffinityAttr, ConversionPatternRewriter &rewriter) const override { // Only handle tensor types - other arith.constant types (like i32) are @@ -53,10 +61,13 @@ struct ConvertTensorConstantOp auto unknownType = rewriter.getType(); auto constantSize = rewriter.createOrFold( constantOp.getLoc(), rewriter.getIndexType(), newOp.getResult()); - rewriter.replaceOpWithNewOp( - constantOp, unknownType, newOp.getResult(), constantSize, constantSize, + auto transferOp = rewriter.create( + constantOp.getLoc(), unknownType, newOp.getResult(), constantSize, + constantSize, /*source_affinity=*/executionAffinityAttr, /*result_affinity=*/executionAffinityAttr); + rewriter.replaceOpWithMultiple(constantOp, + {{transferOp.getResult(), constantSize}}); return success(); } }; @@ -65,13 +76,11 @@ struct BranchOpConversion : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(mlir::cf::BranchOp op, OpAdaptor adaptor, + matchAndRewrite(mlir::cf::BranchOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Expand any resource operands to resource + size. - auto expandedOperands = expandResourceOperands( - op.getLoc(), adaptor.getDestOperands(), rewriter); - rewriter.replaceOpWithNewOp(op, op.getDest(), - expandedOperands); + rewriter.replaceOpWithNewOp( + op, op.getDest(), flattenValues(adaptor.getOperands())); return success(); } }; @@ -80,15 +89,13 @@ struct CondBranchOpConversion : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(mlir::cf::CondBranchOp op, OpAdaptor adaptor, + matchAndRewrite(mlir::cf::CondBranchOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Expand any resource operands to resource + size. 
- auto trueDestOperands = expandResourceOperands( - op.getLoc(), adaptor.getTrueDestOperands(), rewriter); - auto falseDestOperands = expandResourceOperands( - op.getLoc(), adaptor.getFalseDestOperands(), rewriter); + auto trueDestOperands = flattenValues(adaptor.getTrueDestOperands()); + auto falseDestOperands = flattenValues(adaptor.getFalseDestOperands()); rewriter.replaceOpWithNewOp( - op, adaptor.getCondition(), op.getTrueDest(), trueDestOperands, + op, adaptor.getCondition().front(), op.getTrueDest(), trueDestOperands, op.getFalseDest(), falseDestOperands); return success(); } @@ -100,18 +107,17 @@ struct SwitchOpConversion : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(mlir::cf::SwitchOp op, OpAdaptor adaptor, + matchAndRewrite(mlir::cf::SwitchOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Expand any resource operands to resource + size. - auto defaultOperands = expandResourceOperands( - op.getLoc(), adaptor.getDefaultOperands(), rewriter); - auto caseOperands = llvm::to_vector( - llvm::map_range(adaptor.getCaseOperands(), [&](ValueRange operands) { - return expandResourceOperands(op.getLoc(), operands, rewriter); + auto defaultOperands = flattenValues(adaptor.getDefaultOperands()); + auto caseOperands = llvm::to_vector(llvm::map_range( + adaptor.getCaseOperands(), [&](ArrayRef operands) { + return flattenValues(operands); })); rewriter.replaceOpWithNewOp( - op, adaptor.getFlag(), op.getDefaultDestination(), defaultOperands, - op.getCaseValuesAttr(), op.getCaseDestinations(), + op, adaptor.getFlag().front(), op.getDefaultDestination(), + defaultOperands, op.getCaseValuesAttr(), op.getCaseDestinations(), llvm::to_vector(llvm::map_range(caseOperands, asValueRange))); return success(); } @@ -121,24 +127,23 @@ struct SelectOpConversion : public AffinityAwareConversionPattern { using AffinityAwareConversionPattern::AffinityAwareConversionPattern; LogicalResult - matchAndRewrite(mlir::arith::SelectOp op, OpAdaptor adaptor, + matchAndRewrite(mlir::arith::SelectOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Only handle selects where the operands are tensors (resources). 
     if (!llvm::isa(op.getTrueValue().getType()))
       return failure();
-    auto trueOperand = resolveTensorOperand(op.getLoc(), op.getTrueValue(),
-                                            adaptor.getTrueValue(), rewriter);
-    auto falseOperand = resolveTensorOperand(op.getLoc(), op.getFalseValue(),
-                                             adaptor.getFalseValue(), rewriter);
+    auto trueOperand = resolveTensorOperands(op.getLoc(), op.getTrueValue(),
+                                             adaptor.getTrueValue(), rewriter);
+    auto falseOperand = resolveTensorOperands(
+        op.getLoc(), op.getFalseValue(), adaptor.getFalseValue(), rewriter);
     auto resourceSelectOp = rewriter.create(
-        op.getLoc(), adaptor.getCondition(), trueOperand.resource,
+        op.getLoc(), adaptor.getCondition().front(), trueOperand.resource,
         falseOperand.resource);
     auto sizeSelectOp = rewriter.create(
-        op.getLoc(), adaptor.getCondition(), trueOperand.resourceSize,
+        op.getLoc(), adaptor.getCondition().front(), trueOperand.resourceSize,
         falseOperand.resourceSize);
-    rewriter.replaceOpWithNewOp(
-        op, adaptor.getTrueValue().getType(),
-        ValueRange{resourceSelectOp.getResult(), sizeSelectOp.getResult()});
+    rewriter.replaceOpWithMultiple(op, {ValueRange{resourceSelectOp.getResult(), sizeSelectOp.getResult()}});
     return success();
   }
 };
@@ -186,21 +191,19 @@ struct ScfIfOpConversion
     // Tie all resource results together so we end up with 1:1 results with the
     // original op.
     SmallVector results;
+    SmallVector resultSizes;
     for (auto result : resultMap) {
       if (llvm::isa(result.newType)) {
-        auto oldType = op.getResult(result.originalIndex).getType();
         auto resource = ifOp.getResult(result.newIndex + 0);
         auto resourceSize = ifOp.getResult(result.newIndex + 1);
-        results.push_back(rewriter
-                              .create(
-                                  op.getLoc(), TypeRange{oldType},
-                                  ValueRange{resource, resourceSize})
-                              .getResult(0));
+        results.push_back(resource);
+        resultSizes.push_back(resourceSize);
       } else {
         results.push_back(ifOp.getResult(result.newIndex));
+        resultSizes.push_back(nullptr);
       }
     }
-    rewriter.replaceOp(op, results);
+    replaceOpWithMultiple(op, results, resultSizes, rewriter);
     return success();
   }
 };
@@ -209,13 +212,12 @@ struct ScfForOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(mlir::scf::ForOp op, OpAdaptor adaptor,
+  matchAndRewrite(mlir::scf::ForOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     auto &typeConverter = *getTypeConverter();
     // Expand any resource operands to resource + size.
-    auto expandedOperands =
-        expandResourceOperands(op.getLoc(), adaptor.getInitArgs(), rewriter);
+    auto expandedOperands = flattenValues(adaptor.getInitArgs());
     // Expand any resource results to resource + size.
     SmallVector expandedTypes;
@@ -250,8 +252,9 @@ struct ScfForOpConversion
     // expanded output results. We can't directly replace the original loop as
     // the result counts differ.
     auto forOp = rewriter.create(
-        op.getLoc(), adaptor.getLowerBound(), adaptor.getUpperBound(),
-        adaptor.getStep(), expandedOperands);
+        op.getLoc(), adaptor.getLowerBound().front(),
+        adaptor.getUpperBound().front(), adaptor.getStep().front(),
+        expandedOperands);
     // Inline the block and update the block arguments.
     rewriter.eraseBlock(forOp.getBody());
@@ -265,21 +268,19 @@ struct ScfForOpConversion
     // Tie all resource results together so we end up with 1:1 results with the
     // original op.
     SmallVector results;
+    SmallVector resultSizes;
     for (auto result : resultMap) {
       if (llvm::isa(result.newType)) {
-        auto oldType = op.getResult(result.originalIndex).getType();
         auto resource = forOp.getResult(result.newIndex + 0);
         auto resourceSize = forOp.getResult(result.newIndex + 1);
-        results.push_back(rewriter
-                              .create(
-                                  op.getLoc(), TypeRange{oldType},
-                                  ValueRange{resource, resourceSize})
-                              .getResult(0));
+        results.push_back(resource);
+        resultSizes.push_back(resourceSize);
       } else {
         results.push_back(forOp.getResult(result.newIndex));
+        resultSizes.push_back(nullptr);
       }
     }
-    rewriter.replaceOp(op, results);
+    replaceOpWithMultiple(op, results, resultSizes, rewriter);
     return success();
   }
 };
@@ -288,13 +289,12 @@ struct ScfWhileOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(mlir::scf::WhileOp op, OpAdaptor adaptor,
+  matchAndRewrite(mlir::scf::WhileOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     auto &typeConverter = *getTypeConverter();
     // Expand any resource operands to resource + size.
-    auto expandedOperands =
-        expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
+    auto expandedOperands = flattenValues(adaptor.getOperands());
     // Expand any resource results to resource + size.
     SmallVector expandedTypes;
@@ -351,21 +351,19 @@ struct ScfWhileOpConversion
     // Tie all resource results together so we end up with 1:1 results with the
     // original op.
     SmallVector results;
+    SmallVector resultSizes;
     for (auto result : resultMap) {
       if (llvm::isa(result.newType)) {
-        auto oldType = op.getResult(result.originalIndex).getType();
         auto resource = whileOp.getResult(result.newIndex + 0);
         auto resourceSize = whileOp.getResult(result.newIndex + 1);
-        results.push_back(rewriter
-                              .create(
-                                  op.getLoc(), TypeRange{oldType},
-                                  ValueRange{resource, resourceSize})
-                              .getResult(0));
+        results.push_back(resource);
+        resultSizes.push_back(resourceSize);
       } else {
         results.push_back(whileOp.getResult(result.newIndex));
+        resultSizes.push_back(nullptr);
       }
     }
-    rewriter.replaceOp(op, results);
+    replaceOpWithMultiple(op, results, resultSizes, rewriter);
     return success();
   }
 };
@@ -374,13 +372,12 @@ struct ScfConditionOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(mlir::scf::ConditionOp op, OpAdaptor adaptor,
+  matchAndRewrite(mlir::scf::ConditionOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
-    auto expandedOperands =
-        expandResourceOperands(op.getLoc(), adaptor.getArgs(), rewriter);
+    auto expandedOperands = flattenValues(adaptor.getArgs());
     rewriter.replaceOpWithNewOp(
-        op, adaptor.getCondition(), expandedOperands);
+        op, adaptor.getCondition().front(), expandedOperands);
     return success();
   }
 };
@@ -389,11 +386,10 @@ struct ScfYieldOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(mlir::scf::YieldOp op, OpAdaptor adaptor,
+  matchAndRewrite(mlir::scf::YieldOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
-    auto expandedOperands =
-        expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
+    auto expandedOperands = flattenValues(adaptor.getOperands());
     rewriter.replaceOpWithNewOp(op, expandedOperands);
     return success();
   }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
index 35e1ca8760a8..b7c24d4b1820 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/Patterns.cpp
@@ -19,6 +19,14 @@ namespace mlir::iree_compiler {
 namespace {
+/// Flatten the given value ranges into a single vector of values.
+static SmallVector flattenValues(ArrayRef values) {
+  SmallVector result;
+  for (const auto &vals : values)
+    llvm::append_range(result, vals);
+  return result;
+}
+
 //===----------------------------------------------------------------------===//
 // Structural ops
 //===----------------------------------------------------------------------===//
@@ -71,7 +79,7 @@ struct CallOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(IREE::Util::CallOp op, OpAdaptor adaptor,
+  matchAndRewrite(IREE::Util::CallOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Create a new call that takes the expanded input operands and returns the
     // expanded output results. We can't directly replace the original call as
@@ -85,9 +93,9 @@ struct CallOpConversion
     bool anyFailed = false;
     auto callOp = op.cloneAndExpand(
         [&](unsigned i, Value operand, SmallVectorImpl &newOperands) {
-          auto adaptorOperand = adaptor.getOperands()[i];
-          expandResourceOperand(op.getLoc(), adaptorOperand, newOperands,
-                                rewriter);
+          SmallVector appendNewOperands =
+              flattenValues(adaptor.getOperands()[i]);
+          newOperands.append(appendNewOperands);
         },
         [&](unsigned i, Type type, SmallVectorImpl &newTypes) {
           size_t newIndex = newTypes.size();
@@ -103,21 +111,19 @@ struct CallOpConversion
     // Tie all resource results together so we end up with 1:1 results with the
     // original op.
     SmallVector results;
+    SmallVector resourceSizes;
     for (auto result : resultMap) {
       if (llvm::isa(result.newType)) {
-        auto oldType = op.getResult(result.originalIndex).getType();
         auto resource = callOp.getResult(result.newIndex + 0);
         auto resourceSize = callOp.getResult(result.newIndex + 1);
-        results.push_back(rewriter
-                              .create(
-                                  op.getLoc(), TypeRange{oldType},
-                                  ValueRange{resource, resourceSize})
-                              .getResult(0));
+        results.push_back(resource);
+        resourceSizes.push_back(resourceSize);
       } else {
         results.push_back(callOp.getResult(result.newIndex));
+        resourceSizes.push_back(nullptr);
       }
     }
-    rewriter.replaceOp(op, results);
+    replaceOpWithMultiple(op, results, resourceSizes, rewriter);
     return success();
   }
@@ -127,11 +133,10 @@ struct ReturnOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(IREE::Util::ReturnOp op, OpAdaptor adaptor,
+  matchAndRewrite(IREE::Util::ReturnOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Expand any resource operands to resource + size.
-    auto expandedOperands =
-        expandResourceOperands(op.getLoc(), adaptor.getOperands(), rewriter);
+    auto expandedOperands = flattenValues(adaptor.getOperands());
     rewriter.replaceOpWithNewOp(op, expandedOperands);
     return success();
   }
@@ -312,11 +317,12 @@ struct GlobalLoadOpExpansion
             loadOp.getLoc(), rewriter.getIndexType(),
             expandedGlobal.resourceSizeOp.getSymName())
            .getResult();
-    rewriter.replaceOpWithNewOp(
-        loadOp, unknownType, resource, resourceSize, resourceSize,
+    auto transferOp = rewriter.create(
+        loadOp.getLoc(), unknownType, resource, resourceSize, resourceSize,
         /*source_affinity=*/expandedGlobal.affinityAttr,
         /*result_affinity=*/expandedGlobal.affinityAttr);
-
+    rewriter.replaceOpWithMultiple(loadOp,
+                                   {{transferOp.getResult(), resourceSize}});
     return success();
   }
 };
@@ -325,7 +331,7 @@ struct GlobalStoreOpExpansion
     : public BaseGlobalConversionPattern {
   using BaseGlobalConversionPattern::BaseGlobalConversionPattern;
   LogicalResult
-  matchAndRewrite(IREE::Util::GlobalStoreOp storeOp, OpAdaptor adaptor,
+  matchAndRewrite(IREE::Util::GlobalStoreOp storeOp, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Only apply to expanded types (tensors/etc).
     if (!isExpandedType(storeOp.getValue().getType()))
@@ -341,8 +347,8 @@ struct GlobalStoreOpExpansion
     // Insert a transfer/store to the global with unknown lifetime. Lifetime
     // refinement will make this go away if possible.
     auto value =
-        resolveTensorOperand(storeOp.getLoc(), storeOp.getValue(),
-                             adaptor.getValue(), affinityAnalysis, rewriter);
+        resolveTensorOperands(storeOp.getLoc(), storeOp.getValue(),
+                              adaptor.getValue(), affinityAnalysis, rewriter);
     assert(expandedGlobal.resourceOp && "Missing resource op");
     auto transferOp = rewriter.create(
         storeOp.getLoc(), expandedGlobal.resourceOp.getType(), value.resource,
@@ -364,21 +370,27 @@ struct OptimizationBarrierOpConversion
     : public AffinityAwareConversionPattern {
   using AffinityAwareConversionPattern::AffinityAwareConversionPattern;
   LogicalResult
-  matchAndRewrite(IREE::Util::OptimizationBarrierOp op, OpAdaptor adaptor,
+  matchAndRewrite(IREE::Util::OptimizationBarrierOp op, OneToNOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     SmallVector newOperands;
+    SmallVector operandSizes;
     for (auto [originalOperand, convertedOperand] :
          llvm::zip_equal(op.getOperands(), adaptor.getOperands())) {
-      if (isa(convertedOperand.getType())) {
-        newOperands.push_back(resolveTensorOperand(op.getLoc(), originalOperand,
-                                                   convertedOperand, rewriter)
-                                  .resource);
+      if (isa(originalOperand.getType())) {
+        auto tensorOperands = resolveTensorOperands(
+            op.getLoc(), originalOperand, convertedOperand, rewriter);
+        newOperands.push_back(tensorOperands.resource);
+        operandSizes.push_back(tensorOperands.resourceSize);
       } else {
-        newOperands.push_back(convertedOperand);
+        assert(convertedOperand.size() == 1 &&
+               "all non-tensor type expected to have a 1-1 conversion");
+        newOperands.push_back(convertedOperand.front());
+        operandSizes.push_back(nullptr);
       }
     }
-    rewriter.replaceOpWithNewOp(op,
-                                newOperands);
+    auto barrierOp = rewriter.create(
+        op.getLoc(), newOperands);
+    replaceOpWithMultiple(op, barrierOp->getResults(), operandSizes, rewriter);
    return success();
  }
};
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir
index c778fbf1e502..7c178e503924 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Conversion/UtilToStream/test/compiler_hints.mlir
@@ -3,9 +3,9 @@
 // CHECK-LABEL: @optimizationBarrier
 util.func public @optimizationBarrier(%arg0: tensor) -> tensor {
   // CHECK-SAME: %[[ARG0:.+]]: !stream.resource<*>
+  // CHECK-SAME: %[[ARG1:.+]]: index
   // CHECK: %[[RESOURCE:.*]] = util.optimization_barrier %[[ARG0]]
-  // CHECK: %[[SIZE:.*]] = stream.resource.size %[[RESOURCE]] : !stream.resource<*>
-  // CHECK: util.return %[[RESOURCE]], %[[SIZE]] : !stream.resource<*>, index
+  // CHECK: util.return %[[RESOURCE]], %[[ARG1]] : !stream.resource<*>, index
   %0 = util.optimization_barrier %arg0 : tensor
   util.return %0 : tensor
 }
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp
index 0da7d95f486d..501cbb83fbbb 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/ConvertToStream.cpp
@@ -68,7 +68,7 @@ struct GenericResourcePattern : public ConversionPattern {
         affinityAnalysis(affinityAnalysis) {}
   LogicalResult
-  matchAndRewrite(Operation *op, ArrayRef operands,
+  matchAndRewrite(Operation *op, ArrayRef operands,
                   ConversionPatternRewriter &rewriter) const override {
     if (!doesOperationNeedWrapping(op)) {
       return failure();
     }
@@ -80,10 +80,10 @@ struct GenericResourcePattern : public ConversionPattern {
     SmallVector newOperands;
     newOperands.reserve(op->getNumOperands());
     rewriter.setInsertionPoint(op);
-    for (auto [oldOperand, newOperand] :
+    for (auto [oldOperand, convertedOperands] :
          llvm::zip_equal(op->getOperands(), operands)) {
-      if (!isa(newOperand.getType())) {
-        newOperands.push_back(newOperand);
+      if (!isa(oldOperand.getType())) {
+        newOperands.push_back(convertedOperands.front());
         continue;
       }
       auto tensorType = dyn_cast(oldOperand.getType());
@@ -94,7 +94,7 @@ struct GenericResourcePattern : public ConversionPattern {
       auto dynamicDims = IREE::Util::buildDynamicDimsForValue(
          op->getLoc(), oldOperand, rewriter);
       newOperands.push_back(buildTensorExportOp(
-          op->getLoc(), oldOperand, newOperand, tensorType, dynamicDims,
+          op->getLoc(), oldOperand, convertedOperands, tensorType, dynamicDims,
          exportAffinityAttr ? exportAffinityAttr : executionAffinityAttr,
          rewriter));
     }
@@ -127,13 +127,13 @@ struct GenericResourcePattern : public ConversionPattern {
   // Builds a stream.tensor.export op that exports a stream resource into an
   // external tensor value.
   Value buildTensorExportOp(Location loc, Value originalValue,
-                            Value convertedValue, TensorType targetType,
+                            ValueRange convertedValue, TensorType targetType,
                             ValueRange dynamicDims,
                             IREE::Stream::AffinityAttr executionAffinityAttr,
                             OpBuilder &builder) const {
-    auto source =
-        transferTensorOperand(loc, originalValue, convertedValue,
-                              executionAffinityAttr, affinityAnalysis, builder);
+    auto source = transferTensorOperands(loc, originalValue, convertedValue,
+                                         executionAffinityAttr,
+                                         affinityAnalysis, builder);
     // If needed insert a transfer to external resource lifetime.
     auto externalType = builder.getType(
diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
index fd68d30bc5f6..8815f6103f78 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
+++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/convert_to_stream.mlir
@@ -130,8 +130,7 @@ util.func public @while_test() {
   // CHECK: %[[INITIAL_DNO:.+]] = util.optimization_barrier %[[INITIAL]] : !stream.resource<*>
   %0 = util.optimization_barrier %cst : tensor
-  // CHECK: %[[VAR_SIZE:.+]] = stream.resource.size %[[INITIAL_DNO]] : !stream.resource<*>
-  // CHECK: cf.br ^bb1(%[[INITIAL_DNO]], %[[VAR_SIZE]] : !stream.resource<*>, index)
+  // CHECK: cf.br ^bb1(%[[INITIAL_DNO]], %[[CONSTANT_SIZE]] : !stream.resource<*>, index)
   cf.br ^bb1(%0 : tensor)
   // CHECK: ^bb1(%[[BB1_ARG:.+]]: !stream.resource<*>, %[[BB1_ARG_SIZE:.+]]: index):
diff --git a/third_party/llvm-project b/third_party/llvm-project
index ccdbcf948ba2..078c7bb5c927 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit ccdbcf948ba24cfc80860e9a0256eb343f3373da
+Subproject commit 078c7bb5c927ab1596d8a508e0b70d5140e59669
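All of the converted patterns above share one shape: take the 1:N adaptor, flatten each operand's converted `ValueRange` (typically `!stream.resource<*>` plus its `index` size), build the replacement ops, and hand the expanded results back with `replaceOpWithMultiple`. The following is a minimal sketch of that shape, assuming only the upstream MLIR dialect-conversion API (`OpConversionPattern`, its `OneToNOpAdaptor`, and `ConversionPatternRewriter::replaceOpWithMultiple`); `ExpandToResourceAndSize` and `OpTy` are illustrative placeholders, not code from this patch.

// Sketch only: shows the OneToNOpAdaptor + replaceOpWithMultiple flow used by
// the patterns in this change; `OpTy` is a placeholder op type.
#include "llvm/ADT/STLExtras.h"
#include "mlir/Transforms/DialectConversion.h"

namespace sketch {
using namespace mlir;

template <typename OpTy>
struct ExpandToResourceAndSize : public OpConversionPattern<OpTy> {
  using OpConversionPattern<OpTy>::OpConversionPattern;
  using OneToNOpAdaptor = typename OpConversionPattern<OpTy>::OneToNOpAdaptor;

  LogicalResult
  matchAndRewrite(OpTy op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    // With the 1:N adaptor each original operand maps to a ValueRange of
    // converted values (e.g. a tensor becomes resource + size).
    SmallVector<Value> flatOperands;
    for (ValueRange converted : adaptor.getOperands())
      llvm::append_range(flatOperands, converted);

    // ... build replacement ops from flatOperands; for illustration assume
    // the first two flattened values are the resource and its size ...
    if (flatOperands.size() < 2)
      return rewriter.notifyMatchFailure(op, "expected resource + size");
    Value resource = flatOperands[0];
    Value resourceSize = flatOperands[1];

    // Replace the original result with both converted values; the framework
    // tracks the 1:N mapping instead of inserting
    // builtin.unrealized_conversion_cast ops.
    rewriter.replaceOpWithMultiple(op, {{resource, resourceSize}});
    return success();
  }
};
} // namespace sketch

Because the size now travels alongside the resource through the conversion, consumers no longer have to rematerialize it with stream.resource.size, which is what the updated FileCheck expectations in compiler_hints.mlir and convert_to_stream.mlir reflect.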