-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Attributor] Pack out arguments into a struct #119267
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (elhewaty) Changes
Full diff: https://github.com/llvm/llvm-project/pull/119267.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 116f419129a239..90eecf45892ee6 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2963,6 +2963,119 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
return Result;
}
+// For now: argument can be put in the struct if it's write only and
+// has no aliases.
+static bool canBeComapctedInAStruct(const Argument &Arg, Attributor &A, const AbstractAttribute &QueryingAA) {
+ IRPosition ArgPosition = IRPosition::argument(Arg);
+ // Check if Arg has no alias.
+ auto *AAliasInfo = A.getAAFor<AANoAlias>(QueryingAA, ArgPosition, DepClassTy::NONE);
+ if (!AAliasInfo || !AAliasInfo->isKnownNoAlias())
+ return false;
+
+ // Check if Arg is write-only.
+ const auto *MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(QueryingAA, ArgPosition, DepClassTy::NONE);
+ if (!MemBehaviorAA || !MemBehaviorAA->isKnownWriteOnly())
+ return false;
+
+ return true;
+}
+
+static void replaceArgRetWithStructRetCalls(Function &OldFunction, Function &NewFunction) {
+ for (auto UseItr = OldFunction.use_begin(); UseItr != OldFunction.use_end(); ++UseItr) {
+ CallBase *Call = dyn_cast<CallBase>(UseItr->getUser());
+ if (!Call)
+ continue;
+
+ IRBuilder<> Builder(Call);
+ SmallVector<Value *, 8> NewArgs;
+ for (unsigned ArgIdx = 0; ArgIdx < Call->arg_size(); ++ArgIdx)
+ if (std::find_if(OldFunction.arg_begin(), OldFunction.arg_end(),
+ [&](Argument &Arg) { return &Arg == Call->getArgOperand(ArgIdx); }) == OldFunction.arg_end())
+ NewArgs.push_back(Call->getArgOperand(ArgIdx));
+
+ CallInst *NewCall = Builder.CreateCall(&NewFunction, NewArgs);
+ Call->replaceAllUsesWith(NewCall);
+ Call->eraseFromParent();
+ }
+}
+
+static bool convertOutArgsToRetStruct(Function &F, Attributor &A, AbstractAttribute &QueryingAA) {
+ // Get valid ptr args.
+ DenseMap<Argument *, Type *> PtrToType;
+ for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx) {
+ Argument *Arg = F.getArg(ArgIdx);
+ if (Arg->getType()->isPointerTy() && canBeComapctedInAStruct(*Arg, A, QueryingAA)) {
+ // Get the the type of the pointer through its users
+ for (auto UseItr = Arg->use_begin(); UseItr != Arg->use_end(); ++UseItr) {
+ auto *Store = dyn_cast<StoreInst>(UseItr->getUser());
+ if (Store)
+ PtrToType[Arg] = Store->getValueOperand()->getType();
+ }
+ }
+ }
+
+ // If there is no valid candidates then return false.
+ if (PtrToType.empty())
+ return false;
+
+ // Create the new struct return type.
+ SmallVector<Type *, 4> OutStructElements;
+ if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy())
+ OutStructElements.push_back(OriginalFuncTy);
+
+ for (const auto &[Arg, Type] : PtrToType)
+ OutStructElements.push_back(Type);
+
+ auto *ReturnStructType = StructType::create(F.getContext(), OutStructElements, (F.getName() + "Out").str());
+
+ // Get the new Args.
+ SmallVector<Type *, 4> NewParamTypes;
+ for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx)
+ if (!PtrToType.count(F.getArg(ArgIdx)))
+ NewParamTypes.push_back(F.getArg(ArgIdx)->getType());
+
+ auto *NewFunctionType = FunctionType::get(ReturnStructType, NewParamTypes, F.isVarArg());
+ auto *NewFunction = Function::Create(NewFunctionType, F.getLinkage(), F.getAddressSpace(), F.getName());
+
+ // Map old args to new args.
+ ValueToValueMapTy VMap;
+ auto *NewArgIt = NewFunction->arg_begin();
+ for (Argument &OldArg : F.args())
+ if (!PtrToType.count(F.getArg(OldArg.getArgNo())))
+ VMap[&OldArg] = &(*NewArgIt++);
+
+ // Clone the old function into the new one.
+ SmallVector<ReturnInst *, 8> Returns;
+ CloneFunctionInto(NewFunction, &F, VMap, CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+ // Update the return values (make it struct).
+ for (ReturnInst *Ret : Returns) {
+ IRBuilder<> Builder(Ret);
+ SmallVector<Value *, 4> StructValues;
+ // Include original return type, if any
+ if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy())
+ StructValues.push_back(Ret->getReturnValue());
+
+ // Create a load instruction to fill the struct element.
+ for (const auto &[Arg, Ty] : PtrToType) {
+ Value *OutVal = Builder.CreateLoad(Ty, VMap[Arg]);
+ StructValues.push_back(OutVal);
+ }
+
+ // Build the return struct incrementally.
+ Value *StructRetVal = UndefValue::get(ReturnStructType);
+ for (unsigned i = 0; i < StructValues.size(); ++i)
+ StructRetVal = Builder.CreateInsertValue(StructRetVal, StructValues[i], i);
+
+ Builder.CreateRet(StructRetVal);
+ Ret->eraseFromParent();
+ }
+
+ replaceArgRetWithStructRetCalls(F, *NewFunction);
+ F.eraseFromParent();
+}
+
ChangeStatus Attributor::rewriteFunctionSignatures(
SmallSetVector<Function *, 8> &ModifiedFns) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
diff --git a/llvm/test/Transforms/Attributor/remove_out_args.ll b/llvm/test/Transforms/Attributor/remove_out_args.ll
new file mode 100644
index 00000000000000..bd52bf5d80656c
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/remove_out_args.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=attributor < %s | FileCheck %s
+
+
+
+define i1 @foo(ptr %dst) {
+; CHECK-LABEL: define noundef i1 @foo(
+; CHECK-SAME: ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 42, ptr [[DST]], align 4
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ store i32 42, ptr %dst
+ ret i1 1
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
1ab8d0c
to
8e88711
Compare
I know it's not complete. but I wonder if I am on the right track as I am confused. |
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -S -passes=attributor < %s | FileCheck %s | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs about 100 more tests llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll and llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll would be better starting points
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will. BTW, I don't have an AMD GPU, I have a nvidia one, should J add the tests in NVPTX?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't matter what GPU you have, it doesn't matter when working on the compiler. NVPTX will be less useful for IR testing since it doesn't use a non-0 address space for stack objects
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need all of these tests, using AMDGPU
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm, I will test using AMDGPU, but I don't know why my changes don't change this simple test? Do I need to call the some function somewhere? the optimizer dosn't see my changes or do my changes don't affect the input?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an externally visible function, so it's not allowed to change the signature. It would need to be be internal or similar linkage, with a call use
8e88711
to
461834b
Compare
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' e6ced4da4499007a366ada31cfb07e0b4fbf2393 7e34621d2aca606ec1836ef761a1cca590249e45 llvm/test/Transforms/Attributor/remove_out_args.ll llvm/include/llvm/Transforms/IPO/Attributor.h llvm/lib/Transforms/IPO/Attributor.cpp llvm/lib/Transforms/IPO/AttributorAttributes.cpp The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
@arsenm @shiltian @vidsinghal @jdoerfert @jhuber6
But I need you to share your opinions so far, I will write tests to cover all possible cases, but at least I need this to work with the simple test function written before. What did I mess up while adding the attributor? |
You didn't implement a virtual method of the base class. If you look further along in the error message it should tell you which ones |
461834b
to
4faf50a
Compare
@arsenm, Thanks. |
const IRPosition &ArgPos = IRPosition::argument(Arg); | ||
auto *AAMem = A.getAAFor<AAMemoryBehavior>(AA, ArgPos, DepClassTy::NONE); | ||
|
||
return Arg.hasNoAliasAttr() && AAMem && AAMem->isKnownWriteOnly() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be querying from AANoAlias instead of directly checking the attribute is present on the argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm, in the following example does ptr %dts
have any aliases in @foo
? as AANoAlias->isKnownNoAlias()
returns false.
define internal i1 @foo(ptr %dst) {
entry:
store i32 42, ptr %dst
ret i1 true
}
define i1 @fee(i32 %x, i32 %y) {
%ptr = alloca i32
%a = call i1 @foo(ptr %ptr, i32 %y)
%b = load i32, ptr %ptr
%c = icmp sle i32 %b, %x
%xor = xor i1 %a, %c
ret i1 %xor
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This callsite has the wrong type, so this test is UB and it probably doesn't bother trying to optimize it correctly
@@ -3412,6 +3412,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { | |||
// Every function can track active assumptions. | |||
getOrCreateAAFor<AAAssumptionInfo>(FPos); | |||
|
|||
// Every function can have out arguments. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only functions with pointer arguments, not sure if it's worth checking if there are any
|
||
bool hasCandidateArg = false; | ||
for (const Argument &Arg : F->args()) | ||
if (Arg.getType()->isPointerTy() && isEligibleArgument(Arg, A, *this)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you're going to introduce isEligibleArgumentType, use it consistently. You also need to guard against sret arguments
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't hasPointeeInMemoryValueAttr()
do this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes
Argument *Arg = F.getArg(argIdx); | ||
if (isEligibleArgument(*Arg, A, *this)) { | ||
CandidateArgs.push_back(Arg); | ||
for (auto UseItr = Arg->use_begin(); UseItr != Arg->use_end(); ++UseItr) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
range loop, probably needs to be early_inc_range
SmallVector<Argument *, 4> CandidateArgs; | ||
for (unsigned argIdx = 0; argIdx < F.arg_size(); ++argIdx) { | ||
Argument *Arg = F.getArg(argIdx); | ||
if (isEligibleArgument(*Arg, A, *this)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Early continue and reduce indent
// If there is no valid candidates then return false. | ||
if (PtrToType.empty()) | ||
return ChangeStatus::UNCHANGED; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't reach the manifest stage if it's not possible to do anything
if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy()) | ||
OutStructElementsTypes.push_back(OriginalFuncTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the function already has a struct return, you could flatten and insert directly into the struct type. It may be a little more effort to emit though.
You also should check whether TargetLowering::CanLowerReturn succeeds, you should avoid over-committing return values if they're going to be forced to the stack. As written out arguments may be in any address space, not just allocas
|
||
// Redirect all uses of the old call to the new call. | ||
for (auto &Use : CB->uses()) | ||
Use.set(NewCall); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should assert that this is the callee use, and not a data operand or other type of non-call user
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -S -passes=attributor < %s | FileCheck %s | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need all of these tests, using AMDGPU
4faf50a
to
7e34621
Compare
@arsenm, @jdoerfert @shiltian @vidsinghal I run into this crash, any help?
|
It seems like the assertion |
PR Summary:
Transforms functions with write-only pointer arguments into functions that return a struct containing the original return value and pointer-written values, improving GPU performance, and maybe x86 too.
Example:
we need to convert this:
into