From ea89bf7566c8469f1ed834fa31e568df8a7613b7 Mon Sep 17 00:00:00 2001 From: gitoleg Date: Wed, 25 Dec 2024 13:11:45 +0300 Subject: [PATCH] wip --- clang/lib/CIR/CodeGen/CIRGenExprConst.cpp | 82 ++++++++++++++++++++--- clang/lib/CIR/CodeGen/CIRGenModule.h | 51 ++++++++++++++ clang/test/CIR/CodeGen/bitfields.c | 6 +- clang/test/CIR/CodeGen/const-bitfields.c | 4 +- clang/test/CIR/CodeGen/struct.c | 5 +- 5 files changed, 131 insertions(+), 17 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp index dbd78284349d..23c1ffec74c3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp @@ -9,6 +9,8 @@ // This contains code to emit Constant Expr nodes as LLVM code. // //===----------------------------------------------------------------------===// +#include + #include "Address.h" #include "CIRGenCXXABI.h" #include "CIRGenCstEmitter.h" @@ -43,6 +45,19 @@ using namespace clang::CIRGen; namespace { class ConstExprEmitter; +mlir::TypedAttr getPadding(CIRGenModule &CGM, CharUnits size) { + auto eltTy = CGM.UCharTy; + auto arSize = size.getQuantity(); + auto &bld = CGM.getBuilder(); + if (size > CharUnits::One()) { + SmallVector elts(arSize, bld.getZeroAttr(eltTy)); + return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts), + bld.getArrayType(eltTy, arSize)); + } else { + return cir::ZeroAttr::get(bld.getContext(), eltTy); + } +} + static mlir::Attribute emitArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType, mlir::Type CommonElementType, unsigned ArrayBound, @@ -70,12 +85,7 @@ struct ConstantAggregateBuilderUtils { } mlir::TypedAttr getPadding(CharUnits size) const { - auto eltTy = CGM.UCharTy; - auto arSize = size.getQuantity(); - auto &bld = CGM.getBuilder(); - SmallVector elts(arSize, bld.getZeroAttr(eltTy)); - return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts), - bld.getArrayType(eltTy, arSize)); + return ::getPadding(CGM, size); } 
mlir::Attribute getZeroes(CharUnits ZeroSize) const { @@ -151,7 +161,7 @@ static void replace(Container &C, size_t BeginOff, size_t EndOff, Range Vals) { } bool ConstantAggregateBuilder::add(mlir::Attribute A, CharUnits Offset, - bool AllowOverwrite) { + bool AllowOverwrite) { // FIXME(cir): migrate most of this file to use mlir::TypedAttr directly. mlir::TypedAttr C = mlir::dyn_cast(A); assert(C && "expected typed attribute"); @@ -377,7 +387,7 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom( CharUnits AlignedSize = Size.alignTo(Align); bool Packed = false; - ArrayRef UnpackedElems = Elems; + ArrayRef UnpackedElems = Elems; llvm::SmallVector UnpackedElemStorage; if (DesiredSize < AlignedSize || DesiredSize.alignTo(Align) != DesiredSize) { @@ -388,7 +398,7 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom( // is ignored if we choose a packed layout.) UnpackedElemStorage.assign(UnpackedElems.begin(), UnpackedElems.end()); UnpackedElemStorage.push_back(Utils.getPadding(DesiredSize - Size)); - UnpackedElems = UnpackedElemStorage; + UnpackedElems = UnpackedElemStorage; } // If we don't have a natural layout, insert padding as necessary. 
@@ -508,6 +518,11 @@ class ConstStructBuilder { bool Build(InitListExpr *ILE, bool AllowOverwrite); bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); + + bool DoZeroInitPadding( + const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field, + bool AllowOverwrite, CharUnits &SizeSoFar, bool &ZeroFieldSize); + mlir::Attribute Finalize(QualType Ty); }; @@ -614,6 +629,10 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { if (CXXRD->getNumBases()) return false; + const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); + bool ZeroFieldSize = false; + CharUnits SizeSoFar = CharUnits::Zero(); + for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -631,8 +650,15 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) Init = ILE->getInit(ElementNo++); - if (Init && isa(Init)) + + if (Init && isa(Init)) { + if (ZeroInitPadding && + !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar, + ZeroFieldSize)) + return false; + continue; + } // Zero-sized fields are not emitted, but their initializers may still // prevent emission of this struct as a constant. 
@@ -641,6 +667,11 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { return false; continue; } + + if (ZeroInitPadding && + !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar, + ZeroFieldSize)) + return false; // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr // represents additional overwriting of our current constant value, and not @@ -784,6 +815,37 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, return true; } + +bool ConstStructBuilder::DoZeroInitPadding( + const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field, + bool AllowOverwrite, CharUnits &SizeSoFar, bool &ZeroFieldSize) { + + uint64_t StartBitOffset = Layout.getFieldOffset(FieldNo); + CharUnits StartOffset = CGM.getASTContext().toCharUnitsFromBits(StartBitOffset); + if (SizeSoFar < StartOffset) { + if (!AppendBytes(SizeSoFar, getPadding(CGM, StartOffset - SizeSoFar), + AllowOverwrite)) + return false; + } + + if (!Field.isBitField()) { + CharUnits FieldSize = CGM.getASTContext().getTypeSizeInChars(Field.getType()); + SizeSoFar = StartOffset + FieldSize; + ZeroFieldSize = FieldSize.isZero(); + } else { + const CIRGenRecordLayout &RL = + CGM.getTypes().getCIRGenRecordLayout(Field.getParent()); + const CIRGenBitFieldInfo &Info = RL.getBitFieldInfo(&Field); + uint64_t EndBitOffset = StartBitOffset + Info.Size; + SizeSoFar = CGM.getASTContext().toCharUnitsFromBits(EndBitOffset); + if (EndBitOffset % CGM.getASTContext().getCharWidth() != 0) { + SizeSoFar++; + } + ZeroFieldSize = Info.Size == 0; + } + return true; +} + mlir::Attribute ConstStructBuilder::Finalize(QualType Type) { Type = Type.getNonReferenceType(); RecordDecl *RD = Type->castAs()->getDecl(); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index dd8a0c98b081..b6b7077badd6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -273,6 +273,57 @@ class CIRGenModule 
: public CIRGenTypeCache { return getTriple().isSPIRVLogical(); } + bool shouldZeroInitPadding() const { + // In C23 (N3096) $6.7.10: + // """ + // If any object is initialized with an empty initializer, then it is + // subject to default initialization: + // - if it is an aggregate, every member is initialized (recursively) + // according to these rules, and any padding is initialized to zero bits; + // - if it is a union, the first named member is initialized (recursively) + // according to these rules, and any padding is initialized to zero bits. + // + // If the aggregate or union contains elements or members that are + // aggregates or unions, these rules apply recursively to the subaggregates + // or contained unions. + // + // If there are fewer initializers in a brace-enclosed list than there are + // elements or members of an aggregate, or fewer characters in a string + // literal used to initialize an array of known size than there are elements + // in the array, the remainder of the aggregate is subject to default + // initialization. + // """ + // + // From my understanding, the standard is ambiguous in the following two + // areas: + // 1. For a union type with an empty initializer, if the first named member is + // not the largest member, then the bytes that come after the first named member + // but before padding are left unspecified. An example is: + // union U { int a; long long b;}; + // union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left + // unspecified. + // + // 2. It only mentions padding for an empty initializer, but doesn't mention + // padding for a non-empty initialization list. And if the aggregate or + // union contains elements or members that are aggregates or unions, and + // some are non-empty initializers, while others are empty initializers, + // the padding initialization is unclear. 
An example is: + // struct S1 { int a; long long b; }; + // struct S2 { char c; struct S1 s1; }; + // // The values for paddings between s2.c and s2.s1.a, between s2.s1.a + // and s2.s1.b are unclear. + // struct S2 s2 = { 'c' }; + // + // Here we choose to zero initialize the remaining bytes of a union type. Because + // projects like the Linux kernel are relying on this behavior. If we don't + // explicitly zero initialize them, the undef values can be optimized to + // return garbage data. We also choose to zero initialize paddings for + // aggregates and unions, no matter whether they are initialized by empty + // initializers or non-empty initializers. This can provide a consistent + // behavior. So projects like the Linux kernel can rely on it. + return !getLangOpts().CPlusPlus; + } + /// Return the mlir::Value for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created /// with the specified type instead of whatever the normal requested type diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c index ded089655f59..3c42e1bf9ef6 100644 --- a/clang/test/CIR/CodeGen/bitfields.c +++ b/clang/test/CIR/CodeGen/bitfields.c @@ -57,12 +57,12 @@ typedef struct { // CHECK: !ty_G = !cir.struct // CHECK: !ty_T = !cir.struct // CHECK: !ty_anon2E0_ = !cir.struct -// CHECK: !ty_anon_struct = !cir.struct // CHECK: #bfi_a = #cir.bitfield_info // CHECK: #bfi_e = #cir.bitfield_info // CHECK: !ty_S = !cir.struct, !u16i, !u32i}> // CHECK: !ty_U = !cir.struct}> // CHECK: !ty___long = !cir.struct}> +// CHECK: !ty_anon_struct = !cir.struct, !s32i}> // CHECK: #bfi_d = #cir.bitfield_info, size = 2, offset = 17, is_signed = true> // CHECK: cir.func {{.*@store_field}} @@ -125,7 +125,7 @@ void createU() { // CHECK: cir.func {{.*@createD}} // CHECK: %0 = cir.alloca !ty_D, !cir.ptr, ["d"] {alignment = 4 : i64} // CHECK: %1 = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr -// CHECK: %2 = cir.const 
#cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.int<3> : !s32i}> : !ty_anon_struct +// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array, #cir.int<3> : !s32i}> : !ty_anon_struct // CHECK: cir.store %2, %1 : !ty_anon_struct, !cir.ptr void createD() { D d = {1,2,3}; @@ -136,7 +136,7 @@ typedef struct { int y ; } G; -// CHECK: cir.global external @g = #cir.const_struct<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.int<254> : !s32i}> : !ty_anon_struct +// CHECK: cir.global external @g = #cir.const_struct<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array, #cir.int<254> : !s32i}> : !ty_anon_struct G g = { -123, 254UL}; // CHECK: cir.func {{.*@get_y}} diff --git a/clang/test/CIR/CodeGen/const-bitfields.c b/clang/test/CIR/CodeGen/const-bitfields.c index 0015f4fe5c83..71e02507b9d0 100644 --- a/clang/test/CIR/CodeGen/const-bitfields.c +++ b/clang/test/CIR/CodeGen/const-bitfields.c @@ -14,13 +14,13 @@ struct Inner { unsigned d : 30; }; -// CHECK: !ty_anon_struct = !cir.struct +// CHECK: !ty_anon_struct = !cir.struct // CHECK: !ty_T = !cir.struct, !s32i} #cir.record.decl.ast> // CHECK: !ty_anon_struct1 = !cir.struct, !u8i, !u8i, !u8i, !u8i}> // CHECK: #bfi_Z = #cir.bitfield_info, size = 9, offset = 11, is_signed = true> struct T GV = { 1, 5, 26, 42 }; -// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct +// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct // check padding is used (const array of zeros) struct Inner var = { 1, 0, 1, 21}; diff --git a/clang/test/CIR/CodeGen/struct.c b/clang/test/CIR/CodeGen/struct.c index 43f1576bb09a..267c755e0a7e 100644 --- 
a/clang/test/CIR/CodeGen/struct.c +++ b/clang/test/CIR/CodeGen/struct.c @@ -38,8 +38,9 @@ void shouldConstInitStructs(void) { // CHECK: cir.func @shouldConstInitStructs struct Foo f = {1, 2, {3, 4}}; // CHECK: %[[#V0:]] = cir.alloca !ty_Foo, !cir.ptr, ["f"] {alignment = 4 : i64} - // CHECK: %[[#V1:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_Foo - // CHECK: cir.store %[[#V1]], %[[#V0]] : !ty_Foo, !cir.ptr + // CHECK: %[[#V1:]] = cir.cast(bitcast, %[[#V0]] : !cir.ptr), !cir.ptr l + // CHECK: %[[#V2:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_anon_struct + // CHECK: cir.store %[[#V2]], %[[#V1]] : !ty_anon_struct, !cir.ptr } // Should zero-initialize uninitialized global structs.