Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIR][CodeGen] handle zero init padding case #1257

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 60 additions & 6 deletions clang/lib/CIR/CodeGen/CIRGenExprConst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ using namespace clang::CIRGen;
namespace {
class ConstExprEmitter;

mlir::TypedAttr getPadding(CIRGenModule &CGM, CharUnits size) {
auto eltTy = CGM.UCharTy;
auto arSize = size.getQuantity();
auto &bld = CGM.getBuilder();
if (size > CharUnits::One()) {
SmallVector<mlir::Attribute, 4> elts(arSize, bld.getZeroAttr(eltTy));
return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts),
bld.getArrayType(eltTy, arSize));
} else {
return cir::ZeroAttr::get(bld.getContext(), eltTy);
}
}

static mlir::Attribute
emitArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType,
mlir::Type CommonElementType, unsigned ArrayBound,
Expand Down Expand Up @@ -70,12 +83,7 @@ struct ConstantAggregateBuilderUtils {
}

mlir::TypedAttr getPadding(CharUnits size) const {
auto eltTy = CGM.UCharTy;
auto arSize = size.getQuantity();
auto &bld = CGM.getBuilder();
SmallVector<mlir::Attribute, 4> elts(arSize, bld.getZeroAttr(eltTy));
return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts),
bld.getArrayType(eltTy, arSize));
return ::getPadding(CGM, size);
}

mlir::Attribute getZeroes(CharUnits ZeroSize) const {
Expand Down Expand Up @@ -508,6 +516,11 @@ class ConstStructBuilder {
bool Build(InitListExpr *ILE, bool AllowOverwrite);
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);

bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo,
const FieldDecl &Field, bool AllowOverwrite,
CharUnits &SizeSoFar, bool &ZeroFieldSize);

mlir::Attribute Finalize(QualType Ty);
};

Expand Down Expand Up @@ -614,6 +627,10 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
if (CXXRD->getNumBases())
return false;

const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
bool ZeroFieldSize = false;
CharUnits SizeSoFar = CharUnits::Zero();

for (FieldDecl *Field : RD->fields()) {
++FieldNo;

Expand Down Expand Up @@ -642,6 +659,11 @@ bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) {
continue;
}

if (ZeroInitPadding &&
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar,
ZeroFieldSize))
return false;

// When emitting a DesignatedInitUpdateExpr, a nested InitListExpr
// represents additional overwriting of our current constant value, and not
// a new constant to emit independently.
Expand Down Expand Up @@ -784,6 +806,38 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
return true;
}

bool ConstStructBuilder::DoZeroInitPadding(
const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field,
bool AllowOverwrite, CharUnits &SizeSoFar, bool &ZeroFieldSize) {

uint64_t StartBitOffset = Layout.getFieldOffset(FieldNo);
CharUnits StartOffset =
CGM.getASTContext().toCharUnitsFromBits(StartBitOffset);
if (SizeSoFar < StartOffset) {
if (!AppendBytes(SizeSoFar, getPadding(CGM, StartOffset - SizeSoFar),
AllowOverwrite))
return false;
}

if (!Field.isBitField()) {
CharUnits FieldSize =
CGM.getASTContext().getTypeSizeInChars(Field.getType());
SizeSoFar = StartOffset + FieldSize;
ZeroFieldSize = FieldSize.isZero();
} else {
const CIRGenRecordLayout &RL =
CGM.getTypes().getCIRGenRecordLayout(Field.getParent());
const CIRGenBitFieldInfo &Info = RL.getBitFieldInfo(&Field);
uint64_t EndBitOffset = StartBitOffset + Info.Size;
SizeSoFar = CGM.getASTContext().toCharUnitsFromBits(EndBitOffset);
if (EndBitOffset % CGM.getASTContext().getCharWidth() != 0) {
SizeSoFar++;
}
ZeroFieldSize = Info.Size == 0;
}
return true;
}

mlir::Attribute ConstStructBuilder::Finalize(QualType Type) {
Type = Type.getNonReferenceType();
RecordDecl *RD = Type->castAs<RecordType>()->getDecl();
Expand Down
51 changes: 51 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,57 @@ class CIRGenModule : public CIRGenTypeCache {
return getTriple().isSPIRVLogical();
}

bool shouldZeroInitPadding() const {
// In C23 (N3096) $6.7.10:
// """
// If any object is initialized with an empty iniitializer, then it is
// subject to default initialization:
// - if it is an aggregate, every member is initialized (recursively)
// according to these rules, and any padding is initialized to zero bits;
// - if it is a union, the first named member is initialized (recursively)
// according to these rules, and any padding is initialized to zero bits.
//
// If the aggregate or union contains elements or members that are
// aggregates or unions, these rules apply recursively to the subaggregates
// or contained unions.
//
// If there are fewer initializers in a brace-enclosed list than there are
// elements or members of an aggregate, or fewer characters in a string
// literal used to initialize an array of known size than there are elements
// in the array, the remainder of the aggregate is subject to default
// initialization.
// """
//
// From my understanding, the standard is ambiguous in the following two
// areas:
// 1. For a union type with empty initializer, if the first named member is
// not the largest member, then the bytes comes after the first named member
// but before padding are left unspecified. An example is:
// union U { int a; long long b;};
// union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left
// unspecified.
//
// 2. It only mentions padding for empty initializer, but doesn't mention
// padding for a non empty initialization list. And if the aggregation or
// union contains elements or members that are aggregates or unions, and
// some are non empty initializers, while others are empty initiailizers,
// the padding initialization is unclear. An example is:
// struct S1 { int a; long long b; };
// struct S2 { char c; struct S1 s1; };
// // The values for paddings between s2.c and s2.s1.a, between s2.s1.a
// and s2.s1.b are unclear.
// struct S2 s2 = { 'c' };
//
// Here we choose to zero initiailize left bytes of a union type. Because
// projects like the Linux kernel are relying on this behavior. If we don't
// explicitly zero initialize them, the undef values can be optimized to
// return gabage data. We also choose to zero initialize paddings for
// aggregates and unions, no matter they are initialized by empty
// initializers or non empty initializers. This can provide a consistent
// behavior. So projects like the Linux kernel can rely on it.
return !getLangOpts().CPlusPlus;
}

/// Return the mlir::Value for the address of the given global variable.
/// If Ty is non-null and if the global doesn't exist, then it will be created
/// with the specified type instead of whatever the normal requested type
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CIR/CodeGen/bitfields.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ typedef struct {
// CHECK: !ty_G = !cir.struct<struct "G" {!u16i, !s32i} #cir.record.decl.ast>
// CHECK: !ty_T = !cir.struct<struct "T" {!u8i, !u32i} #cir.record.decl.ast>
// CHECK: !ty_anon2E0_ = !cir.struct<struct "anon.0" {!u32i} #cir.record.decl.ast>
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !s32i}>
// CHECK: #bfi_a = #cir.bitfield_info<name = "a", storage_type = !u8i, size = 3, offset = 0, is_signed = true>
// CHECK: #bfi_e = #cir.bitfield_info<name = "e", storage_type = !u16i, size = 15, offset = 0, is_signed = true>
// CHECK: !ty_S = !cir.struct<struct "S" {!u32i, !cir.array<!u8i x 3>, !u16i, !u32i}>
// CHECK: !ty_U = !cir.struct<struct "U" {!s8i, !s8i, !s8i, !cir.array<!u8i x 9>}>
// CHECK: !ty___long = !cir.struct<struct "__long" {!ty_anon2E0_, !u32i, !cir.ptr<!u32i>}>
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !cir.array<!u8i x 2>, !s32i}>
// CHECK: #bfi_d = #cir.bitfield_info<name = "d", storage_type = !cir.array<!u8i x 3>, size = 2, offset = 17, is_signed = true>

// CHECK: cir.func {{.*@store_field}}
Expand Down Expand Up @@ -125,7 +125,7 @@ void createU() {
// CHECK: cir.func {{.*@createD}}
// CHECK: %0 = cir.alloca !ty_D, !cir.ptr<!ty_D>, ["d"] {alignment = 4 : i64}
// CHECK: %1 = cir.cast(bitcast, %0 : !cir.ptr<!ty_D>), !cir.ptr<!ty_anon_struct>
// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.int<3> : !s32i}> : !ty_anon_struct
// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<3> : !s32i}> : !ty_anon_struct
// CHECK: cir.store %2, %1 : !ty_anon_struct, !cir.ptr<!ty_anon_struct>
void createD() {
D d = {1,2,3};
Expand All @@ -136,7 +136,7 @@ typedef struct {
int y ;
} G;

// CHECK: cir.global external @g = #cir.const_struct<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.int<254> : !s32i}> : !ty_anon_struct
// CHECK: cir.global external @g = #cir.const_struct<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<254> : !s32i}> : !ty_anon_struct
G g = { -123, 254UL};

// CHECK: cir.func {{.*@get_y}}
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CIR/CodeGen/const-bitfields.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ struct Inner {
unsigned d : 30;
};

// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !u8i, !s32i}>
// CHECK: !ty_anon_struct = !cir.struct<struct {!u8i, !u8i, !u8i, !u8i, !s32i}>
// CHECK: !ty_T = !cir.struct<struct "T" {!cir.array<!u8i x 3>, !s32i} #cir.record.decl.ast>
// CHECK: !ty_anon_struct1 = !cir.struct<struct {!u8i, !cir.array<!u8i x 3>, !u8i, !u8i, !u8i, !u8i}>
// CHECK: #bfi_Z = #cir.bitfield_info<name = "Z", storage_type = !cir.array<!u8i x 3>, size = 9, offset = 11, is_signed = true>

struct T GV = { 1, 5, 26, 42 };
// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct
// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct

// check padding is used (const array of zeros)
struct Inner var = { 1, 0, 1, 21};
Expand Down
5 changes: 3 additions & 2 deletions clang/test/CIR/CodeGen/struct.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ void shouldConstInitStructs(void) {
// CHECK: cir.func @shouldConstInitStructs
struct Foo f = {1, 2, {3, 4}};
// CHECK: %[[#V0:]] = cir.alloca !ty_Foo, !cir.ptr<!ty_Foo>, ["f"] {alignment = 4 : i64}
// CHECK: %[[#V1:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_Foo
// CHECK: cir.store %[[#V1]], %[[#V0]] : !ty_Foo, !cir.ptr<!ty_Foo>
// CHECK: %[[#V1:]] = cir.cast(bitcast, %[[#V0]] : !cir.ptr<!ty_Foo>), !cir.ptr<!ty_anon_struct> l
// CHECK: %[[#V2:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_anon_struct
// CHECK: cir.store %[[#V2]], %[[#V1]] : !ty_anon_struct, !cir.ptr<!ty_anon_struct>
}

// Should zero-initialize uninitialized global structs.
Expand Down
Loading