Skip to content

Commit

Permalink
performance: align structures for 64-bit platforms (reorder members to reduce padding)
Browse files Browse the repository at this point in the history
- _tagCLGLBufferInfo 56 -> 48 bytes
- InlineSamplerBaseT 16 -> 12 bytes
- CsrSelectionArgs 88 -> 80 bytes
- EngineInfo 64 -> 48 bytes
- FragmentStorage 48 -> 40 bytes
- SWTagsManager 48 -> 40 bytes
- _tagCLGLResourceInfo 96 -> 80 bytes
- SvmFreeUserData 40 -> 32 bytes
- PayloadArgumentBaseT 40 -> 36 bytes
- ImageDescriptor 72 -> 64 bytes
- PerThreadMemoryBufferBaseT 16 -> 12 bytes
- CopyEngineState 24 -> 16 bytes
- BatchBuffer 120 -> 112 bytes
- DebugMetadata 32 -> 24 bytes
- ImmediateFlushData 48 -> 40 bytes
  • Loading branch information
GermanAizek committed Dec 19, 2024
1 parent c5d541d commit 840c942
Show file tree
Hide file tree
Showing 18 changed files with 59 additions and 58 deletions.
16 changes: 8 additions & 8 deletions opencl/extensions/public/cl_gl_private_intel_structures.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,24 @@

// Used for creating CL resources from GL resources
typedef struct _tagCLGLResourceInfo {
GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL)
GLuint name;
GLenum target;
unsigned int globalShareHandle;
GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL)
GLenum glFormat;
GLint glInternalFormat;
GLuint glHWFormat;
GLboolean isAuxEnabled;
GLuint borderWidth;
GLint textureBufferWidth;
GLint textureBufferSize;
GLint textureBufferOffset;
GMM_RESOURCE_INFO *pGmmResInfoMCS;
GLvoid *pReleaseData;
GLboolean oglSynchronized;
GLboolean isAuxEnabled;
GMM_STATUS status;
unsigned int globalShareHandleMCS;
GMM_RESOURCE_INFO *pGmmResInfoMCS;
GLint numberOfSamples; // Number of samples as specified by API
GLvoid *pReleaseData;
} CL_GL_RESOURCE_INFO, *PCL_GL_RESOURCE_INFO;

// Used for creating GL resources from CL resources
Expand All @@ -48,15 +48,15 @@ typedef struct _tagGLCLResourceInfo {
} GL_CL_RESOURCE_INFO, *PGL_CL_RESOURCE_INFO;

typedef struct _tagCLGLBufferInfo {
GLenum bufferName;
unsigned int globalShareHandle;
GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL)
GLvoid *pSysMem;
GLvoid *pReleaseData;
GLenum bufferName;
unsigned int globalShareHandle;
GLint bufferSize;
GLint bufferOffset;
GLboolean oglSynchronized;
GMM_STATUS status;
GLvoid *pReleaseData;
GLboolean oglSynchronized;
GLboolean createOrDestroy;
} CL_GL_BUFFER_INFO, *PCL_GL_BUFFER_INFO;

Expand Down
2 changes: 1 addition & 1 deletion opencl/source/command_queue/copy_engine_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

namespace NEO {
struct CopyEngineState {
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
TaskCountType taskCount = 0;
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
bool csrClientRegistered = false;

bool isValid() const {
Expand Down
12 changes: 6 additions & 6 deletions opencl/source/command_queue/csr_selection_args.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,21 @@ struct CsrSelectionArgs {
const size_t *imageOrigin = nullptr;
};

cl_command_type cmdType;
const size_t *size = nullptr;
Resource srcResource;
Resource dstResource;
const size_t *size = nullptr;
cl_command_type cmdType;
TransferDirection direction;

CsrSelectionArgs(cl_command_type cmdType, const size_t *size)
: cmdType(cmdType),
size(size),
: size(size),
cmdType(cmdType),
direction(TransferDirection::hostToHost) {}

template <typename ResourceType>
CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size)
: cmdType(cmdType),
size(size) {
: size(size),
cmdType(cmdType) {
if (src) {
processResource(*src, rootDeviceIndex, this->srcResource);
}
Expand Down
6 changes: 3 additions & 3 deletions opencl/source/command_queue/enqueue_svm.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@ using SvmFreeClbT = void(CL_CALLBACK *)(cl_command_queue queue,
void *userData);

struct SvmFreeUserData {
cl_uint numSvmPointers;
void **svmPointers;
SvmFreeClbT clb;
void *userData;
cl_uint numSvmPointers;
bool ownsEventDeletion;

SvmFreeUserData(cl_uint numSvmPointers,
void **svmPointers, SvmFreeClbT clb,
void *userData,
bool ownsEventDeletion)
: numSvmPointers(numSvmPointers),
svmPointers(svmPointers),
: svmPointers(svmPointers),
clb(clb),
userData(userData),
numSvmPointers(numSvmPointers),
ownsEventDeletion(ownsEventDeletion){};
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW
uint32_t gpgpuTaskCount = 123;
uint32_t bcsTaskCount = 123;

CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount};
CopyEngineState bcsState{bcsTaskCount, bcsCsr->getOsContext().getEngineType()};
commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false);

EXPECT_EQ(gpgpuTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ HWTEST_F(CommandQueueHwTest, whenCallingIsCompletedThenTestTaskCountValue) {
bcsCsr->setupContext(*osContext);
bcsCsr->initializeTagAllocation();
EngineControl control(bcsCsr.get(), osContext.get());
CopyEngineState state{aub_stream::EngineType::ENGINE_BCS, 1, false};
CopyEngineState state{1, aub_stream::EngineType::ENGINE_BCS, false};

MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessC
cmdQ.clearBcsEngines();
cmdQ.bcsEngines[0] = &engineControl1;

cmdQ.bcsStates[0] = {aub_stream::ENGINE_BCS, 0, false};
cmdQ.bcsStates[0] = {0, aub_stream::ENGINE_BCS, false};

cl_int error = CL_SUCCESS;
std::unique_ptr<Buffer> buffer(Buffer::create(&context, 0, 1, nullptr, error));
Expand Down
2 changes: 1 addition & 1 deletion opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenAllBcsEnginesReadyWhenWaitingForEventThe
ultCsr2.initializeTagAllocation();
ultCsr2.setupContext(osContext);

CopyEngineState copyEngineState = {aub_stream::EngineType::ENGINE_BCS2, 2, false};
CopyEngineState copyEngineState = {2, aub_stream::EngineType::ENGINE_BCS2, false};
EngineControl engineControl = {&ultCsr2, &osContext};
auto bcs2Index = EngineHelpers::getBcsIndex(aub_stream::EngineType::ENGINE_BCS2);
mockCmdQ->bcsStates[bcs2Index] = copyEngineState;
Expand Down
2 changes: 1 addition & 1 deletion opencl/test/unit_test/mem_obj/image_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1559,7 +1559,7 @@ TEST(ImageConvertDescriptorTest, givenClImageDescWhenConvertedThenCorrectImageDe
}

TEST(ImageConvertDescriptorTest, givenImageDescriptorWhenConvertedThenCorrectClImageDescIsReturned) {
ImageDescriptor desc = {ImageType::image2D, 16, 24, 1, 1, 1024, 2048, 1, 3, false};
ImageDescriptor desc = {16, 24, 1, 1, 1024, 2048, ImageType::image2D, 1, 3, false};
auto clDesc = Image::convertDescriptor(desc);

EXPECT_EQ(clDesc.image_type, static_cast<cl_mem_object_type>(CL_MEM_OBJECT_IMAGE2D));
Expand Down
4 changes: 2 additions & 2 deletions shared/source/command_stream/command_stream_receiver_hw.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

struct ImmediateFlushData {
PipelineSelectArgs pipelineSelectArgs{};
size_t estimatedSize = 0;
void *endPtr = nullptr;
size_t estimatedSize = 0;
size_t csrStartOffset = 0;
PipelineSelectArgs pipelineSelectArgs{};

bool pipelineSelectFullConfigurationNeeded = false;
bool pipelineSelectDirty = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw<Gf

struct EngineInfo {
void *pLRCA;
uint32_t ggttLRCA;
void *pGlobalHWStatusPage;
uint32_t ggttHWSP;
void *pRingBuffer;
uint32_t ggttRingBuffer;
size_t sizeRingBuffer;
uint32_t ggttLRCA;
uint32_t ggttHWSP;
uint32_t ggttRingBuffer;
uint32_t tailRingBuffer;
} engineInfo = {};

Expand Down
10 changes: 5 additions & 5 deletions shared/source/command_stream/submissions_aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_
size_t usedSize, LinearStream *stream, void *endCmdPtr, uint32_t numCsrClients, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, bool dispatchMonitorFence, bool taskCountUpdateOnly)
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
lowPriority(lowPriority),
throttle(throttle), sliceCount(sliceCount),
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), numCsrClients(numCsrClients), hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies), dispatchMonitorFence(dispatchMonitorFence), taskCountUpdateOnly(taskCountUpdateOnly) {}
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), stream(stream), endCmdPtr(endCmdPtr),
numCsrClients(numCsrClients), hasStallingCmds(hasStallingCmds), hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies),
dispatchMonitorFence(dispatchMonitorFence), taskCountUpdateOnly(taskCountUpdateOnly), lowPriority(lowPriority), throttle(throttle),
chainedBatchBuffer(chainedBatchBuffer), sliceCount(sliceCount),
usedSize(usedSize) {}

NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
flushStamp.reset(new FlushStampTracker(false));
Expand Down
17 changes: 9 additions & 8 deletions shared/source/command_stream/submissions_aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,31 @@ struct BatchBuffer {
bool dispatchMonitorFence,
bool taskCountUpdateOnly);
BatchBuffer() {}

PagingFenceSemaphoreInfo pagingFenceSemInfo{};

GraphicsAllocation *commandBufferAllocation = nullptr;
ResidencyContainer *allocationsForResidency = nullptr;
size_t startOffset = 0u;
size_t chainedBatchBufferStartOffset = 0u;
uint64_t taskStartAddress = 0; // if task not available, use CSR stream

GraphicsAllocation *chainedBatchBuffer = nullptr;
bool lowPriority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
size_t usedSize = 0u;

// only used in drm csr in gem close worker active mode
LinearStream *stream = nullptr;
void *endCmdPtr = nullptr;
uint32_t numCsrClients = 0;

PagingFenceSemaphoreInfo pagingFenceSemInfo{};

bool hasStallingCmds = false;
bool hasRelaxedOrderingDependencies = false;
bool disableFlatRingBuffer = false;
bool dispatchMonitorFence = false;
bool taskCountUpdateOnly = false;

bool lowPriority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
GraphicsAllocation *chainedBatchBuffer = nullptr;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
size_t usedSize = 0u;
};

struct CommandBuffer : public IDNode<CommandBuffer> {
Expand Down
6 changes: 3 additions & 3 deletions shared/source/device_binary_format/zebin/zeinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -634,12 +634,12 @@ inline constexpr BtiValueT btiValue = -1;
} // namespace Defaults

struct PayloadArgumentBaseT {
ArgTypeT argType = argTypeUnknown;
OffsetT offset = Defaults::offset;
SourceOffseT sourceOffset = Defaults::sourceOffset;
SizeT size = 0;
ArgIndexT argIndex = Defaults::argIndex;
BtiValueT btiValue = Defaults::btiValue;
ArgTypeT argType = argTypeUnknown;
AddrmodeT addrmode = memoryAddressingModeUnknown;
AddrspaceT addrspace = addressSpaceUnknown;
AccessTypeT accessType = accessTypeUnknown;
Expand Down Expand Up @@ -692,9 +692,9 @@ inline constexpr Slot slot = 0U;
} // namespace Defaults

struct PerThreadMemoryBufferBaseT {
SizeT size = 0U;
AllocationType allocationType = AllocationTypeUnknown;
MemoryUsageT memoryUsage = MemoryUsageUnknown;
SizeT size = 0U;
IsSimtThreadT isSimtThread = Defaults::isSimtThread;
Slot slot = Defaults::slot;
};
Expand Down Expand Up @@ -732,8 +732,8 @@ inline constexpr NormalizedT normalized = false;

struct InlineSamplerBaseT {
SamplerIndexT samplerIndex = Defaults::samplerIndex;
AddrModeT addrMode = Defaults::addrMode;
FilterModeT filterMode = Defaults::filterMode;
AddrModeT addrMode = Defaults::addrMode;
NormalizedT normalized = Defaults::normalized;
};
} // namespace InlineSamplers
Expand Down
2 changes: 1 addition & 1 deletion shared/source/helpers/surface_format_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,13 @@ enum class ImageType {
};

struct ImageDescriptor {
ImageType imageType;
size_t imageWidth;
size_t imageHeight;
size_t imageDepth;
size_t imageArraySize;
size_t imageRowPitch;
size_t imageSlicePitch;
ImageType imageType;
uint32_t numMipLevels;
uint32_t numSamples;
bool fromParent;
Expand Down
2 changes: 1 addition & 1 deletion shared/source/memory_manager/host_ptr_defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ struct AllocationRequirements {
struct FragmentStorage {
const void *fragmentCpuPointer = nullptr;
size_t fragmentSize = 0;
int refCount = 0;
OsHandle *osInternalStorage = nullptr;
ResidencyData *residency = nullptr;
int refCount = 0;
bool driverAllocation = false;
};

Expand Down
2 changes: 1 addition & 1 deletion shared/source/os_interface/linux/xe/ioctl_helper_xe.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,9 @@ class IoctlHelperXe : public IoctlHelper {

std::unique_ptr<XeDrm::drm_xe_engine_class_instance> defaultEngine;
struct DebugMetadata {
DrmResourceClass type;
uint64_t offset;
uint64_t size;
DrmResourceClass type;
bool isCookie;
};

Expand Down
22 changes: 11 additions & 11 deletions shared/source/utilities/software_tags_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ class GraphicsAllocation;
class LinearStream;

class SWTagsManager {
protected:
void allocateBXMLHeap(Device &device);
void allocateSWTagHeap(Device &device);

MemoryManager *memoryManager{};
GraphicsAllocation *tagHeap = nullptr;
GraphicsAllocation *bxmlHeap = nullptr;
unsigned int currentHeapOffset = 0;
unsigned int currentTagCount = 0;
bool initialized = false;

public:
SWTagsManager() = default;

Expand All @@ -43,17 +54,6 @@ class SWTagsManager {
static const unsigned int maxTagHeapSize = 16384;
unsigned int currentCallCount = 0;
unsigned int getCurrentHeapOffset() { return currentHeapOffset; }

protected:
void allocateBXMLHeap(Device &device);
void allocateSWTagHeap(Device &device);

MemoryManager *memoryManager{};
GraphicsAllocation *tagHeap = nullptr;
GraphicsAllocation *bxmlHeap = nullptr;
unsigned int currentHeapOffset = 0;
unsigned int currentTagCount = 0;
bool initialized = false;
};

template <typename GfxFamily>
Expand Down

0 comments on commit 840c942

Please sign in to comment.