Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align structures for 64-bit platforms (reduces the time cost of moving and copying objects) #786

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions opencl/source/command_queue/copy_engine_state.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -13,8 +13,8 @@

namespace NEO {
struct CopyEngineState {
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
TaskCountType taskCount = 0;
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
bool csrClientRegistered = false;

bool isValid() const {
Expand Down
14 changes: 7 additions & 7 deletions opencl/source/command_queue/csr_selection_args.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -24,21 +24,21 @@ struct CsrSelectionArgs {
const size_t *imageOrigin = nullptr;
};

cl_command_type cmdType;
const size_t *size = nullptr;
Resource srcResource;
Resource dstResource;
const size_t *size = nullptr;
cl_command_type cmdType;
TransferDirection direction;

CsrSelectionArgs(cl_command_type cmdType, const size_t *size)
: cmdType(cmdType),
size(size),
: size(size),
cmdType(cmdType),
direction(TransferDirection::hostToHost) {}

template <typename ResourceType>
CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size)
: cmdType(cmdType),
size(size) {
: size(size),
cmdType(cmdType) {
if (src) {
processResource(*src, rootDeviceIndex, this->srcResource);
}
Expand Down
8 changes: 4 additions & 4 deletions opencl/source/command_queue/enqueue_svm.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -25,20 +25,20 @@ using SvmFreeClbT = void(CL_CALLBACK *)(cl_command_queue queue,
void *userData);

struct SvmFreeUserData {
cl_uint numSvmPointers;
void **svmPointers;
SvmFreeClbT clb;
void *userData;
cl_uint numSvmPointers;
bool ownsEventDeletion;

SvmFreeUserData(cl_uint numSvmPointers,
void **svmPointers, SvmFreeClbT clb,
void *userData,
bool ownsEventDeletion)
: numSvmPointers(numSvmPointers),
svmPointers(svmPointers),
: svmPointers(svmPointers),
clb(clb),
userData(userData),
numSvmPointers(numSvmPointers),
ownsEventDeletion(ownsEventDeletion){};
};

Expand Down
4 changes: 2 additions & 2 deletions opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -1251,7 +1251,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW
uint32_t gpgpuTaskCount = 123;
uint32_t bcsTaskCount = 123;

CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount};
CopyEngineState bcsState{bcsTaskCount, bcsCsr->getOsContext().getEngineType()};
commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false);

EXPECT_EQ(gpgpuTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -55,7 +55,7 @@ HWTEST_F(CommandQueueHwTest, whenCallingIsCompletedThenTestTaskCountValue) {
bcsCsr->setupContext(*osContext);
bcsCsr->initializeTagAllocation();
EngineControl control(bcsCsr.get(), osContext.get());
CopyEngineState state{aub_stream::EngineType::ENGINE_BCS, 1, false};
CopyEngineState state{1, aub_stream::EngineType::ENGINE_BCS, false};

MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -47,7 +47,7 @@ HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessC
cmdQ.clearBcsEngines();
cmdQ.bcsEngines[0] = &engineControl1;

cmdQ.bcsStates[0] = {aub_stream::ENGINE_BCS, 0, false};
cmdQ.bcsStates[0] = {0, aub_stream::ENGINE_BCS, false};

cl_int error = CL_SUCCESS;
std::unique_ptr<Buffer> buffer(Buffer::create(&context, 0, 1, nullptr, error));
Expand Down
4 changes: 2 additions & 2 deletions opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -619,7 +619,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenAllBcsEnginesReadyWhenWaitingForEventThe
ultCsr2.initializeTagAllocation();
ultCsr2.setupContext(osContext);

CopyEngineState copyEngineState = {aub_stream::EngineType::ENGINE_BCS2, 2, false};
CopyEngineState copyEngineState = {2, aub_stream::EngineType::ENGINE_BCS2, false};
EngineControl engineControl = {&ultCsr2, &osContext};
auto bcs2Index = EngineHelpers::getBcsIndex(aub_stream::EngineType::ENGINE_BCS2);
mockCmdQ->bcsStates[bcs2Index] = copyEngineState;
Expand Down
4 changes: 2 additions & 2 deletions opencl/test/unit_test/mem_obj/image_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -1559,7 +1559,7 @@ TEST(ImageConvertDescriptorTest, givenClImageDescWhenConvertedThenCorrectImageDe
}

TEST(ImageConvertDescriptorTest, givenImageDescriptorWhenConvertedThenCorrectClImageDescIsReturned) {
ImageDescriptor desc = {ImageType::image2D, 16, 24, 1, 1, 1024, 2048, 1, 3, false};
ImageDescriptor desc = {16, 24, 1, 1, 1024, 2048, ImageType::image2D, 1, 3, false};
auto clDesc = Image::convertDescriptor(desc);

EXPECT_EQ(clDesc.image_type, static_cast<cl_mem_object_type>(CL_MEM_OBJECT_IMAGE2D));
Expand Down
6 changes: 3 additions & 3 deletions shared/source/command_stream/command_stream_receiver_hw.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -25,10 +25,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

struct ImmediateFlushData {
PipelineSelectArgs pipelineSelectArgs{};
size_t estimatedSize = 0;
void *endPtr = nullptr;
size_t estimatedSize = 0;
size_t csrStartOffset = 0;
PipelineSelectArgs pipelineSelectArgs{};

bool pipelineSelectFullConfigurationNeeded = false;
bool pipelineSelectDirty = false;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -81,12 +81,12 @@ class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw<Gf

struct EngineInfo {
void *pLRCA;
uint32_t ggttLRCA;
void *pGlobalHWStatusPage;
uint32_t ggttHWSP;
void *pRingBuffer;
uint32_t ggttRingBuffer;
size_t sizeRingBuffer;
uint32_t ggttLRCA;
uint32_t ggttHWSP;
uint32_t ggttRingBuffer;
uint32_t tailRingBuffer;
} engineInfo = {};

Expand Down
12 changes: 6 additions & 6 deletions shared/source/command_stream/submissions_aggregator.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -99,11 +99,11 @@ NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_
size_t usedSize, LinearStream *stream, void *endCmdPtr, uint32_t numCsrClients, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, bool dispatchMonitorFence, bool taskCountUpdateOnly)
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
lowPriority(lowPriority),
throttle(throttle), sliceCount(sliceCount),
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), numCsrClients(numCsrClients), hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies), dispatchMonitorFence(dispatchMonitorFence), taskCountUpdateOnly(taskCountUpdateOnly) {}
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), stream(stream), endCmdPtr(endCmdPtr),
numCsrClients(numCsrClients), hasStallingCmds(hasStallingCmds), hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies),
dispatchMonitorFence(dispatchMonitorFence), taskCountUpdateOnly(taskCountUpdateOnly), lowPriority(lowPriority), throttle(throttle),
chainedBatchBuffer(chainedBatchBuffer), sliceCount(sliceCount),
usedSize(usedSize) {}

NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
flushStamp.reset(new FlushStampTracker(false));
Expand Down
19 changes: 10 additions & 9 deletions shared/source/command_stream/submissions_aggregator.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -46,30 +46,31 @@ struct BatchBuffer {
bool dispatchMonitorFence,
bool taskCountUpdateOnly);
BatchBuffer() {}

PagingFenceSemaphoreInfo pagingFenceSemInfo{};

GraphicsAllocation *commandBufferAllocation = nullptr;
ResidencyContainer *allocationsForResidency = nullptr;
size_t startOffset = 0u;
size_t chainedBatchBufferStartOffset = 0u;
uint64_t taskStartAddress = 0; // if task not available, use CSR stream

GraphicsAllocation *chainedBatchBuffer = nullptr;
bool lowPriority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
size_t usedSize = 0u;

// only used in drm csr in gem close worker active mode
LinearStream *stream = nullptr;
void *endCmdPtr = nullptr;
uint32_t numCsrClients = 0;

PagingFenceSemaphoreInfo pagingFenceSemInfo{};

bool hasStallingCmds = false;
bool hasRelaxedOrderingDependencies = false;
bool disableFlatRingBuffer = false;
bool dispatchMonitorFence = false;
bool taskCountUpdateOnly = false;

bool lowPriority = false;
QueueThrottle throttle = QueueThrottle::MEDIUM;
GraphicsAllocation *chainedBatchBuffer = nullptr;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
size_t usedSize = 0u;
};

struct CommandBuffer : public IDNode<CommandBuffer> {
Expand Down
8 changes: 4 additions & 4 deletions shared/source/device_binary_format/zebin/zeinfo.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -634,12 +634,12 @@ inline constexpr BtiValueT btiValue = -1;
} // namespace Defaults

struct PayloadArgumentBaseT {
ArgTypeT argType = argTypeUnknown;
OffsetT offset = Defaults::offset;
SourceOffseT sourceOffset = Defaults::sourceOffset;
SizeT size = 0;
ArgIndexT argIndex = Defaults::argIndex;
BtiValueT btiValue = Defaults::btiValue;
ArgTypeT argType = argTypeUnknown;
AddrmodeT addrmode = memoryAddressingModeUnknown;
AddrspaceT addrspace = addressSpaceUnknown;
AccessTypeT accessType = accessTypeUnknown;
Expand Down Expand Up @@ -692,9 +692,9 @@ inline constexpr Slot slot = 0U;
} // namespace Defaults

struct PerThreadMemoryBufferBaseT {
SizeT size = 0U;
AllocationType allocationType = AllocationTypeUnknown;
MemoryUsageT memoryUsage = MemoryUsageUnknown;
SizeT size = 0U;
IsSimtThreadT isSimtThread = Defaults::isSimtThread;
Slot slot = Defaults::slot;
};
Expand Down Expand Up @@ -732,8 +732,8 @@ inline constexpr NormalizedT normalized = false;

struct InlineSamplerBaseT {
SamplerIndexT samplerIndex = Defaults::samplerIndex;
AddrModeT addrMode = Defaults::addrMode;
FilterModeT filterMode = Defaults::filterMode;
AddrModeT addrMode = Defaults::addrMode;
NormalizedT normalized = Defaults::normalized;
};
} // namespace InlineSamplers
Expand Down
4 changes: 2 additions & 2 deletions shared/source/helpers/surface_format_info.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -218,13 +218,13 @@ enum class ImageType {
};

struct ImageDescriptor {
ImageType imageType;
size_t imageWidth;
size_t imageHeight;
size_t imageDepth;
size_t imageArraySize;
size_t imageRowPitch;
size_t imageSlicePitch;
ImageType imageType;
uint32_t numMipLevels;
uint32_t numSamples;
bool fromParent;
Expand Down
4 changes: 2 additions & 2 deletions shared/source/memory_manager/host_ptr_defines.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -51,9 +51,9 @@ struct AllocationRequirements {
struct FragmentStorage {
const void *fragmentCpuPointer = nullptr;
size_t fragmentSize = 0;
int refCount = 0;
OsHandle *osInternalStorage = nullptr;
ResidencyData *residency = nullptr;
int refCount = 0;
bool driverAllocation = false;
};

Expand Down
4 changes: 2 additions & 2 deletions shared/source/os_interface/linux/xe/ioctl_helper_xe.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -190,9 +190,9 @@ class IoctlHelperXe : public IoctlHelper {

std::unique_ptr<XeDrm::drm_xe_engine_class_instance> defaultEngine;
struct DebugMetadata {
DrmResourceClass type;
uint64_t offset;
uint64_t size;
DrmResourceClass type;
bool isCookie;
};

Expand Down