Skip to content
This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

Update compiler to support icpx and latest nightly #512

Merged
merged 13 commits into from
May 3, 2024
13 changes: 11 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ set(PORTBLAS_GENERATED_SRC ${CMAKE_CURRENT_BINARY_DIR}/generated_src)
set(PORTBLAS_INCLUDE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
set(PORTBLAS_COMMON_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common/include)
set(PORTBLAS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src)
set(PORTBLAS_SRC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src> $<INSTALL_INTERFACE:src>)
set(PORTBLAS_SRC_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/python_generator)
list(APPEND THIRD_PARTIES_INCLUDE ${CBLAS_INCLUDE})

Expand Down Expand Up @@ -142,8 +142,17 @@ include(CmakeFunctionHelper)
if (INSTALL_HEADER_ONLY)
add_library(portblas INTERFACE)
set_target_properties(portblas PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${PORTBLAS_INCLUDE};$<INSTALL_INTERFACE:src>"
INTERFACE_INCLUDE_DIRECTORIES "${PORTBLAS_INCLUDE};${PORTBLAS_SRC}"
)
target_compile_definitions(portblas INTERFACE "SB_ENABLE_USM")
Rbiessy marked this conversation as resolved.
Show resolved Hide resolved
target_compile_definitions(portblas INTERFACE "BLAS_ENABLE_COMPLEX=${BLAS_ENABLE_COMPLEX}")
Rbiessy marked this conversation as resolved.
Show resolved Hide resolved
target_compile_definitions(portblas INTERFACE ${TUNING_TARGET})
if((${CMAKE_CXX_COMPILER_ID} STREQUAL "IntelLLVM") AND NOT
(${TUNING_TARGET} STREQUAL "INTEL_GPU") )
target_compile_options(portblas INTERFACE -fno-fast-math)
target_compile_options(portblas INTERFACE -mllvm -loopopt=0 )
Rbiessy marked this conversation as resolved.
Show resolved Hide resolved
pgorlani marked this conversation as resolved.
Show resolved Hide resolved
message(STATUS "Adding -fno-fast-math -mllvm -loopopt=0 to portblas")
endif()
else()
add_subdirectory(src)
build_library(portblas ${BLAS_ENABLE_EXTENSIONS})
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,7 @@ to clone submodule(s).

### Compile with DPC++
```bash
export CC=[path/to/intel/clang]
export CXX=[path/to/intel/clang++]
export CXX=[path/to/intel/icpx]
cd build
cmake -GNinja ../ -DSYCL_COMPILER=dpcpp
ninja
Expand All @@ -417,6 +416,13 @@ advisable for NVIDIA and **mandatory for AMD** to provide the specific device
architecture through `-DDPCPP_SYCL_ARCH=<arch>`, e.g., `<arch>` can be `sm_80`
for NVIDIA or `gfx908` for AMD.

#### DPC++ Compiler Support

When DPC++ is used as the SYCL compiler, the project is fully compatible with `icpx`, which is provided by the
Intel [oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html#gs.7t6x52)
and is the recommended compiler. portBLAS can also be compiled with the [open-source intel/llvm](https://github.com/intel/llvm)
compiler, but not all of its latest changes are tested.

### Compile with AdaptiveCpp *(Formerly hipSYCL)*
The following instructions concern the **generic** *(clang-based)* flow supported
by AdaptiveCpp.
Expand Down
6 changes: 6 additions & 0 deletions cmake/Modules/FindDPCPP.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ function(add_sycl_to_target)
"${multi_value_args}"
${ARGN}
)
if((${CMAKE_CXX_COMPILER_ID} STREQUAL "IntelLLVM") AND NOT
(${TUNING_TARGET} STREQUAL "INTEL_GPU") )
target_compile_options(${SB_ADD_SYCL_TARGET} PRIVATE -fno-fast-math)
Rbiessy marked this conversation as resolved.
Show resolved Hide resolved
target_compile_options(${SB_ADD_SYCL_TARGET} PRIVATE -mllvm -loopopt=0 )
Rbiessy marked this conversation as resolved.
Show resolved Hide resolved
message(STATUS "Adding -fno-fast-math -mllvm -loopopt=0 to target ${SB_ADD_SYCL_TARGET}")
Rbiessy marked this conversation as resolved.
Show resolved Hide resolved
endif()
target_compile_options(${SB_ADD_SYCL_TARGET} PUBLIC ${DPCPP_FLAGS})
get_target_property(target_type ${SB_ADD_SYCL_TARGET} TYPE)
if (NOT target_type STREQUAL "OBJECT_LIBRARY")
Expand Down
4 changes: 2 additions & 2 deletions src/operations/blas1/IndexMaxMin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ PORTBLAS_INLINE void IndexMaxMin<is_max, is_step0, lhs_t, rhs_t>::eval(
// reduction within the sub_group
for (index_t i = sg_local_range >> 1; i > 0; i >>= 1) {
if (sg_local_id < i) {
element_t shfl_val = sg.shuffle_down(val.get_value(), i);
index_t shfl_idx = sg.shuffle_down(val.get_index(), i);
element_t shfl_val = sycl::shift_group_left(sg, val.get_value(), i);
index_t shfl_idx = sycl::shift_group_left(sg, val.get_index(), i);
value_t shfl{shfl_idx, shfl_val};
val = op::eval(val, shfl);
}
Expand Down
10 changes: 1 addition & 9 deletions test/unittest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,6 @@ if(is_dpcpp)
)
endif()


# Contains tests that fail if compiled with -ffast-math
set(SYCL_UNITTEST_NOFASTMATH
${PORTBLAS_UNITTEST}/blas1/blas1_rotg_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_rotmg_test.cpp
)

if(GEMM_TALL_SKINNY_SUPPORT)
list(APPEND SYCL_UNITTEST_SRCS ${PORTBLAS_UNITTEST}/blas3/blas3_gemm_tall_skinny_test.cpp)
endif()
Expand Down Expand Up @@ -142,8 +135,7 @@ foreach(blas_test ${SYCL_UNITTEST_SRCS})
target_link_libraries(${test_exec} PRIVATE gtest_main Clara::Clara blas::blas portblas)
target_include_directories(${test_exec} PRIVATE ${CBLAS_INCLUDE} ${PORTBLAS_COMMON_INCLUDE_DIR})

list (FIND SYCL_UNITTEST_NOFASTMATH ${blas_test} _index)
if (${_index} GREATER -1)
if (${CMAKE_CXX_COMPILER_ID} STREQUAL "IntelLLVM")
pgorlani marked this conversation as resolved.
Show resolved Hide resolved
target_compile_options(${test_exec} PRIVATE "-fno-fast-math")
endif()

Expand Down
Loading