Skip to content

Commit

Permalink
adding the handout url
Browse files Browse the repository at this point in the history
  • Loading branch information
Nour A Abouelnadar committed Dec 12, 2021
1 parent ebe6089 commit cc72fe8
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ These are the test files.

### 5 Handout

![alt text](https://github.com/ecrc/tlrmvm-dev/blob/master/doxygen/handsout.png)
![alt text](https://github.com/ecrc/tlrmvm/blob/master/doxygen/handsout.png)

If you run into any trouble, please open an issue or
send an email to yuxi.hong@kaust.edu.sa / hatem.ltaief@kaust.edu.sa.
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@ install(TARGETS ex1basic_complexfloat DESTINATION test)
# MPI-only test programs and their launch scripts; everything in this block is
# skipped unless USE_MPI is set.
if(USE_MPI)
# WrapBinary is defined outside this file; presumably it declares the executable
# target named by its first argument from sources under cpp/ -- TODO confirm.
WrapBinary(ex2mpitlrmvm_float cpp)
WrapBinary(ex2mpitlrmvm_complexfloat cpp)
WrapBinary(ex3_gendata cpp)
# Install the built executables into <prefix>/test.
install(TARGETS ex3_gendata DESTINATION test)
install(TARGETS ex2mpitlrmvm_float DESTINATION test)
install(TARGETS ex2mpitlrmvm_complexfloat DESTINATION test)
# Install the launch scripts next to the executables. The explicit PERMISSIONS
# make them readable and executable for owner and group only.
install(FILES
${CMAKE_CURRENT_LIST_DIR}/cpp/runexp2AMD.sh
${CMAKE_CURRENT_LIST_DIR}/cpp/runexp2AMD_synthetic.sh
${CMAKE_CURRENT_LIST_DIR}/cpp/runexp2ICX.sh
${CMAKE_CURRENT_LIST_DIR}/cpp/runexp2NEC.sh
${CMAKE_CURRENT_LIST_DIR}/cpp/runexp2A64FX.sh
${CMAKE_CURRENT_LIST_DIR}/cpp/runexp3_gendata.sh
DESTINATION test
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE)
endif() # USE MPI
Expand Down
7 changes: 5 additions & 2 deletions test/cpp/ex2mpitlrmvm_complexfloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ int main (int argc, char ** argv){
vector<double> bandstat;
double bytesprocessed;
size_t granksum;
int loopsize;
auto argparser = ArgsParser(argc, argv);
originM = argparser.getint("M");
originN = argparser.getint("N");
nb = argparser.getint("nb");
loopsize = argparser.getint("loopsize");
acc = argparser.getstring("errorthreshold");
problemname = argparser.getstring("problemname");
datafolder = argparser.getstring("datafolder");
Expand All @@ -55,7 +57,7 @@ int main (int argc, char ** argv){
double bytes = TLRMVMBytesProcessed<complex<float>>(tlrmvmptr.granksum,
tlrmvmptr.nb, tlrmvmptr.paddingM, tlrmvmptr.paddingN);
tlrmvmptr.MemoryInit();
for(int i=0; i<10000; i++){
for(int i=0; i<loopsize; i++){
MPI_Barrier(MPI_COMM_WORLD);
auto start = std::chrono::steady_clock::now();
tlrmvmptr.MVM();
Expand All @@ -78,12 +80,13 @@ int main (int argc, char ** argv){
auto hyu = Matrix<complex<float>>(tlrmvmptr.h_yu, tlrmvmptr.workmatgranksum, 1);
// cout << " Phase 2 Correctness : " << hyu.allclose(yu_pc) << endl;
Matrix<complex<float>> y_pc = seismicpcmat.Phase3();
auto hy = Matrix<complex<float>>(tlrmvmptr.h_yout, tlrmvmptr.paddingM, 1);
auto hy = Matrix<complex<float>>(tlrmvmptr.h_yout, tlrmvmptr.originM, 1);
cout << " Check MPI Phase 3 Correctness : "<< hy.allclose(y_pc) << endl;
std::sort(mergetime.begin(), mergetime.end());
int N = mergetime.size();
cout << "median " << mergetime[N / 2] * 1e6 << " us."<< endl;
double bytes = TLRMVMBytesProcessed<complex<float>>(tlrmvmptr.granksum, tlrmvmptr.nb, originM, originN);
cout << "U and V bases size: " << bytes * 1e-6 << " MB." << endl;
cout << "Bandwidth " << bytes / mergetime[N/2] * 1e-9 << " GB/s" << endl;
}
tlrmvmptr.MemoryFree();
Expand Down
9 changes: 6 additions & 3 deletions test/cpp/ex2mpitlrmvm_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ int main (int argc, char ** argv){
int originM;
int originN;
int nb;
int loopsize;
string acc;
string datafolder;
string problemname;
Expand All @@ -30,6 +31,7 @@ int main (int argc, char ** argv){
originM = argparser.getint("M");
originN = argparser.getint("N");
nb = argparser.getint("nb");
loopsize = argparser.getint("loopsize");
acc = argparser.getstring("errorthreshold");
problemname = argparser.getstring("problemname");
datafolder = argparser.getstring("datafolder");
Expand All @@ -46,7 +48,7 @@ int main (int argc, char ** argv){
maskmat.Fill(0);
for(int i=0; i<tlrmvmconfig.Mtg; i++){
for(int j=0; j<tlrmvmconfig.Ntg; j++){
if (j % size == rank )
if (j % size == rank)
maskmat.SetElem(i,j,1);
}
}
Expand All @@ -55,7 +57,7 @@ int main (int argc, char ** argv){
double bytes = TLRMVMBytesProcessed<float>(tlrmvmptr.granksum,
tlrmvmptr.nb, tlrmvmptr.paddingM, tlrmvmptr.paddingN);
tlrmvmptr.MemoryInit();
for(int i=0; i<10000; i++){
for(int i=0; i<loopsize; i++){
MPI_Barrier(MPI_COMM_WORLD);
auto start = std::chrono::steady_clock::now();
tlrmvmptr.MVM();
Expand All @@ -78,12 +80,13 @@ int main (int argc, char ** argv){
auto hyu = Matrix<float>(tlrmvmptr.h_yu, tlrmvmptr.workmatgranksum, 1);
// cout << " Phase 2 Correctness : " << hyu.allclose(yu_pc) << endl;
Matrix<float> y_pc = seismicpcmat.Phase3();
auto hy = Matrix<float>(tlrmvmptr.h_yout, tlrmvmptr.paddingM, 1);
auto hy = Matrix<float>(tlrmvmptr.h_yout, tlrmvmptr.originM, 1);
cout << " Check MPI Phase 3 Correctness : "<< hy.allclose(y_pc) << endl;
std::sort(mergetime.begin(), mergetime.end());
int N = mergetime.size();
cout << "median " << mergetime[N / 2] * 1e6 << " us."<< endl;
double bytes = TLRMVMBytesProcessed<float>(tlrmvmptr.granksum, tlrmvmptr.nb, originM, originN);
cout << "U and V bases size: " << bytes * 1e-6 << " MB." << endl;
cout << "Bandwidth " << bytes / mergetime[N/2] * 1e-9 << " GB/s" << endl;
}
tlrmvmptr.MemoryFree();
Expand Down
99 changes: 99 additions & 0 deletions test/cpp/ex3_gendata.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <string>
#include <vector>

#include <mpi.h>
// include common component
#include <common/Common.h>
using namespace tlrmat;

// include tlrmvm component
#include <tlrmvm/Tlrmvm.h>
using namespace tlrmvm;
using namespace std;

// This app generates a synthetic dataset to be used as input for tlrmvm.
// Every tile is assigned the same constant rank.

// Command-line parameters describing the synthetic dataset to generate.
// Every field has a defined default so that a freshly constructed Params
// never exposes indeterminate values.
struct Params{
    int originM = 0;          // matrix dimension M (from --M)
    int originN = 0;          // matrix dimension N (from --N)
    int nb = 0;               // tile size (from --nb); must be > 0 before use
    int constrank = 0;        // rank written for every tile (from --constrank)
    std::string acc;          // accuracy tag embedded in output file names (--errorthreshold)
    std::string datafolder;   // directory the output files are written to (--datafolder)
    std::string problemname;  // prefix of every output file name (--problemname)
    std::string rankfile;     // not referenced by the code in this file
    std::string Ufile;        // not referenced by the code in this file
    std::string Vfile;        // not referenced by the code in this file
    std::string dtype;        // element type selector: "float" or "complexfloat" (--dtype)
    Params(){}
};



// Writes a constant-rank synthetic dataset to disk.
//
// Four files are created under pm.datafolder, in this order:
//   <problemname>_Ubases_nb<nb>_acc<acc>.bin  Matrix<T>(grank, nb), all ones
//   <problemname>_Vbases_nb<nb>_acc<acc>.bin  Matrix<T>(grank, nb), all ones
//   <problemname>_Rmat_nb<nb>_acc<acc>.bin    Matrix<int>(mtiles, ntiles), every entry pm.constrank
//   <problemname>_x.bin                       Matrix<T>(paddingN, 1), all ones
// with mtiles = ceil(originM / nb), ntiles = ceil(originN / nb),
// grank = mtiles * ntiles * constrank and paddingN = ntiles * nb.
//
// Precondition: pm.nb > 0 (it is used as a divisor).
template<typename T>
void generate_data(Params & pm){
    const int nb = pm.nb;

    // Tile counts, rounded up so a partial trailing tile still counts.
    int mtiles = pm.originM / nb;
    if(pm.originM % nb != 0) mtiles++;
    int ntiles = pm.originN / nb;
    if(pm.originN % nb != 0) ntiles++;

    const int paddingN = ntiles * nb;
    const int grank = mtiles * ntiles * pm.constrank;

    // Widen before multiplying so the element count cannot overflow int.
    const std::size_t basecount =
        static_cast<std::size_t>(grank) * static_cast<std::size_t>(nb);
    const std::size_t tilecount =
        static_cast<std::size_t>(mtiles) * static_cast<std::size_t>(ntiles);

    // Vectors own the buffers, so nothing leaks on any exit path. They are
    // declared before the Matrix objects below and therefore outlive them.
    std::vector<T> uvec(basecount, static_cast<T>(1.0));
    std::vector<T> vvec(basecount, static_cast<T>(1.0));
    std::vector<T> xvec(static_cast<std::size_t>(paddingN), static_cast<T>(1.0));
    std::vector<int> rvec(tilecount, pm.constrank);

    // Build the paths as std::string: no fixed-size buffer to overflow when
    // datafolder or problemname is long.
    const std::string prefix = pm.datafolder + "/" + pm.problemname;
    const std::string suffix =
        "_nb" + std::to_string(pm.nb) + "_acc" + pm.acc + ".bin";
    std::string rpath = prefix + "_Rmat" + suffix;
    std::string upath = prefix + "_Ubases" + suffix;
    std::string vpath = prefix + "_Vbases" + suffix;
    std::string xpath = prefix + "_x.bin";

    // Paths are handed over as char* (what a plain char buffer decays to), so
    // the calls resolve whether Tofile takes char*, const char* or std::string;
    // its signature is not visible from this file.
    auto umat = Matrix<T>(uvec.data(), grank, nb);
    umat.Tofile(&upath[0]);
    auto vmat = Matrix<T>(vvec.data(), grank, nb);
    vmat.Tofile(&vpath[0]);
    auto rmat = Matrix<int>(rvec.data(), mtiles, ntiles);
    rmat.Tofile(&rpath[0]);
    auto xmat = Matrix<T>(xvec.data(), paddingN, 1);
    xmat.Tofile(&xpath[0]);
}

int main(int argc, char** argv){
Params pm = Params();
vector<double> timestat;
vector<double> bandstat;
double bytesprocessed;
size_t granksum;
auto argparser = ArgsParser(argc, argv);
pm.originM = argparser.getint("M");
pm.originN = argparser.getint("N");
pm.nb = argparser.getint("nb");
pm.acc = argparser.getstring("errorthreshold");
pm.problemname = argparser.getstring("problemname");
pm.datafolder = argparser.getstring("datafolder");
pm.constrank = argparser.getint("constrank");
pm.dtype = argparser.getstring("dtype");
if(pm.dtype == "float"){
generate_data<float>(pm);
}else if(pm.dtype == "complexfloat"){
generate_data<complex<float>>(pm);
}
return 0;
}
8 changes: 5 additions & 3 deletions test/cpp/runexp1.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
./install/test/ex1basic_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256

./install/test/ex1basic_complexfloat --M=9801 --N=9801 \
--errorthreshold=0.001 --problemname=SeismicFreq100 \
--datafolder=$WORK_ROOT --nb=256

./install/test/ex1basic_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256

14 changes: 8 additions & 6 deletions test/cpp/runexp2A64FX.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# For NEC, the proof-of-concept matrix runs very slowly for some reason;
# one can try bypassing that logic to get a speedup.

OMP_NUM_THREADS=8 mpirun -ve 0-1 -np 2 \
./install/test/ex2mpitlrmvm_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
#135 us 1138.03 GB/s

OMP_NUM_THREADS=8 mpirun -ve 0-1 -np 2 \
./install/test/ex2mpitlrmvm_complexfloat --M=9801 --N=9801 \
--errorthreshold=0.001 --problemname=SeismicFreq100 \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
# 262us, 1025.71 GB/s


OMP_NUM_THREADS=8 mpirun -ve 0-1 -np 2 \
./install/test/ex2mpitlrmvm_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256
#135 us 1138.03 GB/s
4 changes: 2 additions & 2 deletions test/cpp/runexp2AMD.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
OMP_NUM_THREADS=8 mpirun -np 16 --map-by L3cache:PE=8 \
./install/test/ex2mpitlrmvm_complexfloat --M=9801 --N=9801 \
--errorthreshold=0.001 --problemname=SeismicFreq100 \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
# 379us, 708.84 GB/s


OMP_NUM_THREADS=8 mpirun -np 16 --map-by L3cache:PE=8 \
./install/test/ex2mpitlrmvm_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
#115 us 1335.66 GB/s
13 changes: 13 additions & 0 deletions test/cpp/runexp2AMD_synthetic.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# run on AMD Milan 2 sockets 1 cpu/socket
#
# Runs the MPI TLR-MVM benchmarks on the synthetic datasets named
# Sytheticfloat / Sytheticcomplex (the names runexp3_gendata.sh passes as
# --problemname). Paths are relative: run from the directory that contains
# install/. WORK_ROOT must point at the folder holding the datasets; the
# expansion below aborts with a message if it is unset or empty, and the
# quotes keep paths containing spaces intact.

# Real single-precision run, 1000 timed iterations.
OMP_NUM_THREADS=8 mpirun -np 16 --map-by L3cache:PE=8 \
    ./install/test/ex2mpitlrmvm_float --M=4802 --N=19078 \
    --errorthreshold=0.0001 --problemname=Sytheticfloat \
    --datafolder="${WORK_ROOT:?WORK_ROOT must be set}" --nb=256 --loopsize=1000

# Complex single-precision run, 1000 timed iterations.
OMP_NUM_THREADS=8 mpirun -np 16 --map-by L3cache:PE=8 \
    ./install/test/ex2mpitlrmvm_complexfloat --M=9801 --N=9801 \
    --errorthreshold=0.001 --problemname=Sytheticcomplex \
    --datafolder="${WORK_ROOT:?WORK_ROOT must be set}" --nb=256 --loopsize=1000


4 changes: 2 additions & 2 deletions test/cpp/runexp2ICX.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
OMP_NUM_THREADS=28 mpirun -np 2 --map-by L3cache:PE=28 \
./install/test/ex2mpitlrmvm_complexfloat --M=9801 --N=9801 \
--errorthreshold=0.001 --problemname=SeismicFreq100 \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
# 859us, 312.75 GB/s


OMP_NUM_THREADS=28 mpirun -np 2 --map-by L3cache:PE=28 \
./install/test/ex2mpitlrmvm_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
#400 us 384.0 GB/s
4 changes: 2 additions & 2 deletions test/cpp/runexp2NEC.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
OMP_NUM_THREADS=8 mpirun -ve 0-1 -np 2 \
./install/test/ex2mpitlrmvm_complexfloat --M=9801 --N=9801 \
--errorthreshold=0.001 --problemname=SeismicFreq100 \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
# 262us, 1025.71 GB/s


OMP_NUM_THREADS=8 mpirun -ve 0-1 -np 2 \
./install/test/ex2mpitlrmvm_float --M=4802 --N=19078 \
--errorthreshold=0.0001 --problemname=mavis_000_R \
--datafolder=$WORK_ROOT --nb=256
--datafolder=$WORK_ROOT --nb=256 --loopsize=5000
#135 us 1138.03 GB/s
8 changes: 8 additions & 0 deletions test/cpp/runexp3_gendata.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Generates the two synthetic constant-rank datasets (rank 100 per tile,
# tile size 256). Paths are relative: run from the directory that contains
# install/. WORK_ROOT must name the output folder; the expansion below aborts
# with a message if it is unset or empty, and the quotes keep paths containing
# spaces intact.

# Real single-precision dataset.
./install/test/ex3_gendata --M=4802 --N=19078 \
    --errorthreshold=0.0001 --problemname=Sytheticfloat \
    --datafolder="${WORK_ROOT:?WORK_ROOT must be set}" --nb=256 --constrank=100 --dtype=float

# Complex single-precision dataset.
./install/test/ex3_gendata --M=9801 --N=9801 \
    --errorthreshold=0.001 --problemname=Sytheticcomplex \
    --datafolder="${WORK_ROOT:?WORK_ROOT must be set}" --nb=256 --constrank=100 --dtype=complexfloat

0 comments on commit cc72fe8

Please sign in to comment.