-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathCompute.h
371 lines (298 loc) · 12.3 KB
/
Compute.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
#ifndef __COMPUTE_H__
#define __COMPUTE_H__
#ifdef COOLING_MOLECULARH
#include "Vector3D.h"
#endif /*COOLING_MOLECULARH*/
#include "codes.h"
#include "ParallelGravity.h"
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not needed here.
class State;
class DoubleWalkState;
/** @file Compute.h
 * Defines classes for objects that encapsulate computation:
 * the Compute hierarchy used during tree walks, the ActiveWalk record,
 * and the TreeNodeWorker hierarchy.
 */
class TreeWalk;
class Opt;
/// @brief Base clase for all tree based computations.
///
/// The Compute object determines what work is to be done at each
/// treenode, as well as what gets done at the beginning and the end
/// of a walk. The key method is doWork().
class Compute{
protected:
Opt *opt;
void *computeEntity;
int activeRung;
ComputeType type;
Compute(ComputeType t) : type(t) /*state(0)*/{}
public:
int nActive; // accumulate total number of active particles.
void setOpt(Opt *opt);
// should the dowork method have a state argument?
// yes, allows listcompute object to keep modifying state
// which will have within it the checklist, clist and plist
/// @brief Work to be done at each node.
virtual int doWork(GenericTreeNode *, TreeWalk *tw, State *state, int chunk, int reqID, bool isRoot, bool &didcomp, int awi) = 0;
// should return int, not bool
virtual int openCriterion(TreePiece *ownerTP,
GenericTreeNode *node, int reqID, State *state) = 0;
virtual void stateReady(State *state, TreePiece *owner, int chunk, int start, int end) {}
virtual void stateReadyPar(TreePiece *tp, int start, int end,
CkVec<OffsetNode>& clist, CkVec<RemotePartInfo>& rpilist,
CkVec<LocalPartInfo>& lpilist){}
virtual void fillLists(State *state_, TreePiece *tp, int chunk, int start,
int end, CkVec<OffsetNode>& clistforb, CkVec<RemotePartInfo>& rplistforb,
CkVec<LocalPartInfo>& lplistforb) {}
/// @brief Associate computeEntity (target bucket or node),
/// activeRung and Optimization with this Compute object.
virtual void init(void *cE, int activeRung, Opt *opt);
virtual void reassoc(void *cE, int aR, Opt *opt){}
ComputeType getSelfType(){ return type;}
OptType getOptType();
int getActiveRung() {return activeRung;}
// Default impl is empty. Currently only redefined by ListCompute
// Initializes state.
virtual void initState(State *state){}
/// virtual functions to allow for book-keeping
/// these are essentially notifications to the
/// Compute object from the TreeWalk that certain
/// events have taken place - the Compute reacts
/// accordingly.
virtual void startNodeProcessEvent(State *state) {}
/// Allow book-keeping when finished with a node
virtual void finishNodeProcessEvent(TreePiece *owner, State *state) {}
/// Allow book-keeping of a cache miss.
virtual void nodeMissedEvent(int reqID, int chunk, State *state, TreePiece *tp) {}
/// Allow book-keeping of a cache receive event.
virtual void nodeRecvdEvent(TreePiece *owner, int chunk, State *state, int bucket){}
/// Allow book-keeping of a cache receive event.
virtual void recvdParticles(ExternalGravityParticle *egp,int num,int chunk,int reqID,State *state, TreePiece *tp, Tree::NodeKey &remoteBucket){}
/// Allow book-keeping of a cache receive event.
virtual void recvdParticlesFull(GravityParticle *egp,int num,int chunk,int reqID,State *state, TreePiece *tp, Tree::NodeKey &remoteBucket){}
virtual ~Compute(){}
virtual void walkDone(State *state){}
virtual void setComputeEntity(void *ce){
computeEntity = ce;
}
virtual void *getComputeEntity(){
return computeEntity;
}
virtual State *getNewState(int d1, int d2);
virtual State *getNewState(int d1);
virtual State *getNewState();
virtual void freeState(State *state);
};
#include "SSEdefs.h"
///
/// @brief Class to compute gravity using a "bucket walk".
///
class GravityCompute : public Compute{
private:
#ifdef BENCHMARK_TIME_COMPUTE
    // Timing accumulators, reported by the destructor.
    double computeTimePart;
    double computeTimeNode;
#endif

    void updateInterMass(GravityParticle *p, int start, int end,
                         double mass);
    void updateInterMass(GravityParticle *p, int start, int end,
                         GravityParticle *s,
                         Vector3D<cosmoType> &offset);

public:
    /// Build a Gravity-type Compute; timers (if compiled in) start at zero.
    GravityCompute() : Compute(Gravity)
    {
#ifdef BENCHMARK_TIME_COMPUTE
        computeTimePart = 0.0;
        computeTimeNode = 0.0;
#endif
    }

    /// Print the accumulated timings when benchmarking is compiled in.
    ~GravityCompute()
    {
#ifdef BENCHMARK_TIME_COMPUTE
        CkPrintf("Compute time part: %f\n",computeTimePart);
        CkPrintf("Compute time node: %f\n",computeTimeNode);
#endif
    }

    int doWork(GenericTreeNode *, TreeWalk *tw, State *state, int chunk,
               int reqID, bool isRoot, bool &didcomp, int awi);
    int openCriterion(TreePiece *ownerTP, GenericTreeNode *node,
                      int reqID, State *state);
    int computeParticleForces(TreePiece *owner, GenericTreeNode *node,
                              ExternalGravityParticle *part, int reqID);

    // Book-keeping notifications from the walk.
    void nodeMissedEvent(int reqID, int chunk, State *state,
                         TreePiece *tp);
    void nodeRecvdEvent(TreePiece *owner, int chunk, State *state,
                        int bucket);
    void recvdParticles(ExternalGravityParticle *egp, int num, int chunk,
                        int reqID, State *state, TreePiece *tp,
                        Tree::NodeKey &remoteBucket);

    void reassoc(void *cE, int activeRung, Opt *o);
};
#if INTERLIST_VER > 0
/// @brief Computations for Stadel-style interaction list walk.
///
/// At a given point in the walk, this compares a node against another
/// see if it 1) has to be opened, 2) doesn't need to be opened or 3)
/// undecided, and manipulates the lists in State accordingly.
class ListCompute : public Compute{
public:
ListCompute() : Compute(List) {
bUseCpu = 0;
}
int doWork(GenericTreeNode *, TreeWalk *tw, State *state, int chunk, int reqID, bool isRoot, bool &didcomp, int awi);
int openCriterion(TreePiece *ownerTP, GenericTreeNode *node, int reqID, State *state);
// book keeping on notifications
void nodeMissedEvent(int reqID, int chunk, State *state, TreePiece *tp);
void nodeRecvdEvent(TreePiece *owner, int chunk, State *state, int bucket);
void recvdParticles(ExternalGravityParticle *egp,int num,int chunk,int reqID,State *state, TreePiece *tp, Tree::NodeKey &remoteBucket);
void initState(State *state);
void stateReady(State *, TreePiece *, int chunk, int start, int end);
void stateReadyPar(TreePiece *tp, int start, int end,
CkVec<OffsetNode>& clist, CkVec<RemotePartInfo>& rpilist,
CkVec<LocalPartInfo>& lpilist);
void fillLists(State *state_, TreePiece *tp, int chunk, int start,
int end, CkVec<OffsetNode>& clistforb, CkVec<RemotePartInfo>& rplistforb,
CkVec<LocalPartInfo>& lplistforb);
// void printUndlist(DoubleWalkState *state, int level, TreePiece *tp);
// void printClist(DoubleWalkState *state, int level, TreePiece *tp);
void reassoc(void *cE, int activeRung, Opt *o);
State *getNewState(int d1, int d2);
State *getNewState(int d1);
State *getNewState();
void freeState(State *state);
void freeDoubleWalkState(DoubleWalkState *state);
/// Flag the cpu (instead of gpu) for usage for the next walk
void enableCpu() {bUseCpu = 1;}
#ifdef CUDA
#ifdef GPU_LOCAL_TREE_WALK
void sendLocalTreeWalkTriggerToGpu(State *state, TreePiece *tp, int activeRung, int startBucket, int endBucket);
#endif //GPU_LOCAL_TREE_WALK
void sendNodeInteractionsToGpu(DoubleWalkState *state, TreePiece *tp);
void sendPartInteractionsToGpu(DoubleWalkState *state, TreePiece *tp);
#endif
private:
void addChildrenToCheckList(GenericTreeNode *node, int reqID, int chunk, int awi, State *s, CheckList &chklist, TreePiece *tp);
void addNodeToInt(GenericTreeNode *node, int offsetID, DoubleWalkState *s);
DoubleWalkState *allocDoubleWalkState();
/// used to flag cpu (instead of gpu) for usage when compiling with CUDA
int bUseCpu;
#if defined CHANGA_REFACTOR_PRINT_INTERACTIONS || defined CHANGA_REFACTOR_WALKCHECK_INTERLIST || defined CUDA
void addRemoteParticlesToInt(ExternalGravityParticle *parts, int n,
Vector3D<cosmoType> &offset, DoubleWalkState *s,
NodeKey key);
void addLocalParticlesToInt(GravityParticle *parts, int n,
Vector3D<cosmoType> &offset, DoubleWalkState *s,
NodeKey key, GenericTreeNode *gtn);
#else
void addRemoteParticlesToInt(ExternalGravityParticle *parts, int n,
Vector3D<cosmoType> &offset, DoubleWalkState *s);
void addLocalParticlesToInt(GravityParticle *parts, int n,
Vector3D<cosmoType> &offset, DoubleWalkState *s);
#endif
#ifdef CUDA
void getBucketParameters(TreePiece *tp, int bucket, int &bucketStart, int &bucketSize, std::map<NodeKey, int>&lpref);
public:
void resetCudaNodeState(DoubleWalkState *state);
void resetCudaPartState(DoubleWalkState *state);
void initCudaState(DoubleWalkState *state, int numBuckets, int nodeThreshold, int partThreshold, bool resume);
#endif
};
#endif
/// @brief Compute for the remote node prefetch walk.
class PrefetchCompute : public Compute{
public:
    /// Build a Prefetch-type Compute with no compute entity attached.
    PrefetchCompute() : Compute(Prefetch) {
        computeEntity = 0;
    }

    /// Kept explicitly virtual: DummyPrefetchCompute overrides it.
    virtual int doWork(GenericTreeNode *, TreeWalk *tw, State *state,
                       int chunk, int reqID, bool isRoot, bool &didcomp,
                       int awi);
    // Parameter is spelled reqID, as in Compute::openCriterion and the
    // other overrides in this header.
    int openCriterion(TreePiece *ownerTP, GenericTreeNode *node,
                      int reqID, State *state);

    // book-keeping on notifications
    void startNodeProcessEvent(State *state);
    void finishNodeProcessEvent(TreePiece *owner, State *state);
    void recvdParticles(ExternalGravityParticle *egp, int num, int chunk,
                        int reqID, State *state, TreePiece *tp,
                        Tree::NodeKey &remoteBucket);
};
/// Stand-in for PrefetchCompute: when prefetching is disabled from the
/// command line, use DummyPrefetchCompute instead of PrefetchCompute.
class DummyPrefetchCompute : public PrefetchCompute {
public:
    /// Immediately stop the walk: ignores every argument and returns DUMP.
    int doWork(GenericTreeNode *, TreeWalk *tw, State *state, int chunk,
               int reqID, bool isRoot, bool &didcomp, int awi)
    {
        return DUMP;
    }
};
/// @brief Distinguish between the walks that could be running.
///
/// The first four enumerators are consecutive from zero; maxAwi equals
/// the number of enumerators that precede it.
enum WalkIndices {
    prefetchAwi      = 0,   // prefetch walk
    interListAwi     = 1,   // interaction-list walk
    remoteGravityAwi = 2,   // remote gravity walk
    smoothAwi        = 3,   // smooth walk
    maxAwi           = 4    // count of the walk indices above
};
/// Object to record a type of active walk. Contains pointers to
/// TreeWalk/Compute/Opt/State (T/C/O/S) combinations
class ActiveWalk {
public:
TreeWalk *tw;
Compute *c;
Opt *o;
State *s;
ActiveWalk(TreeWalk *_tw, Compute *_c, Opt *_o, State *state) :
tw(_tw), c(_c), o(_o), s(state){}
ActiveWalk(){}
};
/// @brief Interface for work in LocalTreeTraversal.
class TreeNodeWorker {
public:
    /// Virtual destructor: implementations (e.g. one owning an output
    /// stream) must be destroyed correctly when deleted through a
    /// TreeNodeWorker pointer.
    virtual ~TreeNodeWorker() {}

    /// Work to perform on @p node at depth @p level.
    /// NOTE(review): the meaning of the bool result is decided by the
    /// traversal code, which is not visible here — confirm there.
    virtual bool work(GenericTreeNode *node, int level) = 0;

    /// Hook tied to the children of @p node being done (per its name);
    /// no-op by default.
    virtual void doneChildren(GenericTreeNode *node, int level) {}
};
/// @brief TreeNodeWorker that builds the remote part of the tree.
/// Fills in Boundary and NonLocal nodes.
class RemoteTreeBuilder : public TreeNodeWorker {
private:
    /// TreePiece passed to the constructor as the owner.
    TreePiece *tp;
    /// Flag passed to the constructor.
    /// NOTE(review): presumably selects whether moments of NonLocal nodes
    /// are requested — confirm in the implementation.
    bool requestNonLocalMoments;

    void registerNode(GenericTreeNode *node);

public:
    RemoteTreeBuilder(TreePiece *owner, bool req)
        : tp(owner), requestNonLocalMoments(req) {}

    // TreeNodeWorker interface.
    bool work(GenericTreeNode *node, int level);
    void doneChildren(GenericTreeNode *node, int level);
};
/// @brief TreeNodeWorker that builds the local part of the tree.
/// Builds Internal nodes.
class LocalTreeBuilder : public TreeNodeWorker {
private:
    /// TreePiece passed to the constructor as the owner.
    TreePiece *tp;

    void registerNode(GenericTreeNode *node);

public:
    LocalTreeBuilder(TreePiece *owner) : tp(owner) {}

    // TreeNodeWorker interface.
    bool work(GenericTreeNode *node, int level);
    void doneChildren(GenericTreeNode *node, int level);
};
/** @brief TreeNodeWorker implementation that just prints out the
 *  tree.  This is just for diagnostics.
 *
 *  The output file is opened during construction (via openFile()) and
 *  receives a closing "}" line before being closed on destruction.
 */
class LocalTreePrinter : public TreeNodeWorker {
private:
    // Declaration order is significant: members are initialized in this
    // order before the constructor body calls openFile().
    int index;
    std::ofstream file;
    string description;

    void openFile();

public:
    LocalTreePrinter(string d, int idx)
        : index(idx), description(d)
    {
        openFile();
    }

    ~LocalTreePrinter()
    {
        file << "}" << std::endl;
        file.close();
    }

    // TreeNodeWorker interface.
    bool work(GenericTreeNode *node, int level);
    void doneChildren(GenericTreeNode *node, int level);
};
#ifdef COOLING_MOLECULARH
/// TreeNodeWorker used while distributing LW (Lyman-Werner) over a
/// depth-first walk.
class LocalLymanWernerDistributor : public TreeNodeWorker {
private:
    /// TreePiece is defined in ParallelGravity.h: the fundamental
    /// structure that holds particle and tree data.
    TreePiece *tp;

public:
    /// Constructor.  Only argument is the TreePiece.
    LocalLymanWernerDistributor(TreePiece *owner) : tp(owner) {}

    /// Declared virtual in the TreeNodeWorker class (Compute.h).
    bool work(GenericTreeNode *node, int level);
    /// Declared virtual in the TreeNodeWorker class.
    void doneChildren(GenericTreeNode *node, int level);
};
#endif /*COOLING_MOLECULARH*/
#endif