forked from N-BodyShop/changa
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataManager.h
332 lines (286 loc) · 10.7 KB
/
DataManager.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
/** @file DataManager.h
The DataManager holds data that TreePieces need access to, but not
their own copy.
@author Graeme Lufkin (gwl@u.washington.edu)
*/
#ifndef DATAMANAGER_H
#define DATAMANAGER_H
#include <vector>
#include <map>
#include <string>
#include "GenericTreeNode.h"
#include "ParallelGravity.decl.h"
#if CHARM_VERSION > 60401 && CMK_BALANCED_INJECTION_API
#include "ckBIconfig.h"
#endif
/// @brief Information about TreePieces on an SMP node.
struct TreePieceDescriptor{
TreePiece *treePiece;
Tree::GenericTreeNode *root;
TreePieceDescriptor(){}
TreePieceDescriptor(TreePiece *tp_, GenericTreeNode *r){
treePiece = tp_;
root = r;
}
};
#ifdef CUDA
struct UpdateParticlesStruct{
CkCallback *cb;
DataManager *dm;
VariablePartData *buf;
int size;
};
// store pointers to flattened buffers if !gpuFree
struct PendingBuffers {
CkVec<CudaMultipoleMoments> *moments;
CkVec<CompactPartData> *particles;
int chunk;
/// Pointer to callback so it can be freed.
CkCallback *cb;
};
#endif
/** The DataManager is used to store information that all TreePieces will need,
but will not modify. The first example is the list of splitter keys and the
responsible chare for each interval. This data is given to the DataManager by
the Sorter. The DataManager then instructs all the TreePieces on its node
to evaluate the boundary keys.
*/
class DataManager : public CBase_DataManager {
friend class TreePiece;
friend class OctTreeBuildPhaseIWorker;
/// The array of TreePieces I hold data for.
CProxy_TreePiece treePieces;
protected:
/// The array of splitter keys for the sort.
std::vector<SFC::Key> boundaryKeys;
/// An array identifying which chare is responsible for each interval of keys.
std::vector<int> responsibleIndex;
/// An array with how many particles are held by each TreePiece when sorted.
std::vector<int> particleCounts;
/// A list of roots of the TreePieces in this node
// holds chare array indices of registered treepieces
CkVec<TreePieceDescriptor> registeredTreePieces;
#ifdef CUDA
//CkVec<int> registeredTreePieceIndices;
/// @brief counter for the number of tree nodes that are
/// replicated by TreePieces that share the same address space.
int cumNumReplicatedNodes;
int treePiecesDone;
int savedChunk;
int treePiecesDonePrefetch;
int treePiecesDoneLocalComputation;
// XXX - assumes that only one chunk can be on the gpu
// at a given time
int treePiecesDoneRemoteChunkComputation;
int treePiecesWantParticlesBack;
/// Reference count for Pieces that have finished updating
/// their acclerations.
int treePiecesParticlesUpdated;
int savedNumTotalParticles;
int savedNumTotalNodes;
// keeps track of buckets of particles that were
// received during the prefetch and which were subsequently
// shipped off to the gpu - XXX
// not including cached particles in postfrefetch entities shipped to gpu
// since it is hard to count their number given just the pointer to the cache entry
// * either do not concern yourself with cached particles
// * or for each entry, get key, find bucket node in CM, DM or TPs and get number
// for now, the former
std::map<NodeKey, int> cachedPartsOnGpu;
// local particles that have been copied to the gpu
//std::map<NodeKey, int> localPartsOnGpu;
// can the gpu accept a chunk of remote particles/nodes?
bool gpuFree;
// This var will indicate if particle data has been loaded to the GPU
bool gputransfer;
/// Callback pointer to pass to HAPI.
CkCallback *localTransferCallback;
PendingBuffers *currentChunkBuffers;
// queue that stores all pending chunk transfers
CkQ<PendingBuffers *> pendingChunkTransferQ;
// last remote chunk's size in moments and particles
int lastChunkMoments;
int lastChunkParticles;
/// host buffer to transfer remote moments to GPU
CudaMultipoleMoments *bufRemoteMoments;
/// host buffer to transfer remote particles to GPU
CompactPartData *bufRemoteParts;
/// host buffer to transfer local moments to GPU
CudaMultipoleMoments *bufLocalMoments;
/// host buffer to transfer local particles to GPU
CompactPartData *bufLocalParts;
/// host buffer to transfer initial accelerations to GPU
VariablePartData *bufLocalVars;
#ifdef HAPI_INSTRUMENT_WRS
int activeRung;
int treePiecesDoneInitInstrumentation;
#endif
#endif
/// The root of the combined trees
Tree::GenericTreeNode * root;
/// Table for nodes created by the DataManager to combine trees.
/// Kept track of here so we can delete them when done.
CkVec<GenericTreeNode *> nodeTable;
/// Number of chunks in which the tree was splitted during last combine operation
int oldNumChunks;
/// Nodes currently used as roots for remote computation
Tree::NodeKey *chunkRoots;
/// Lookup table for the chunkRoots
Tree::NodeLookupType chunkRootTable;
public:
/*
** Cooling
*/
COOL *Cool;
/// @brief log of star formation events.
///
/// Star formation events are stored on the data manager since there
/// is no need to migrate them with the TreePiece.
StarLog *starLog;
/// @brief Lock for accessing starlog from TreePieces
CmiNodeLock lockStarLog;
DataManager(const CkArrayID& treePieceID);
DataManager(CkMigrateMessage *);
void startLocalWalk();
void resumeRemoteChunk();
#ifdef CUDA
void donePrefetch(int chunk); // serialize remote chunk wrapper
void serializeLocalTree();
#ifdef GPU_LOCAL_TREE_WALK
void transformLocalTreeRecursive(GenericTreeNode *node, CkVec<CudaMultipoleMoments>& localMoments);
#endif //GPU_LOCAL_TREE_WALK
// actual serialization methods
PendingBuffers *serializeRemoteChunk(GenericTreeNode *);
void serializeLocal(GenericTreeNode *);
void freeLocalTreeMemory();
void freeRemoteChunkMemory(int chunk);
void transferParticleVarsBack();
void updateParticles(UpdateParticlesStruct *data);
void updateParticlesFreeMemory(UpdateParticlesStruct *data);
void initiateNextChunkTransfer();
#ifdef HAPI_INSTRUMENT_WRS
int initInstrumentation();
#endif
DataManager(){}
#endif
void clearInstrument(CkCallback const& cb);
private:
void init();
public:
~DataManager() {
for (unsigned int i = 0; i < nodeTable.length(); i++) {
delete nodeTable[i];
}
nodeTable.clear();
CoolFinalize(Cool);
delete starLog;
CmiDestroyLock(lockStarLog);
}
/// Called by ORB Sorter, save the list of which TreePiece is
/// responsible for which interval.
void acceptResponsibleIndex(const int* responsible, const int n,
const CkCallback& cb);
/// Called by the Sorter, I save these final keys and the list
/// of which TreePiece is responsible for which interval.
/// This routine then calls TreePiece::unshuffleParticles to
/// move the particles around.
/// @param keys vector of boundary keys
/// @param responsible vector of which piece is responsible
/// for which interval
/// @param bins number of particles in each interval.
void acceptFinalKeys(const SFC::Key* keys, const int* responsible, uint64_t* bins, const int n, const CkCallback& cb);
void pup(PUP::er& p);
#ifdef CUDA
/*
std::map<NodeKey, int> &getLocalPartsOnGpuTable(){
return localPartsOnGpu;
}
*/
std::map<NodeKey, int> &getCachedPartsOnGpuTable(){
return cachedPartsOnGpu;
}
#endif
// Functions used to create a tree inside the DataManager comprising
// all the trees in the TreePieces in the local node
private:
Tree::GenericTreeNode *buildProcessorTree(int n, Tree::GenericTreeNode **gtn);
int createLookupRoots(Tree::GenericTreeNode *node, Tree::NodeKey *keys);
public:
/// \brief Collect roots of treepieces on this node.
///
/// The roots are stored in registeredChares to be used by TreePiece
/// combineLocalTrees.
void notifyPresence(Tree::GenericTreeNode *root, TreePiece *treePiece);
void clearRegisteredPieces(const CkCallback& cb);
void combineLocalTrees(CkReductionMsg *msg);
void getChunks(int &num, Tree::NodeKey *&roots);
inline Tree::GenericTreeNode *chunkRootToNode(const Tree::NodeKey k) {
NodeLookupType::iterator iter = chunkRootTable.find(k);
if (iter != chunkRootTable.end()) return iter->second;
else return NULL;
}
inline Tree::GenericTreeNode *getRoot() { return root; }
void initCooling(double dGmPerCcUnit, double dComovingGmPerCcUnit,
double dErgPerGmUnit, double dSecUnit, double dKpcUnit,
COOLPARAM inParam, const CkCallback& cb);
void initStarLog(std::string _fileName, const CkCallback &cb);
void dmCoolTableRead(double *dTableData, int nData, const CkCallback& cb);
void CoolingSetTime(double z, // redshift
double dTime, // Time
const CkCallback& cb);
void SetStarCM(double dCenterOfMass[4], const CkCallback& cb);
void memoryStats(const CkCallback& cb);
void resetReadOnly(Parameters param, const CkCallback &cb);
public:
static Tree::GenericTreeNode *pickNodeFromMergeList(int n, GenericTreeNode **gtn, int &nUnresolved, int &pickedIndex);
};
inline static void setBIconfig()
{
#if CHARM_VERSION > 60401 && CMK_BALANCED_INJECTION_API
if (CkMyRank()==0) {
#define GNI_BI_DEFAULT 64
uint16_t cur_bi = ck_get_GNI_BIConfig();
if (cur_bi > GNI_BI_DEFAULT) {
ck_set_GNI_BIConfig(GNI_BI_DEFAULT);
}
}
if (CkMyPe() == 0)
CkPrintf("Balanced injection is set to %d.\n", ck_get_GNI_BIConfig());
#endif
}
/** @brief Control recording of Charm++ projections logs
*
* The constructors for this class are also used to set default
* node-wide communication parameters.
*/
class ProjectionsControl : public CBase_ProjectionsControl {
public:
ProjectionsControl() {
setBIconfig();
LBTurnCommOff();
LBSetPeriod(0.0); // no need for LB interval: we are using Sync Mode
}
ProjectionsControl(CkMigrateMessage *m) : CBase_ProjectionsControl(m) {
setBIconfig();
LBTurnCommOff();
LBSetPeriod(0.0); // no need for LB interval: we are using Sync Mode
}
void on(CkCallback cb) {
if(CkMyPe() == 0){
CkPrintf("\n\n**** PROJECTIONS ON *****\n\n");
}
traceBegin();
contribute(cb);
}
void off(CkCallback cb) {
if(CkMyPe() == 0){
CkPrintf("\n\n**** PROJECTIONS OFF *****\n\n");
}
traceEnd();
contribute(cb);
}
void pup(PUP::er &p){
CBase_ProjectionsControl::pup(p);
}
};
#endif //DATAMANAGER_H