COMBINATORIAL_BLAS  1.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SpParHelper.cpp
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.2 -------------------------------------------------*/
4 /* date: 10/06/2011 --------------------------------------------*/
5 /* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2011, Aydin Buluc
9 
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16 
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28 
29 
30 template<typename KEY, typename VAL, typename IT>
31 void SpParHelper::MemoryEfficientPSort(pair<KEY,VAL> * array, IT length, IT * dist, const MPI_Comm & comm)
32 {
33  int nprocs, myrank;
34  MPI_Comm_size(comm, &nprocs);
35  MPI_Comm_rank(comm, &myrank);
36  int nsize = nprocs / 2; // new size
37  if(nprocs < 1000)
38  {
39  bool excluded = false;
40  if(dist[myrank] == 0) excluded = true;
41 
42  int nreals = 0;
43  for(int i=0; i< nprocs; ++i)
44  if(dist[i] != 0) ++nreals;
45 
46  if(nreals == nprocs) // general case
47  {
48  long * dist_in = new long[nprocs];
49  for(int i=0; i< nprocs; ++i) dist_in[i] = (long) dist[i];
50  vpsort::parallel_sort (array, array+length, dist_in, comm);
51  delete [] dist_in;
52  }
53  else
54  {
55  long * dist_in = new long[nreals];
56  int * dist_out = new int[nprocs-nreals]; // ranks to exclude
57  int indin = 0;
58  int indout = 0;
59  for(int i=0; i< nprocs; ++i)
60  {
61  if(dist[i] == 0)
62  dist_out[indout++] = i;
63  else
64  dist_in[indin++] = (long) dist[i];
65  }
66 
67  #ifdef DEBUG
68  ostringstream outs;
69  outs << "To exclude indices: ";
70  copy(dist_out, dist_out+indout, ostream_iterator<int>(outs, " ")); outs << endl;
71  SpParHelper::Print(outs.str());
72  #endif
73 
74  MPI_Group sort_group, real_group;
75  MPI_Comm_group(comm, &sort_group);
76  MPI_Group_excl(sort_group, indout, dist_out, &real_group);
77  MPI_Group_free(&sort_group);
78 
79  // The Create() function should be executed by all processes in comm,
80  // even if they do not belong to the new group (in that case MPI_COMM_NULL is returned as real_comm?)
81  // MPI::Intracomm MPI::Intracomm::Create(const MPI::Group& group) const;
82  MPI_Comm real_comm;
83  MPI_Comm_create(comm, real_group, &real_comm);
84  if(!excluded)
85  {
86  vpsort::parallel_sort (array, array+length, dist_in, real_comm);
87  MPI_Comm_free(&real_comm);
88  }
89  MPI_Group_free(&real_group);
90  delete [] dist_in;
91  delete [] dist_out;
92  }
93  }
94  else
95  {
96  IT gl_median = accumulate(dist, dist+nsize, static_cast<IT>(0)); // global rank of the first element of the median processor
97  sort(array, array+length); // re-sort because we might have swapped data in previous iterations
98  int color = (myrank < nsize)? 0: 1;
99 
100  pair<KEY,VAL> * low = array;
101  pair<KEY,VAL> * upp = array;
102  GlobalSelect(gl_median, low, upp, array, length, comm);
103  BipartiteSwap(low, array, length, nsize, color, comm);
104 
105  if(color == 1) dist = dist + nsize; // adjust for the second half of processors
106 
107  // recursive call; two implicit 'spawn's where half of the processors execute different paramaters
108  // MPI::Intracomm MPI::Intracomm::Split(int color, int key) const;
109 
110  MPI_Comm halfcomm;
111  MPI_Comm_split(comm, color, myrank, &halfcomm); // split into two communicators
112  MemoryEfficientPSort(array, length, dist, halfcomm);
113  }
114 }
115 
116 template<typename KEY, typename VAL, typename IT>
117 void SpParHelper::GlobalSelect(IT gl_rank, pair<KEY,VAL> * & low, pair<KEY,VAL> * & upp, pair<KEY,VAL> * array, IT length, const MPI_Comm & comm)
118 {
119  int nprocs, myrank;
120  MPI_Comm_size(comm, &nprocs);
121  MPI_Comm_rank(comm, &myrank);
122  IT begin = 0;
123  IT end = length; // initially everyone is active
124  pair<KEY, double> * wmminput = new pair<KEY,double>[nprocs]; // (median, #{actives})
125 
126  MPI_Datatype MPI_sortType;
127  MPI_Type_contiguous (sizeof(pair<KEY,double>), MPI_CHAR, &MPI_sortType);
128  MPI_Type_commit (&MPI_sortType);
129 
130  KEY wmm; // our median pick
131  IT gl_low, gl_upp;
132  IT active = end-begin; // size of the active range
133  IT nacts = 0;
134  bool found = 0;
135  int iters = 0;
136 
137  /* changes by shan : to add begin0 and end0 for exit condition */
138  IT begin0, end0;
139  do
140  {
141  iters++;
142  begin0 = begin; end0 = end;
143  KEY median = array[(begin + end)/2].first; // median of the active range
144  wmminput[myrank].first = median;
145  wmminput[myrank].second = static_cast<double>(active);
146  MPI_Allgather(MPI_IN_PLACE, 0, MPI_sortType, wmminput, 1, MPI_sortType, comm);
147  double totact = 0; // total number of active elements
148  for(int i=0; i<nprocs; ++i)
149  totact += wmminput[i].second;
150 
151  // input to weighted median of medians is a set of (object, weight) pairs
152  // the algorithm computes the first set of elements (according to total
153  // order of "object"s), whose sum is still less than or equal to 1/2
154  for(int i=0; i<nprocs; ++i)
155  wmminput[i].second /= totact ; // normalize the weights
156 
157  sort(wmminput, wmminput+nprocs); // sort w.r.t. medians
158  double totweight = 0;
159  int wmmloc=0;
160  while( wmmloc<nprocs && totweight < 0.5 )
161  {
162  totweight += wmminput[wmmloc++].second;
163  }
164 
165  wmm = wmminput[wmmloc-1].first; // weighted median of medians
166 
167  pair<KEY,VAL> wmmpair = make_pair(wmm, VAL());
168  low =lower_bound (array+begin, array+end, wmmpair);
169  upp =upper_bound (array+begin, array+end, wmmpair);
170  IT loc_low = low-array; // #{elements smaller than wmm}
171  IT loc_upp = upp-array; // #{elements smaller or equal to wmm}
172 
173  MPI_Allreduce( &loc_low, &gl_low, 1, MPIType<IT>(), MPI_SUM, comm);
174  MPI_Allreduce( &loc_upp, &gl_upp, 1, MPIType<IT>(), MPI_SUM, comm);
175 
176  if(gl_upp < gl_rank)
177  {
178  // our pick was too small; only recurse to the right
179  begin = (low - array);
180  }
181  else if(gl_rank < gl_low)
182  {
183  // our pick was too big; only recurse to the left
184  end = (upp - array);
185  }
186  else
187  {
188  found = true;
189  }
190  active = end-begin;
191  MPI_Allreduce(&active, &nacts, 1, MPIType<IT>(), MPI_SUM, comm);
192  if (begin0 == begin && end0 == end) break; // ABAB: Active range did not shrink, so we break (is this kosher?)
193  }
194  while((nacts > 2*nprocs) && (!found));
195  delete [] wmminput;
196 
197  MPI_Datatype MPI_pairType;
198  MPI_Type_contiguous (sizeof(pair<KEY,VAL>), MPI_CHAR, &MPI_pairType);
199  MPI_Type_commit (&MPI_pairType);
200 
201  int * nactives = new int[nprocs];
202  nactives[myrank] = static_cast<int>(active); // At this point, actives are small enough
203  MPI_Allgather(MPI_IN_PLACE, 0, MPI_INT, nactives, 1, MPI_INT, comm);
204  int * dpls = new int[nprocs](); // displacements (zero initialized pid)
205  partial_sum(nactives, nactives+nprocs-1, dpls+1);
206  pair<KEY,VAL> * recvbuf = new pair<KEY,VAL>[nacts];
207  low = array + begin; // update low to the beginning of the active range
208  MPI_Allgatherv(low, active, MPI_pairType, recvbuf, nactives, dpls, MPI_pairType, comm);
209 
210  pair<KEY,int> * allactives = new pair<KEY,int>[nacts];
211  int k = 0;
212  for(int i=0; i<nprocs; ++i)
213  {
214  for(int j=0; j<nactives[i]; ++j)
215  {
216  allactives[k] = make_pair(recvbuf[k].first, i);
217  k++;
218  }
219  }
220  DeleteAll(recvbuf, dpls, nactives);
221  sort(allactives, allactives+nacts);
222  MPI_Allreduce(&begin, &gl_low, 1, MPIType<IT>(), MPI_SUM, comm); // update
223  int diff = gl_rank - gl_low;
224  for(int k=0; k < diff; ++k)
225  {
226  if(allactives[k].second == myrank)
227  ++low; // increment the local pointer
228  }
229  delete [] allactives;
230  begin = low-array;
231  MPI_Allreduce(&begin, &gl_low, 1, MPIType<IT>(), MPI_SUM, comm); // update
232 }
233 
234 template<typename KEY, typename VAL, typename IT>
235 void SpParHelper::BipartiteSwap(pair<KEY,VAL> * low, pair<KEY,VAL> * array, IT length, int nfirsthalf, int color, const MPI_Comm & comm)
236 {
237  int nprocs, myrank;
238  MPI_Comm_size(comm, &nprocs);
239  MPI_Comm_rank(comm, &myrank);
240 
241  IT * firsthalves = new IT[nprocs];
242  IT * secondhalves = new IT[nprocs];
243  firsthalves[myrank] = low-array;
244  secondhalves[myrank] = length - (low-array);
245 
246  MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), firsthalves, 1, MPIType<IT>(), comm);
247  MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), secondhalves, 1, MPIType<IT>(), comm);
248 
249  int * sendcnt = new int[nprocs](); // zero initialize
250  int totrecvcnt = 0;
251 
252  pair<KEY,VAL> * bufbegin = NULL;
253  if(color == 0) // first processor half, only send second half of data
254  {
255  bufbegin = low;
256  totrecvcnt = length - (low-array);
257  IT beg_oftransfer = accumulate(secondhalves, secondhalves+myrank, static_cast<IT>(0));
258  IT spaceafter = firsthalves[nfirsthalf];
259  int i=nfirsthalf+1;
260  while(i < nprocs && spaceafter < beg_oftransfer)
261  {
262  spaceafter += firsthalves[i++]; // post-incremenet
263  }
264  IT end_oftransfer = beg_oftransfer + secondhalves[myrank]; // global index (within second half) of the end of my data
265  IT beg_pour = beg_oftransfer;
266  IT end_pour = min(end_oftransfer, spaceafter);
267  sendcnt[i-1] = end_pour - beg_pour;
268  while( i < nprocs && spaceafter < end_oftransfer ) // find other recipients until I run out of data
269  {
270  beg_pour = end_pour;
271  spaceafter += firsthalves[i];
272  end_pour = min(end_oftransfer, spaceafter);
273  sendcnt[i++] = end_pour - beg_pour; // post-increment
274  }
275  }
276  else if(color == 1) // second processor half, only send first half of data
277  {
278  bufbegin = array;
279  totrecvcnt = low-array;
280  // global index (within the second processor half) of the beginning of my data
281  IT beg_oftransfer = accumulate(firsthalves+nfirsthalf, firsthalves+myrank, static_cast<IT>(0));
282  IT spaceafter = secondhalves[0];
283  int i=1;
284  while( i< nfirsthalf && spaceafter < beg_oftransfer)
285  {
286  //spacebefore = spaceafter;
287  spaceafter += secondhalves[i++]; // post-increment
288  }
289  IT end_oftransfer = beg_oftransfer + firsthalves[myrank]; // global index (within second half) of the end of my data
290  IT beg_pour = beg_oftransfer;
291  IT end_pour = min(end_oftransfer, spaceafter);
292  sendcnt[i-1] = end_pour - beg_pour;
293  while( i < nfirsthalf && spaceafter < end_oftransfer ) // find other recipients until I run out of data
294  {
295  beg_pour = end_pour;
296  spaceafter += secondhalves[i];
297  end_pour = min(end_oftransfer, spaceafter);
298  sendcnt[i++] = end_pour - beg_pour; // post-increment
299  }
300  }
301  DeleteAll(firsthalves, secondhalves);
302  int * recvcnt = new int[nprocs];
303  MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, comm); // get the recv counts
304  // Alltoall is actually unnecessary, because sendcnt = recvcnt
305  // If I have n_mine > n_yours data to send, then I can send you only n_yours
306  // as this is your space, and you'll send me identical amount.
307  // Then I can only receive n_mine - n_yours from the third processor and
308  // that processor can only send n_mine - n_yours to me back.
309  // The proof follows from induction
310 
311  MPI_Datatype MPI_valueType;
312  MPI_Type_contiguous(sizeof(pair<KEY,VAL>), MPI_CHAR, &MPI_valueType);
313  MPI_Type_commit(&MPI_valueType);
314 
315  pair<KEY,VAL> * receives = new pair<KEY,VAL>[totrecvcnt];
316  int * sdpls = new int[nprocs](); // displacements (zero initialized pid)
317  int * rdpls = new int[nprocs]();
318  partial_sum(sendcnt, sendcnt+nprocs-1, sdpls+1);
319  partial_sum(recvcnt, recvcnt+nprocs-1, rdpls+1);
320 
321  MPI_Alltoallv(bufbegin, sendcnt, sdpls, MPI_valueType, receives, recvcnt, rdpls, MPI_valueType, comm); // sparse swap
322 
323  DeleteAll(sendcnt, recvcnt, sdpls, rdpls);
324  copy(receives, receives+totrecvcnt, bufbegin);
325  delete [] receives;
326 }
327 
328 
329 template<typename KEY, typename VAL, typename IT>
330 void SpParHelper::DebugPrintKeys(pair<KEY,VAL> * array, IT length, IT * dist, MPI_Comm & World)
331 {
332  int rank, nprocs;
333  MPI_Comm_rank(World, &rank);
334  MPI_Comm_size(World, &nprocs);
335  MPI_File thefile;
336  MPI_File_open(World, "temp_sortedkeys", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
337 
338  // The cast in the last parameter is crucial because the signature of the function is
339  // T accumulate ( InputIterator first, InputIterator last, T init )
340  // Hence if init if of type "int", the output also becomes an it (remember C++ signatures are ignorant of return value)
341  IT sizeuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
342 
343  MPI_Offset disp = sizeuntil * sizeof(KEY); // displacement is in bytes
344  MPI_File_set_view(thefile, disp, MPIType<KEY>(), MPIType<KEY>(), "native", MPI_INFO_NULL);
345 
346  KEY * packed = new KEY[length];
347  for(int i=0; i<length; ++i)
348  {
349  packed[i] = array[i].first;
350  }
351  MPI_File_write(thefile, packed, length, MPIType<KEY>(), NULL);
352  MPI_File_close(&thefile);
353  delete [] packed;
354 
355  // Now let processor-0 read the file and print
356  if(rank == 0)
357  {
358  FILE * f = fopen("temp_sortedkeys", "r");
359  if(!f)
360  {
361  cerr << "Problem reading binary input file\n";
362  return;
363  }
364  IT maxd = *max_element(dist, dist+nprocs);
365  KEY * data = new KEY[maxd];
366 
367  for(int i=0; i<nprocs; ++i)
368  {
369  // read n_per_proc integers and print them
370  fread(data, sizeof(KEY), dist[i],f);
371 
372  cout << "Elements stored on proc " << i << ": " << endl;
373  copy(data, data+dist[i], ostream_iterator<KEY>(cout, "\n"));
374  }
375  delete [] data;
376  }
377 }
378 
379 
387 template <class IT, class NT, class DER>
388 void SpParHelper::FetchMatrix(SpMat<IT,NT,DER> & MRecv, const vector<IT> & essentials, vector<MPI_Win> & arrwin, int ownind)
389 {
390  MRecv.Create(essentials); // allocate memory for arrays
391 
392  Arr<IT,NT> arrinfo = MRecv.GetArrays();
393  assert( (arrwin.size() == arrinfo.totalsize()));
394 
395  // C-binding for MPI::Get
396  // int MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
397  // int target_count, MPI_Datatype target_datatype, MPI_Win win)
398 
399  IT essk = 0;
400  for(int i=0; i< arrinfo.indarrs.size(); ++i) // get index arrays
401  {
402  //arrwin[essk].Lock(MPI::LOCK_SHARED, ownind, 0);
403  MPI_Get( arrinfo.indarrs[i].addr, arrinfo.indarrs[i].count, MPIType<IT>(), ownind, 0, arrinfo.indarrs[i].count, MPIType<IT>(), arrwin[essk++]);
404  }
405  for(int i=0; i< arrinfo.numarrs.size(); ++i) // get numerical arrays
406  {
407  //arrwin[essk].Lock(MPI::LOCK_SHARED, ownind, 0);
408  MPI_Get(arrinfo.numarrs[i].addr, arrinfo.numarrs[i].count, MPIType<NT>(), ownind, 0, arrinfo.numarrs[i].count, MPIType<NT>(), arrwin[essk++]);
409  }
410 }
411 
412 
418 template<typename IT, typename NT, typename DER>
419 void SpParHelper::BCastMatrix(MPI_Comm & comm1d, SpMat<IT,NT,DER> & Matrix, const vector<IT> & essentials, int root)
420 {
421  int myrank;
422  MPI_Comm_rank(comm1d, &myrank);
423  if(myrank != root)
424  {
425  Matrix.Create(essentials); // allocate memory for arrays
426  }
427 
428  Arr<IT,NT> arrinfo = Matrix.GetArrays();
429  for(unsigned int i=0; i< arrinfo.indarrs.size(); ++i) // get index arrays
430  {
431  MPI_Bcast(arrinfo.indarrs[i].addr, arrinfo.indarrs[i].count, MPIType<IT>(), root, comm1d);
432  }
433  for(unsigned int i=0; i< arrinfo.numarrs.size(); ++i) // get numerical arrays
434  {
435  MPI_Bcast(arrinfo.numarrs[i].addr, arrinfo.numarrs[i].count, MPIType<NT>(), root, comm1d);
436  }
437 }
438 
439 
440 template <class IT, class NT, class DER>
441 void SpParHelper::SetWindows(MPI_Comm & comm1d, const SpMat< IT,NT,DER > & Matrix, vector<MPI_Win> & arrwin)
442 {
443  Arr<IT,NT> arrs = Matrix.GetArrays();
444 
445  // static MPI::Win MPI::Win::create(const void *base, MPI::Aint size, int disp_unit, MPI::Info info, const MPI_Comm & comm);
446  // The displacement unit argument is provided to facilitate address arithmetic in RMA operations
447  // *** COLLECTIVE OPERATION ***, everybody exposes its own array to everyone else in the communicator
448 
449  for(int i=0; i< arrs.indarrs.size(); ++i)
450  {
451  MPI_Win nWin;
452  MPI_Win_create(arrs.indarrs[i].addr,
453  arrs.indarrs[i].count * sizeof(IT), sizeof(IT), MPI_INFO_NULL, comm1d, &nWin);
454  arrwin.push_back(nWin);
455  }
456  for(int i=0; i< arrs.numarrs.size(); ++i)
457  {
458  MPI_Win nWin;
459  MPI_Win_create(arrs.numarrs[i].addr,
460  arrs.numarrs[i].count * sizeof(NT), sizeof(NT), MPI_INFO_NULL, comm1d, &nWin);
461  arrwin.push_back(nWin);
462  }
463 }
464 
465 inline void SpParHelper::LockWindows(int ownind, vector<MPI_Win> & arrwin)
466 {
467  for(unsigned int i=0; i< arrwin.size(); ++i)
468  {
469  MPI_Win_lock(MPI_LOCK_SHARED, ownind, 0, arrwin[i]);
470  }
471 }
472 
473 inline void SpParHelper::UnlockWindows(int ownind, vector<MPI_Win> & arrwin)
474 {
475  for(unsigned int i=0; i< arrwin.size(); ++i)
476  {
477  MPI_Win_unlock( ownind, arrwin[i]);
478  }
479 }
480 
481 
486 inline void SpParHelper::StartAccessEpoch(int owner, vector<MPI_Win> & arrwin, MPI_Group & group)
487 {
488  /* Now start using the whole comm as a group */
489  int acc_ranks[1];
490  acc_ranks[0] = owner;
491  MPI_Group access;
492  MPI_Group_incl(group, 1, acc_ranks, &access); // take only the owner
493 
494  // begin the ACCESS epochs for the arrays of the remote matrices A and B
495  // Start() *may* block until all processes in the target group have entered their exposure epoch
496  for(unsigned int i=0; i< arrwin.size(); ++i)
497  MPI_Win_start(access, 0, arrwin[i]);
498 
499  MPI_Group_free(&access);
500 }
501 
505 inline void SpParHelper::PostExposureEpoch(int self, vector<MPI_Win> & arrwin, MPI_Group & group)
506 {
507  // begin the EXPOSURE epochs for the arrays of the local matrices A and B
508  for(unsigned int i=0; i< arrwin.size(); ++i)
509  MPI_Win_post(group, MPI_MODE_NOPUT, arrwin[i]);
510 }
511 
512 template <class IT, class DER>
513 void SpParHelper::AccessNFetch(DER * & Matrix, int owner, vector<MPI_Win> & arrwin, MPI_Group & group, IT ** sizes)
514 {
515  StartAccessEpoch(owner, arrwin, group); // start the access epoch to arrwin of owner
516 
517  vector<IT> ess(DER::esscount); // pack essentials to a vector
518  for(int j=0; j< DER::esscount; ++j)
519  ess[j] = sizes[j][owner];
520 
521  Matrix = new DER(); // create the object first
522  FetchMatrix(*Matrix, ess, arrwin, owner); // then start fetching its elements
523 }
524 
525 template <class IT, class DER>
526 void SpParHelper::LockNFetch(DER * & Matrix, int owner, vector<MPI_Win> & arrwin, MPI_Group & group, IT ** sizes)
527 {
528  LockWindows(owner, arrwin);
529 
530  vector<IT> ess(DER::esscount); // pack essentials to a vector
531  for(int j=0; j< DER::esscount; ++j)
532  ess[j] = sizes[j][owner];
533 
534  Matrix = new DER(); // create the object first
535  FetchMatrix(*Matrix, ess, arrwin, owner); // then start fetching its elements
536 }
537 
543 template <class IT, class NT, class DER>
544 void SpParHelper::GetSetSizes(const SpMat<IT,NT,DER> & Matrix, IT ** & sizes, MPI_Comm & comm1d)
545 {
546  vector<IT> essentials = Matrix.GetEssentials();
547  int index;
548  MPI_Comm_rank(comm1d, &index);
549 
550  for(IT i=0; (unsigned)i < essentials.size(); ++i)
551  {
552  sizes[i][index] = essentials[i];
553  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), sizes[i], 1, MPIType<IT>(), comm1d);
554  }
555 }
556 
557 
558 inline void SpParHelper::Print(const string & s)
559 {
560  int myrank;
561  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
562  if(myrank == 0)
563  {
564  cout << s;
565  }
566 }
567 
568 inline void SpParHelper::WaitNFree(vector<MPI_Win> & arrwin)
569 {
570  // End the exposure epochs for the arrays of the local matrices A and B
571  // The Wait() call matches calls to Complete() issued by ** EACH OF THE ORIGIN PROCESSES **
572  // that were granted access to the window during this epoch.
573  for(unsigned int i=0; i< arrwin.size(); ++i)
574  {
575  MPI_Win_wait(arrwin[i]);
576  }
577  FreeWindows(arrwin);
578 }
579 
580 inline void SpParHelper::FreeWindows(vector<MPI_Win> & arrwin)
581 {
582  for(unsigned int i=0; i< arrwin.size(); ++i)
583  {
584  MPI_Win_free(&arrwin[i]);
585  }
586 }
587