COMBINATORIAL_BLAS  1.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DenseParVec.cpp
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.2 -------------------------------------------------*/
4 /* date: 10/06/2011 --------------------------------------------*/
5 /* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2011, Aydin Buluc
9 
10 WARNING: This file is deprecated with Combinatorial v1.2
11 Please consider using the FullyDistVec object
12 
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19 
20 The above copyright notice and this permission notice shall be included in
21 all copies or substantial portions of the Software.
22 
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 THE SOFTWARE.
30 */
31 
32 #include "DenseParVec.h"
33 #include "SpParVec.h"
34 #include "Operations.h"
35 
36 template <class IT, class NT>
38 {
39  zero = static_cast<NT>(0);
40  commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
41 
42  if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
43  diagonal = true;
44  else
45  diagonal = false;
46 }
47 
48 // Create a new distributed dense array with all values initialized to zero
49 template<class IT, class NT>
51 {
52  zero = static_cast<NT>(0);
53  commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
54  if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
55  diagonal = true;
56  else
57  diagonal = false;
58 
59  if (diagonal)
60  {
61  int nprocs = commGrid->GetDiagSize();
62  int ndrank = commGrid->GetDiagRank();
63 
64  IT typical = globallength/nprocs;
65  if(ndrank == nprocs - 1)
66  arr.resize(globallength - ndrank*typical, zero);
67  else
68  arr.resize(typical, zero);
69  }
70 }
71 
72 template <class IT, class NT>
73 DenseParVec<IT, NT>::DenseParVec (IT locallength, NT initval, NT id): zero(id)
74 {
75  commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
76 
77  if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
78  diagonal = true;
79  else
80  diagonal = false;
81 
82  if (diagonal)
83  arr.resize(locallength, initval);
84 }
85 
86 template <class IT, class NT>
87 DenseParVec<IT, NT>::DenseParVec ( shared_ptr<CommGrid> grid, NT id): zero(id)
88 {
89  commGrid.reset(new CommGrid(*grid));
90  if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
91  diagonal = true;
92  else
93  diagonal = false;
94 };
95 
96 template <class IT, class NT>
97 DenseParVec<IT, NT>::DenseParVec ( shared_ptr<CommGrid> grid, IT locallength, NT initval, NT id): commGrid(grid), zero(id)
98 {
99  if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
100  diagonal = true;
101  else
102  diagonal = false;
103 
104  if (diagonal)
105  arr.resize(locallength, initval);
106 };
107 
108 template <class IT, class NT>
109 template <typename _BinaryOperation>
110 NT DenseParVec<IT,NT>::Reduce(_BinaryOperation __binary_op, NT identity)
111 {
112  // std::accumulate returns identity for empty sequences
113  NT localsum = std::accumulate( arr.begin(), arr.end(), identity, __binary_op);
114 
115  NT totalsum = identity;
116  MPI_Allreduce( &localsum, &totalsum, 1, MPIType<NT>(), MPIOp<_BinaryOperation, NT>::op(), commGrid->GetWorld());
117  return totalsum;
118 }
119 
120 template <class IT, class NT>
121 DenseParVec< IT,NT > & DenseParVec<IT,NT>::operator=(const SpParVec< IT,NT > & rhs) // SpParVec->DenseParVec conversion operator
122 {
123  arr.resize(rhs.length);
124  std::fill(arr.begin(), arr.end(), zero);
125 
126  IT spvecsize = rhs.ind.size();
127  for(IT i=0; i< spvecsize; ++i)
128  {
129  arr[rhs.ind[i]] = rhs.num[i];
130  }
131 
132  return *this;
133 }
134 
135 template <class IT, class NT>
137 {
138  if (this == &rhs) // Same object?
139  return *this; // Yes, so skip assignment, and just return *this.
140  commGrid.reset(new CommGrid(*(rhs.commGrid)));
141  arr = rhs.arr;
142  diagonal = rhs.diagonal;
143  zero = rhs.zero;
144  return *this;
145 }
146 
147 template <class IT, class NT>
148 DenseParVec< IT,NT > & DenseParVec<IT,NT>::stealFrom(DenseParVec<IT,NT> & victim) // SpParVec->DenseParVec conversion operator
149 {
150  commGrid.reset(new CommGrid(*(victim.commGrid)));
151  arr.swap(victim.arr);
152  diagonal = victim.diagonal;
153  zero = victim.zero;
154 
155  return *this;
156 }
157 
158 template <class IT, class NT>
160 {
161  IT spvecsize = rhs.ind.size();
162  for(IT i=0; i< spvecsize; ++i)
163  {
164  if(arr[rhs.ind[i]] == zero) // not set before
165  arr[rhs.ind[i]] = rhs.num[i];
166  else
167  arr[rhs.ind[i]] += rhs.num[i];
168  }
169  return *this;
170 }
171 
172 template <class IT, class NT>
174 {
175  IT spvecsize = rhs.ind.size();
176  for(IT i=0; i< spvecsize; ++i)
177  {
178  arr[rhs.ind[i]] -= rhs.num[i];
179  }
180 }
181 
182 
187 template <class IT, class NT>
188 template <typename _BinaryOperation>
189 void DenseParVec<IT,NT>::EWise(const DenseParVec<IT,NT> & rhs, _BinaryOperation __binary_op)
190 {
191  if(zero == rhs.zero)
192  {
193  transform ( arr.begin(), arr.end(), rhs.arr.begin(), arr.begin(), __binary_op );
194  }
195  else
196  {
197  cout << "DenseParVec objects have different identity (zero) elements..." << endl;
198  cout << "Operation didn't happen !" << endl;
199  }
200 };
201 
202 
203 template <class IT, class NT>
205 {
206  if(this != &rhs)
207  {
208  if(!(*commGrid == *rhs.commGrid))
209  {
210  cout << "Grids are not comparable elementwise addition" << endl;
211  MPI_Abort(MPI_COMM_WORLD,GRIDMISMATCH);
212  }
213  else if(diagonal) // Only the diagonal processors hold values
214  {
215  EWise(rhs, std::plus<NT>());
216  }
217  }
218  return *this;
219 };
220 
221 template <class IT, class NT>
223 {
224  if(this != &rhs)
225  {
226  if(!(*commGrid == *rhs.commGrid))
227  {
228  cout << "Grids are not comparable elementwise addition" << endl;
229  MPI_Abort(MPI_COMM_WORLD,GRIDMISMATCH);
230  }
231  else if(diagonal) // Only the diagonal processors hold values
232  {
233  EWise(rhs, std::minus<NT>());
234  }
235  }
236  return *this;
237 };
238 
239 template <class IT, class NT>
241 {
242  ErrorTolerantEqual<NT> epsilonequal(EPSILON);
243  int local = 1;
244  if(diagonal)
245  {
246  local = (int) std::equal(arr.begin(), arr.end(), rhs.arr.begin(), epsilonequal );
247 #ifdef DEBUG
248  vector<NT> diff(arr.size());
249  transform(arr.begin(), arr.end(), rhs.arr.begin(), diff.begin(), minus<NT>());
250  typename vector<NT>::iterator maxitr;
251  maxitr = max_element(diff.begin(), diff.end());
252  cout << maxitr-diff.begin() << ": " << *maxitr << " where lhs: " << *(arr.begin()+(maxitr-diff.begin()))
253  << " and rhs: " << *(rhs.arr.begin()+(maxitr-diff.begin())) << endl;
254 #endif
255  }
256  int whole = 1;
257  MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
258  return static_cast<bool>(whole);
259 }
260 
261 template <class IT, class NT>
262 template <typename _Predicate>
263 IT DenseParVec<IT,NT>::Count(_Predicate pred) const
264 {
265  IT local = 0;
266  if(diagonal)
267  {
268  local = count_if( arr.begin(), arr.end(), pred );
269  }
270  IT whole = 0;
271  MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
272  return whole;
273 }
274 
275 
278 template <class IT, class NT>
279 template <typename _Predicate>
281 {
282  DenseParVec<IT,IT> found(commGrid, (IT) 0);
283  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
284  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
285  {
286  int dgrank, nprocs;
287  MPI_Comm_rank(DiagWorld, &dgrank);
288  MPI_Comm_size(DiagWorld, &nprocs);
289  IT old_n_perproc = getTypicalLocLength();
290 
291  IT size = arr.size();
292  for(IT i=0; i<size; ++i)
293  {
294  if(pred(arr[i]))
295  {
296  found.arr.push_back(i+old_n_perproc*dgrank);
297  }
298  }
299  MPI_Barrier(DiagWorld);
300 
301  // Since the found vector is not reshuffled yet, we can't use getTypicalLocLength() at this point
302  IT n_perproc = found.getTotalLength(DiagWorld) / nprocs;
303  if(n_perproc == 0) // it has less than sqrt(p) elements, all owned by the last processor
304  {
305  if(dgrank != nprocs-1)
306  {
307  int arrsize = found.arr.size();
308  MPI_Gather(&arrsize, 1, MPI_INT, NULL, 1, MPI_INT, nprocs-1, DiagWorld);
309  MPI_Gatherv(&(found.arr[0]), arrsize, MPIType<IT>(), NULL, NULL, NULL, MPIType<IT>(), nprocs-1, DiagWorld);
310  }
311  else
312  {
313  int * allnnzs = new int[nprocs];
314  allnnzs[dgrank] = found.arr.size();
315  MPI_Gather(MPI_IN_PLACE, 1, MPI_INT, allnnzs, 1, MPI_INT, nprocs-1, DiagWorld);
316 
317  int * rdispls = new int[nprocs];
318  rdispls[0] = 0;
319  for(int i=0; i<nprocs-1; ++i)
320  rdispls[i+1] = rdispls[i] + allnnzs[i];
321 
322  IT totrecv = accumulate(allnnzs, allnnzs+nprocs, 0);
323  vector<IT> recvbuf(totrecv);
324  MPI_Gatherv(MPI_IN_PLACE, 1, MPI_INT, &(recvbuf[0]), allnnzs, rdispls, MPIType<IT>(), nprocs-1, DiagWorld);
325 
326  found.arr.swap(recvbuf);
327  DeleteAll(allnnzs, rdispls);
328  }
329  return found; // don't execute further
330  }
331  IT lengthuntil = dgrank * n_perproc;
332 
333  // rebalance/redistribute
334  IT nsize = found.arr.size();
335  int * sendcnt = new int[nprocs];
336  fill(sendcnt, sendcnt+nprocs, 0);
337  for(IT i=0; i<nsize; ++i)
338  {
339  // owner id's are monotonically increasing and continuous
340  int owner = std::min(static_cast<int>( (i+lengthuntil) / n_perproc), nprocs-1);
341  sendcnt[owner]++;
342  }
343 
344  int * recvcnt = new int[nprocs];
345  MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, DiagWorld); // share the counts
346 
347  int * sdispls = new int[nprocs];
348  int * rdispls = new int[nprocs];
349  sdispls[0] = 0;
350  rdispls[0] = 0;
351  for(int i=0; i<nprocs-1; ++i)
352  {
353  sdispls[i+1] = sdispls[i] + sendcnt[i];
354  rdispls[i+1] = rdispls[i] + recvcnt[i];
355  }
356 
357  IT totrecv = accumulate(recvcnt,recvcnt+nprocs, (IT) 0);
358  vector<IT> recvbuf(totrecv);
359 
360  // data is already in the right order in found.arr
361  MPI_Alltoallv(&(found.arr[0]), sendcnt, sdispls, MPIType<IT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<IT>(), DiagWorld);
362  found.arr.swap(recvbuf);
363  DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
364  }
365  return found;
366 }
367 
368 
369 
372 template <class IT, class NT>
373 template <typename _Predicate>
375 {
376  SpParVec<IT,NT> found(commGrid);
377  if(diagonal)
378  {
379  IT size = arr.size();
380  for(IT i=0; i<size; ++i)
381  {
382  if(pred(arr[i]))
383  {
384  found.ind.push_back(i);
385  found.num.push_back(arr[i]);
386  }
387  }
388  found.length = size;
389  }
390  return found;
391 }
392 
393 template <class IT, class NT>
394 ifstream& DenseParVec<IT,NT>::ReadDistribute (ifstream& infile, int master)
395 {
396  SpParVec<IT,NT> tmpSpVec(commGrid);
397  tmpSpVec.ReadDistribute(infile, master);
398 
399  *this = tmpSpVec;
400  return infile;
401 }
402 
403 template <class IT, class NT>
404 void DenseParVec<IT,NT>::SetElement (IT indx, NT numx)
405 {
406  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
407  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
408  {
409  int dgrank, nprocs;
410  MPI_Comm_rank(DiagWorld, &dgrank);
411  MPI_Comm_size(DiagWorld, &nprocs);
412  IT n_perproc = getTypicalLocLength();
413  IT offset = dgrank * n_perproc;
414 
415  if (n_perproc == 0) {
416  cout << "DenseParVec::SetElement can't be called on an empty vector." << endl;
417  return;
418  }
419  IT owner = std::min(static_cast<int>(indx / n_perproc), nprocs-1);
420  if(owner == dgrank) // this process is the owner
421  {
422  IT locindx = indx-offset;
423 
424  if (locindx > arr.size()-1)
425  {
426  cout << "DenseParVec::SetElement cannot expand array" << endl;
427  }
428  else if (locindx < 0)
429  {
430  cout << "DenseParVec::SetElement local index < 0" << endl;
431  }
432  else
433  {
434  arr[locindx] = numx;
435  }
436  }
437  }
438 }
439 
440 template <class IT, class NT>
442 {
443  NT ret;
444  int owner = 0;
445  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
446  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
447  {
448  int dgrank, nprocs;
449  MPI_Comm_rank(DiagWorld, &dgrank);
450  MPI_Comm_size(DiagWorld, &nprocs);
451  IT n_perproc = getTypicalLocLength();
452  IT offset = dgrank * n_perproc;
453 
454  if (n_perproc == 0 && dgrank == 0) {
455  cout << "DenseParVec::GetElement can't be called on an empty vector." << endl;
456  return numeric_limits<NT>::min();
457  }
458  owner = std::min(static_cast<int>(indx / n_perproc), nprocs-1);
459  if(owner == dgrank) // this process is the owner
460  {
461  IT locindx = indx-offset;
462  if (locindx > arr.size()-1)
463  {
464  cout << "DenseParVec::GetElement cannot expand array" << endl;
465  }
466  else if (locindx < 0)
467  {
468  cout << "DenseParVec::GetElement local index < 0" << endl;
469  }
470  else
471  {
472  ret = arr[locindx];
473  }
474  }
475  }
476  int worldowner = commGrid->GetRank(owner); // 0 is always on the diagonal
477  MPI_Bcast(&worldowner, 1, MPIType<int>(), 0, commGrid->GetWorld());
478  MPI_Bcast(&ret, 1, MPIType<NT>(), worldowner, commGrid->GetWorld());
479  return ret;
480 }
481 
482 template <class IT, class NT>
484 {
485  // ABAB: Alternative
486  // ofstream out;
487  // commGrid->OpenDebugFile("DenseParVec", out);
488  // copy(recvbuf, recvbuf+totrecv, ostream_iterator<IT>(out, " "));
489  // out << " <end_of_vector>"<< endl;
490 
491  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
492  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
493  {
494  int dgrank, nprocs;
495  MPI_Comm_rank(DiagWorld, &dgrank);
496  MPI_Comm_size(DiagWorld, &nprocs);
497 
498  int64_t* all_nnzs = new int64_t[nprocs];
499 
500  all_nnzs[dgrank] = arr.size();
501  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<int64_t>(), all_nnzs, 1, MPIType<int64_t>(), DiagWorld);
502  int64_t offset = 0;
503 
504  for (int i = 0; i < nprocs; i++)
505  {
506  if (i == dgrank)
507  {
508  cerr << arr.size() << " elements stored on proc " << dgrank << "," << dgrank << ":" ;
509 
510  for (int j = 0; j < arr.size(); j++)
511  {
512  cerr << "\n[" << (j+offset) << "] = " << arr[j] ;
513  }
514  cerr << endl;
515  }
516  offset += all_nnzs[i];
517  MPI_Barrier(DiagWorld);
518  }
519  MPI_Barrier(DiagWorld);
520  if (dgrank == 0)
521  cerr << "total size: " << offset << endl;
522  MPI_Barrier(DiagWorld);
523  }
524 }
525 
526 template <class IT, class NT>
527 template <typename _UnaryOperation>
528 void DenseParVec<IT,NT>::Apply(_UnaryOperation __unary_op, const SpParVec<IT,NT> & mask)
529 {
530  typename vector< IT >::const_iterator miter = mask.ind.begin();
531  while (miter < mask.ind.end())
532  {
533  IT index = *miter++;
534  arr[index] = __unary_op(arr[index]);
535  }
536 }
537 
538 // Randomly permutes an already existing vector
539 template <class IT, class NT>
541 {
542  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
543  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
544  {
545  IT size = arr.size();
546  pair<double,IT> * vecpair = new pair<double,IT>[size];
547 
548  int dgrank, nprocs;
549  MPI_Comm_rank(DiagWorld, &dgrank);
550  MPI_Comm_size(DiagWorld, &nprocs);
551 
552  long * dist = new long[nprocs];
553  dist[dgrank] = size;
554  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<long>(), dist, 1, MPIType<long>(), DiagWorld);
555  IT lengthuntil = accumulate(dist, dist+dgrank, 0);
556 
557  MTRand M; // generate random numbers with Mersenne Twister
558  for(int i=0; i<size; ++i)
559  {
560  vecpair[i].first = M.rand();
561  vecpair[i].second = arr[i];
562  }
563 
564  // less< pair<T1,T2> > works correctly (sorts wrt first elements)
565  vpsort::parallel_sort (vecpair, vecpair + size, dist, DiagWorld);
566 
567  vector< NT > nnum(size);
568  for(int i=0; i<size; ++i)
569  nnum[i] = vecpair[i].second;
570 
571  delete [] vecpair;
572  delete [] dist;
573 
574  arr.swap(nnum);
575  }
576 }
577 
578 template <class IT, class NT>
579 void DenseParVec<IT,NT>::iota(IT size, NT first)
580 {
581  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
582  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
583  {
584  int dgrank, nprocs;
585  MPI_Comm_rank(DiagWorld, &dgrank);
586  MPI_Comm_size(DiagWorld, &nprocs);
587  IT n_perproc = size / nprocs;
588 
589  IT length = (dgrank != nprocs-1) ? n_perproc: (size - (n_perproc * (nprocs-1)));
590  arr.resize(length);
591  SpHelper::iota(arr.begin(), arr.end(), (dgrank * n_perproc) + first); // global across processors
592  }
593 }
594 
595 template <class IT, class NT>
597 {
598  if(!(*commGrid == *ri.commGrid))
599  {
600  cout << "Grids are not comparable for dense vector subsref" << endl;
601  return DenseParVec<IT,NT>();
602  }
603 
604  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
605  DenseParVec<IT,NT> Indexed(commGrid, zero); // length(Indexed) = length(ri)
606  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
607  {
608  int dgrank, nprocs;
609  MPI_Comm_rank(DiagWorld, &dgrank);
610  MPI_Comm_size(DiagWorld, &nprocs);
611  IT n_perproc = getTypicalLocLength();
612  vector< vector< IT > > data_req(nprocs);
613  vector< vector< IT > > revr_map(nprocs); // to put the incoming data to the correct location
614  for(IT i=0; i < ri.arr.size(); ++i)
615  {
616  int owner = ri.arr[i] / n_perproc; // numerical values in ri are 0-based
617  owner = std::min(owner, nprocs-1); // find its owner
618  data_req[owner].push_back(ri.arr[i] - (n_perproc * owner));
619  revr_map[owner].push_back(i);
620  }
621  IT * sendbuf = new IT[ri.arr.size()];
622  int * sendcnt = new int[nprocs];
623  int * sdispls = new int[nprocs];
624  for(int i=0; i<nprocs; ++i)
625  sendcnt[i] = data_req[i].size();
626 
627  int * rdispls = new int[nprocs];
628  int * recvcnt = new int[nprocs];
629  MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, DiagWorld); // share the request count
630 
631  sdispls[0] = 0;
632  rdispls[0] = 0;
633  for(int i=0; i<nprocs-1; ++i)
634  {
635  sdispls[i+1] = sdispls[i] + sendcnt[i];
636  rdispls[i+1] = rdispls[i] + recvcnt[i];
637  }
638  IT totrecv = accumulate(recvcnt,recvcnt+nprocs,zero);
639  IT * recvbuf = new IT[totrecv];
640 
641  for(int i=0; i<nprocs; ++i)
642  {
643  copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
644  vector<IT>().swap(data_req[i]);
645  }
646 
647  IT * reversemap = new IT[ri.arr.size()];
648  for(int i=0; i<nprocs; ++i)
649  {
650  copy(revr_map[i].begin(), revr_map[i].end(), reversemap+sdispls[i]);
651  vector<IT>().swap(revr_map[i]);
652  }
653  MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), DiagWorld); // request data
654 
655  // We will return the requested data,
656  // our return will be as big as the request
657  // as we are indexing a dense vector, all elements exist
658  // so the displacement boundaries are the same as rdispls
659  NT * databack = new NT[totrecv];
660 
661  for(int i=0; i<nprocs; ++i)
662  {
663  for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j) // fetch the numerical values
664  {
665  databack[j] = arr[recvbuf[j]];
666  }
667  }
668 
669  delete [] recvbuf;
670  NT * databuf = new NT[ri.arr.size()];
671 
672  // the response counts are the same as the request counts
673  MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), DiagWorld); // send data
674  DeleteAll(rdispls, recvcnt, databack);
675 
676  // Now create the output from databuf
677  Indexed.arr.resize(ri.arr.size());
678  for(int i=0; i<nprocs; ++i)
679  {
680  for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
681  {
682  Indexed.arr[reversemap[j]] = databuf[j];
683  }
684  }
685  DeleteAll(sdispls, sendcnt, databuf,reversemap);
686  }
687  return Indexed;
688 }
689 
690 template <class IT, class NT>
691 IT DenseParVec<IT,NT>::getTotalLength(MPI_Comm & comm) const
692 {
693  IT totnnz = 0;
694  if (comm != MPI_COMM_NULL)
695  {
696  IT locnnz = arr.size();
697  MPI_Allreduce( &locnnz, & totnnz, 1, MPIType<IT>(), MPI_SUM, comm);
698  }
699  return totnnz;
700 }
701 
702 template <class IT, class NT>
704 {
705  IT n_perproc = 0 ;
706  MPI_Comm DiagWorld = commGrid->GetDiagWorld();
707  if(DiagWorld != MPI_COMM_NULL) // Diagonal processors only
708  {
709  int dgrank, nprocs;
710  MPI_Comm_rank(DiagWorld, &dgrank);
711  MPI_Comm_size(DiagWorld, &nprocs);
712  n_perproc = arr.size();
713  if (dgrank == nprocs-1 && nprocs > 1)
714  {
715  // the local length on the last processor will be greater than the others if the vector length is not evenly divisible
716  // but for these calculations we need that length
717  MPI_Recv(&n_perproc, 1, MPIType<IT>(), 0, 1, DiagWorld, NULL);
718  }
719  else if (dgrank == 0 && nprocs > 1)
720  {
721  MPI_Send(&n_perproc, 1, MPIType<IT>(), nprocs-1, 1, DiagWorld);
722  }
723  }
724  return n_perproc;
725 }
726 
727 
728 
729 template <class IT, class NT>
730 void DenseParVec<IT,NT>::PrintInfo(string vectorname) const
731 {
732  IT totl = getTotalLength(commGrid->GetDiagWorld());
733  if (commGrid->GetRank() == 0) // is always on the diagonal
734  cout << "As a whole, " << vectorname << " has length " << totl << endl;
735 }