COMBINATORIAL_BLAS  1.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FullyDistVec.cpp
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.2 -------------------------------------------------*/
4 /* date: 10/06/2011 --------------------------------------------*/
5 /* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2011, Aydin Buluc
9 
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16 
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28 
29 #include "FullyDistVec.h"
30 #include "FullyDistSpVec.h"
31 #include "Operations.h"
32 
33 template <class IT, class NT>
35 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>()
36 {
37 }
38 
39 template <class IT, class NT>
40 FullyDistVec<IT, NT>::FullyDistVec (IT globallen, NT initval)
41 :FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(globallen)
42 {
43  arr.resize(MyLocLength(), initval);
44 }
45 
46 template <class IT, class NT>
47 FullyDistVec<IT, NT>::FullyDistVec ( shared_ptr<CommGrid> grid)
48 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid)
49 { }
50 
51 template <class IT, class NT>
52 FullyDistVec<IT, NT>::FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval)
53 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid,globallen)
54 {
55  arr.resize(MyLocLength(), initval);
56 }
57 
58 template <class IT, class NT>
59 FullyDistVec<IT,NT>::FullyDistVec (const FullyDistSpVec<IT,NT> & rhs) // Conversion copy-constructor
60 {
61  *this = rhs;
62 }
63 
64 template <class IT, class NT>
65 template <class ITRHS, class NTRHS>
67 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(rhs.commGrid, static_cast<IT>(rhs.glen))
68 {
69  arr.resize(static_cast<IT>(rhs.arr.size()), NT());
70 
71  for(IT i=0; (unsigned)i < arr.size(); ++i)
72  {
73  arr[i] = static_cast<NT>(rhs.arr[static_cast<ITRHS>(i)]);
74  }
75 }
76 
77 template <class IT, class NT>
78 template <typename _BinaryOperation>
79 NT FullyDistVec<IT,NT>::Reduce(_BinaryOperation __binary_op, NT identity)
80 {
81  // std::accumulate returns identity for empty sequences
82  NT localsum = std::accumulate( arr.begin(), arr.end(), identity, __binary_op);
83 
84  NT totalsum = identity;
85  MPI_Allreduce( &localsum, &totalsum, 1, MPIType<NT>(), MPIOp<_BinaryOperation, NT>::op(), commGrid->GetWorld());
86  return totalsum;
87 }
88 
89 template <class IT, class NT>
90 template <typename OUT, typename _BinaryOperation, typename _UnaryOperation>
91 OUT FullyDistVec<IT,NT>::Reduce(_BinaryOperation __binary_op, OUT default_val, _UnaryOperation __unary_op)
92 {
93  // std::accumulate returns identity for empty sequences
94  OUT localsum = default_val;
95 
96  if (arr.size() > 0)
97  {
98  typename vector< NT >::const_iterator iter = arr.begin();
99  //localsum = __unary_op(*iter);
100  //iter++;
101  while (iter < arr.end())
102  {
103  localsum = __binary_op(localsum, __unary_op(*iter));
104  iter++;
105  }
106  }
107 
108  OUT totalsum = default_val;
109  MPI_Allreduce( &localsum, &totalsum, 1, MPIType<OUT>(), MPIOp<_BinaryOperation, OUT>::op(), commGrid->GetWorld());
110  return totalsum;
111 }
112 
113 
115 template<class IT, class NT>
116 void FullyDistVec<IT,NT>::SelectCandidates(double nver, bool deterministic)
117 {
118  MTRand M; // generate random numbers with Mersenne Twister
119  if(deterministic)
120  M.seed(1);
121 
122  IT length = TotalLength();
123  vector<double> loccands(length);
124  vector<NT> loccandints(length);
125  MPI_Comm World = commGrid->GetWorld();
126  int myrank = commGrid->GetRank();
127  if(myrank == 0)
128  {
129  for(int i=0; i<length; ++i)
130  loccands[i] = M.rand();
131  transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
132 
133  for(int i=0; i<length; ++i)
134  loccandints[i] = static_cast<NT>(loccands[i]);
135  }
136  MPI_Bcast(&(loccandints[0]), length, MPIType<NT>(),0, World);
137  for(IT i=0; i<length; ++i)
138  SetElement(i,loccandints[i]);
139 }
140 
141 template <class IT, class NT>
142 template <class ITRHS, class NTRHS>
144 {
145  if(static_cast<const void*>(this) != static_cast<const void*>(&rhs))
146  {
147  //FullyDist<IT,NT>::operator= (rhs); // to update glen and commGrid
148  glen = static_cast<IT>(rhs.glen);
149  commGrid = rhs.commGrid;
150 
151  arr.resize(rhs.arr.size(), NT());
152  for(IT i=0; (unsigned)i < arr.size(); ++i)
153  {
154  arr[i] = static_cast<NT>(rhs.arr[static_cast<ITRHS>(i)]);
155  }
156  }
157  return *this;
158 }
159 
160 template <class IT, class NT>
162 {
163  if(this != &rhs)
164  {
165  FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>::operator= (rhs); // to update glen and commGrid
166  arr = rhs.arr;
167  }
168  return *this;
169 }
170 
171 template <class IT, class NT>
172 FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator=(const FullyDistSpVec< IT,NT > & rhs) // FullyDistSpVec->FullyDistVec conversion operator
173 {
174  FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>::operator= (rhs); // to update glen and commGrid
175  arr.resize(rhs.MyLocLength());
176  std::fill(arr.begin(), arr.end(), NT());
177 
178  IT spvecsize = rhs.getlocnnz();
179  for(IT i=0; i< spvecsize; ++i)
180  {
181  //if(rhs.ind[i] > arr.size())
182  // cout << "rhs.ind[i]: " << rhs.ind[i] << endl;
183  arr[rhs.ind[i]] = rhs.num[i];
184  }
185  return *this;
186 }
187 
188 template <class IT, class NT>
189 FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator=(const DenseParVec< IT,NT > & rhs) // DenseParVec->FullyDistVec conversion operator
190 {
191  if(*commGrid != *rhs.commGrid)
192  {
193  cout << "Grids are not comparable elementwise addition" << endl;
194  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
195  }
196  else
197  {
198  glen = rhs.getTotalLength();
199  arr.resize(MyLocLength()); // once glen is set, MyLocLength() works
200  fill(arr.begin(), arr.end(), NT());
201 
202  int * sendcnts = NULL;
203  int * dpls = NULL;
204  if(rhs.diagonal)
205  {
206  int proccols = commGrid->GetGridCols();
207  IT n_perproc = rhs.getLocalLength() / proccols;
208  sendcnts = new int[proccols];
209  fill(sendcnts, sendcnts+proccols-1, n_perproc);
210  sendcnts[proccols-1] = rhs.getLocalLength() - (n_perproc * (proccols-1));
211  dpls = new int[proccols](); // displacements (zero initialized pid)
212  partial_sum(sendcnts, sendcnts+proccols-1, dpls+1);
213  }
214 
215  int rowroot = commGrid->GetDiagOfProcRow();
216  MPI_Scatterv(&(rhs.arr[0]),sendcnts, dpls, MPIType<NT>(), &(arr[0]), arr.size(), MPIType<NT>(),rowroot, commGrid->GetRowWorld());
217  }
218  return *this;
219 }
220 
221 
222 // Let the compiler create an assignment operator and call base class'
223 // assignment operator automatically
224 
225 template <class IT, class NT>
227 {
228  FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>::operator= (victim); // to update glen and commGrid
229  arr.swap(victim.arr);
230  return *this;
231 }
232 
233 template <class IT, class NT>
235 {
236  IT spvecsize = rhs.getlocnnz();
237  #ifdef _OPENMP
238  #pragma omp parallel for
239  #endif
240  for(IT i=0; i< spvecsize; ++i)
241  {
242  if(arr[rhs.ind[i]] == NT()) // not set before
243  arr[rhs.ind[i]] = rhs.num[i];
244  else
245  arr[rhs.ind[i]] += rhs.num[i];
246  }
247  return *this;
248 }
249 
250 template <class IT, class NT>
252 {
253  IT spvecsize = rhs.getlocnnz();
254  #ifdef _OPENMP
255  #pragma omp parallel for
256  #endif
257  for(IT i=0; i< spvecsize; ++i)
258  {
259  arr[rhs.ind[i]] = rhs.num[i];
260  }
261 }
262 
263 
264 template <class IT, class NT>
266 {
267  IT spvecsize = rhs.getlocnnz();
268  for(IT i=0; i< spvecsize; ++i)
269  {
270  arr[rhs.ind[i]] -= rhs.num[i];
271  }
272  return *this;
273 }
274 
275 
280 template <class IT, class NT>
281 template <typename _BinaryOperation>
282 void FullyDistVec<IT,NT>::EWise(const FullyDistVec<IT,NT> & rhs, _BinaryOperation __binary_op)
283 {
284  transform ( arr.begin(), arr.end(), rhs.arr.begin(), arr.begin(), __binary_op );
285 };
286 
287 
288 template <class IT, class NT>
290 {
291  if(this != &rhs)
292  {
293  if(!(*commGrid == *rhs.commGrid))
294  {
295  cout << "Grids are not comparable elementwise addition" << endl;
296  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
297  }
298  else
299  {
300  EWise(rhs, std::plus<NT>());
301  }
302  }
303  return *this;
304 };
305 
306 template <class IT, class NT>
308 {
309  if(this != &rhs)
310  {
311  if(!(*commGrid == *rhs.commGrid))
312  {
313  cout << "Grids are not comparable elementwise addition" << endl;
314  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
315  }
316  else
317  {
318  EWise(rhs, std::minus<NT>());
319  }
320  }
321  return *this;
322 };
323 
324 template <class IT, class NT>
326 {
327  ErrorTolerantEqual<NT> epsilonequal(EPSILON);
328  int local = 1;
329  local = (int) std::equal(arr.begin(), arr.end(), rhs.arr.begin(), epsilonequal );
330  int whole = 1;
331  MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
332  return static_cast<bool>(whole);
333 }
334 
335 template <class IT, class NT>
336 template <typename _Predicate>
337 IT FullyDistVec<IT,NT>::Count(_Predicate pred) const
338 {
339  IT local = count_if( arr.begin(), arr.end(), pred );
340  IT whole = 0;
341  MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
342  return whole;
343 }
344 
347 template <class IT, class NT>
348 template <typename _Predicate>
350 {
351  FullyDistVec<IT,IT> found(commGrid);
352  MPI_Comm World = commGrid->GetWorld();
353  int nprocs = commGrid->GetSize();
354  int rank = commGrid->GetRank();
355 
356  IT sizelocal = LocArrSize();
357  IT sizesofar = LengthUntil();
358  for(IT i=0; i<sizelocal; ++i)
359  {
360  if(pred(arr[i]))
361  {
362  found.arr.push_back(i+sizesofar);
363  }
364  }
365  IT * dist = new IT[nprocs];
366  IT nsize = found.arr.size();
367  dist[rank] = nsize;
368  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
369  IT lengthuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
370  found.glen = accumulate(dist, dist+nprocs, static_cast<IT>(0));
371 
372  // Although the found vector is not reshuffled yet, its glen and commGrid are set
373  // We can call the Owner/MyLocLength/LengthUntil functions (to infer future distribution)
374 
375  // rebalance/redistribute
376  int * sendcnt = new int[nprocs];
377  fill(sendcnt, sendcnt+nprocs, 0);
378  for(IT i=0; i<nsize; ++i)
379  {
380  IT locind;
381  int owner = found.Owner(i+lengthuntil, locind);
382  ++sendcnt[owner];
383  }
384  int * recvcnt = new int[nprocs];
385  MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the counts
386 
387  int * sdispls = new int[nprocs];
388  int * rdispls = new int[nprocs];
389  sdispls[0] = 0;
390  rdispls[0] = 0;
391  for(int i=0; i<nprocs-1; ++i)
392  {
393  sdispls[i+1] = sdispls[i] + sendcnt[i];
394  rdispls[i+1] = rdispls[i] + recvcnt[i];
395  }
396  IT totrecv = accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
397  vector<IT> recvbuf(totrecv);
398 
399  // data is already in the right order in found.arr
400  MPI_Alltoallv(&(found.arr[0]), sendcnt, sdispls, MPIType<IT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<IT>(), World);
401  found.arr.swap(recvbuf);
402  delete [] dist;
403  DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
404 
405  return found;
406 }
407 
408 
411 template <class IT, class NT>
412 template <typename _Predicate>
414 {
415  FullyDistSpVec<IT,NT> found(commGrid);
416  size_t size = arr.size();
417  for(size_t i=0; i<size; ++i)
418  {
419  if(pred(arr[i]))
420  {
421  found.ind.push_back( (IT) i);
422  found.num.push_back(arr[i]);
423  }
424  }
425  found.glen = glen;
426  return found;
427 }
428 
429 template <class IT, class NT>
430 template <class HANDLER>
431 ifstream& FullyDistVec<IT,NT>::ReadDistribute (ifstream& infile, int master, HANDLER handler)
432 {
433  FullyDistSpVec<IT,NT> tmpSpVec(commGrid);
434  tmpSpVec.ReadDistribute(infile, master, handler);
435 
436  *this = tmpSpVec;
437  return infile;
438 }
439 
440 template <class IT, class NT>
441 template <class HANDLER>
442 void FullyDistVec<IT,NT>::SaveGathered(ofstream& outfile, int master, HANDLER handler, bool printProcSplits)
443 {
444  FullyDistSpVec<IT,NT> tmpSpVec = *this;
445  tmpSpVec.SaveGathered(outfile, master, handler, printProcSplits);
446 }
447 
448 template <class IT, class NT>
449 void FullyDistVec<IT,NT>::SetElement (IT indx, NT numx)
450 {
451  int rank = commGrid->GetRank();
452  if (glen == 0)
453  {
454  if(rank == 0)
455  cout << "FullyDistVec::SetElement can't be called on an empty vector." << endl;
456  return;
457  }
458  IT locind;
459  int owner = Owner(indx, locind);
460  if(commGrid->GetRank() == owner)
461  {
462  if (locind > (LocArrSize() -1))
463  {
464  cout << "FullyDistVec::SetElement cannot expand array" << endl;
465  }
466  else if (locind < 0)
467  {
468  cout << "FullyDistVec::SetElement local index < 0" << endl;
469  }
470  else
471  {
472  arr[locind] = numx;
473  }
474  }
475 }
476 
477 template <class IT, class NT>
479 {
480  NT ret;
481  MPI_Comm World = commGrid->GetWorld();
482  int rank = commGrid->GetRank();
483  if (glen == 0)
484  {
485  if(rank == 0)
486  cout << "FullyDistVec::GetElement can't be called on an empty vector." << endl;
487 
488  return NT();
489  }
490  IT locind;
491  int owner = Owner(indx, locind);
492  if(commGrid->GetRank() == owner)
493  {
494  if (locind > (LocArrSize() -1))
495  {
496  cout << "FullyDistVec::GetElement local index > size" << endl;
497  ret = NT();
498 
499  }
500  else if (locind < 0)
501  {
502  cout << "FullyDistVec::GetElement local index < 0" << endl;
503  ret = NT();
504  }
505  else
506  {
507  ret = arr[locind];
508  }
509  }
510  MPI_Bcast(&ret, 1, MPIType<NT>(), owner, World);
511  return ret;
512 }
513 
514 // Write to file using MPI-2
515 template <class IT, class NT>
517 {
518  int nprocs, rank;
519  MPI_Comm World = commGrid->GetWorld();
520  MPI_Comm_rank(World, &rank);
521  MPI_Comm_size(World, &nprocs);
522  MPI_File thefile;
523  MPI_File_open(World, "temp_fullydistvec", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
524  IT lengthuntil = LengthUntil();
525 
526  // The disp displacement argument specifies the position
527  // (absolute offset in bytes from the beginning of the file)
528  MPI_File_set_view(thefile, int64_t(lengthuntil * sizeof(NT)), MPIType<NT>(), MPIType<NT>(), "native", MPI_INFO_NULL);
529 
530  IT count = LocArrSize();
531  MPI_File_write(thefile, &(arr[0]), count, MPIType<NT>(), NULL);
532  MPI_File_close(&thefile);
533 
534  // Now let processor-0 read the file and print
535  IT * counts = new IT[nprocs];
536  MPI_Gather(&count, 1, MPIType<IT>(), counts, 1, MPIType<IT>(), 0, World); // gather at root=0
537  if(rank == 0)
538  {
539  FILE * f = fopen("temp_fullydistvec", "r");
540  if(!f)
541  {
542  cerr << "Problem reading binary input file\n";
543  return;
544  }
545  IT maxd = *max_element(counts, counts+nprocs);
546  NT * data = new NT[maxd];
547 
548  for(int i=0; i<nprocs; ++i)
549  {
550  // read counts[i] integers and print them
551  size_t result = fread(data, sizeof(NT), counts[i],f);
552  if (result != (unsigned)counts[i]) { cout << "Error in fread, only " << result << " entries read" << endl; }
553 
554  cout << "Elements stored on proc " << i << ": {" ;
555  for (int j = 0; j < counts[i]; j++)
556  {
557  cout << data[j] << ",";
558  }
559  cout << "}" << endl;
560  }
561  delete [] data;
562  delete [] counts;
563  }
564 }
565 
566 template <class IT, class NT>
567 template <typename _UnaryOperation, typename IRRELEVANT_NT>
568 void FullyDistVec<IT,NT>::Apply(_UnaryOperation __unary_op, const FullyDistSpVec<IT,IRRELEVANT_NT> & mask)
569 {
570  typename vector< IT >::const_iterator miter = mask.ind.begin();
571  while (miter < mask.ind.end())
572  {
573  IT index = *miter++;
574  arr[index] = __unary_op(arr[index]);
575  }
576 }
577 
578 template <class IT, class NT>
579 template <typename _BinaryOperation, typename _BinaryPredicate, class NT2>
580 void FullyDistVec<IT,NT>::EWiseApply(const FullyDistVec<IT,NT2> & other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
581 {
582  if(*(commGrid) == *(other.commGrid))
583  {
584  if(glen != other.glen)
585  {
586  ostringstream outs;
587  outs << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::EWiseApply\n";
588  SpParHelper::Print(outs.str());
589  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
590  }
591  else
592  {
593  typename vector< NT >::iterator thisIter = arr.begin();
594  typename vector< NT2 >::const_iterator otherIter = other.arr.begin();
595  while (thisIter < arr.end())
596  {
597  if (_do_op(*thisIter, *otherIter, false, false))
598  *thisIter = __binary_op(*thisIter, *otherIter, false, false);
599  thisIter++;
600  otherIter++;
601  }
602  }
603  }
604  else
605  {
606  ostringstream outs;
607  outs << "Grids are not comparable for FullyDistVec<IT,NT>::EWiseApply" << endl;
608  SpParHelper::Print(outs.str());
609  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
610  }
611 }
612 
613 template <class IT, class NT>
614 template <typename _BinaryOperation, typename _BinaryPredicate, class NT2>
615 void FullyDistVec<IT,NT>::EWiseApply(const FullyDistSpVec<IT,NT2> & other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, bool applyNulls, NT2 nullValue, const bool useExtendedBinOp)
616 {
617  if(*(commGrid) == *(other.commGrid))
618  {
619  if(glen != other.glen)
620  {
621  cerr << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::EWiseApply\n";
622  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
623  }
624  else
625  {
626  typename vector< IT >::const_iterator otherInd = other.ind.begin();
627  typename vector< NT2 >::const_iterator otherNum = other.num.begin();
628 
629  if (applyNulls) // scan the entire dense vector and apply sparse elements as they appear
630  {
631  for(IT i=0; (unsigned)i < arr.size(); ++i)
632  {
633  if (otherInd == other.ind.end() || i < *otherInd)
634  {
635  if (_do_op(arr[i], nullValue, false, true))
636  arr[i] = __binary_op(arr[i], nullValue, false, true);
637  }
638  else
639  {
640  if (_do_op(arr[i], *otherNum, false, false))
641  arr[i] = __binary_op(arr[i], *otherNum, false, false);
642  otherInd++;
643  otherNum++;
644  }
645  }
646  }
647  else // scan the sparse vector only
648  {
649  while (otherInd < other.ind.end())
650  {
651  if (_do_op(arr[*otherInd], *otherNum, false, false))
652  arr[*otherInd] = __binary_op(arr[*otherInd], *otherNum, false, false);
653  otherInd++;
654  otherNum++;
655  }
656  }
657  }
658  }
659  else
660  {
661  cout << "Grids are not comparable elementwise apply" << endl;
662  MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
663  }
664 }
665 
666 
667 template <class IT, class NT>
669 {
670  MPI_Comm World = commGrid->GetWorld();
671  FullyDistVec<IT,IT> temp(commGrid);
672  IT nnz = LocArrSize();
673  pair<NT,IT> * vecpair = new pair<NT,IT>[nnz];
674  int nprocs = commGrid->GetSize();
675  int rank = commGrid->GetRank();
676 
677  IT * dist = new IT[nprocs];
678  dist[rank] = nnz;
679  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
680  IT sizeuntil = LengthUntil(); // size = length, for dense vectors
681  for(IT i=0; i< nnz; ++i)
682  {
683  vecpair[i].first = arr[i]; // we'll sort wrt numerical values
684  vecpair[i].second = i + sizeuntil;
685  }
686  SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World);
687 
688  vector< IT > narr(nnz);
689  for(IT i=0; i< nnz; ++i)
690  {
691  arr[i] = vecpair[i].first; // sorted range (change the object itself)
692  narr[i] = vecpair[i].second; // inverse permutation stored as numerical values
693  }
694  delete [] vecpair;
695  delete [] dist;
696 
697  temp.glen = glen;
698  temp.arr = narr;
699  return temp;
700 }
701 
702 
703 // Randomly permutes an already existing vector
704 template <class IT, class NT>
706 {
707  MPI_Comm World = commGrid->GetWorld();
708  IT size = LocArrSize();
709  pair<double,NT> * vecpair = new pair<double,NT>[size];
710  int nprocs = commGrid->GetSize();
711  int rank = commGrid->GetRank();
712  IT * dist = new IT[nprocs];
713  dist[rank] = size;
714  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
715 #ifdef DETERMINISTIC
716  MTRand M(1);
717 #else
718  MTRand M; // generate random numbers with Mersenne Twister
719 #endif
720  for(int i=0; i<size; ++i)
721  {
722  vecpair[i].first = M.rand();
723  vecpair[i].second = arr[i];
724  }
725  // less< pair<T1,T2> > works correctly (sorts wrt first elements)
726  SpParHelper::MemoryEfficientPSort(vecpair, size, dist, World);
727  vector< NT > nnum(size);
728  for(int i=0; i<size; ++i)
729  nnum[i] = vecpair[i].second;
730 
731  delete [] vecpair;
732  delete [] dist;
733 
734  arr.swap(nnum);
735 }
736 
737 // ABAB: In its current form, unless LengthUntil returns NT
738 // this won't work reliably for anything other than integers
739 template <class IT, class NT>
740 void FullyDistVec<IT,NT>::iota(IT globalsize, NT first)
741 {
742  glen = globalsize;
743  IT length = MyLocLength(); // only needs glen to determine length
744 
745  arr.resize(length);
746  SpHelper::iota(arr.begin(), arr.end(), LengthUntil() + first); // global across processors
747 }
748 
749 template <class IT, class NT>
751 {
752  if(!(*commGrid == *ri.commGrid))
753  {
754  cout << "Grids are not comparable for dense vector subsref" << endl;
755  return FullyDistVec<IT,NT>();
756  }
757 
758  MPI_Comm World = commGrid->GetWorld();
759  FullyDistVec<IT,NT> Indexed(commGrid, ri.glen, NT()); // length(Indexed) = length(ri)
760  int nprocs = commGrid->GetSize();
761  vector< vector< IT > > data_req(nprocs);
762  vector< vector< IT > > revr_map(nprocs); // to put the incoming data to the correct location
763 
764  IT riloclen = ri.LocArrSize();
765  for(IT i=0; i < riloclen; ++i)
766  {
767  IT locind;
768  int owner = Owner(ri.arr[i], locind); // numerical values in ri are 0-based
769  data_req[owner].push_back(locind);
770  revr_map[owner].push_back(i);
771  }
772  IT * sendbuf = new IT[riloclen];
773  int * sendcnt = new int[nprocs];
774  int * sdispls = new int[nprocs];
775  for(int i=0; i<nprocs; ++i)
776  sendcnt[i] = (int) data_req[i].size();
777 
778  int * rdispls = new int[nprocs];
779  int * recvcnt = new int[nprocs];
780  MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the request counts
781  sdispls[0] = 0;
782  rdispls[0] = 0;
783  for(int i=0; i<nprocs-1; ++i)
784  {
785  sdispls[i+1] = sdispls[i] + sendcnt[i];
786  rdispls[i+1] = rdispls[i] + recvcnt[i];
787  }
788  IT totrecv = accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
789  for(int i=0; i<nprocs; ++i)
790  {
791  copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
792  vector<IT>().swap(data_req[i]);
793  }
794 
795  IT * reversemap = new IT[riloclen];
796  for(int i=0; i<nprocs; ++i)
797  {
798  copy(revr_map[i].begin(), revr_map[i].end(), reversemap+sdispls[i]); // reversemap array is unique
799  vector<IT>().swap(revr_map[i]);
800  }
801 
802  IT * recvbuf = new IT[totrecv];
803  MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), World); // request data
804  delete [] sendbuf;
805 
806  // We will return the requested data,
807  // our return will be as big as the request
808  // as we are indexing a dense vector, all elements exist
809  // so the displacement boundaries are the same as rdispls
810  NT * databack = new NT[totrecv];
811  for(int i=0; i<nprocs; ++i)
812  {
813  for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j) // fetch the numerical values
814  {
815  databack[j] = arr[recvbuf[j]];
816  }
817  }
818  delete [] recvbuf;
819  NT * databuf = new NT[riloclen];
820 
821  // the response counts are the same as the request counts
822  MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), World); // send data
823  DeleteAll(rdispls, recvcnt, databack);
824 
825  // Now create the output from databuf
826  // Indexed.arr is already allocated in contructor
827  for(int i=0; i<nprocs; ++i)
828  {
829  for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
830  {
831  Indexed.arr[reversemap[j]] = databuf[j];
832  }
833  }
834  DeleteAll(sdispls, sendcnt, databuf,reversemap);
835  return Indexed;
836 }
837 
838 template <class IT, class NT>
839 void FullyDistVec<IT,NT>::PrintInfo(string vectorname) const
840 {
841  IT totl = TotalLength();
842  if (commGrid->GetRank() == 0)
843  cout << "As a whole, " << vectorname << " has length " << totl << endl;
844 }