COMBINATORIAL_BLAS  1.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FullyDistSpVec.cpp
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.2 -------------------------------------------------*/
4 /* date: 10/06/2011 --------------------------------------------*/
5 /* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2011, Aydin Buluc
9 
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16 
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28 
29 #include <limits>
30 #include "FullyDistSpVec.h"
31 #include "SpDefs.h"
32 #include "SpHelper.h"
33 using namespace std;
34 
35 template <class IT, class NT>
36 FullyDistSpVec<IT, NT>::FullyDistSpVec ( shared_ptr<CommGrid> grid)
37 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid)
38 { };
39 
40 template <class IT, class NT>
41 FullyDistSpVec<IT, NT>::FullyDistSpVec ( shared_ptr<CommGrid> grid, IT globallen)
42 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid,globallen)
43 { };
44 
45 template <class IT, class NT>
47 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>()
48 { };
49 
50 template <class IT, class NT>
52 : FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(globallen)
53 { }
54 
55 
56 template <class IT, class NT>
58 {
59  if(this != &rhs)
60  {
61  FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>::operator= (rhs); // to update glen and commGrid
62  ind = rhs.ind;
63  num = rhs.num;
64  }
65  return *this;
66 }
67 
68 template <class IT, class NT>
69 FullyDistSpVec<IT,NT>::FullyDistSpVec (const FullyDistVec<IT,NT> & rhs) // Conversion copy-constructor
70 {
71  *this = rhs;
72 }
73 
74 template <class IT, class NT>
76 {
77  FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>::operator= (rhs); // to update glen and commGrid
78 
79  IT vecsize = rhs.LocArrSize();
80  for(IT i=0; i< vecsize; ++i)
81  {
82  // rhs.zero does not exist after CombBLAS 1.2
83  ind.push_back(i);
84  num.push_back(rhs.arr[i]);
85  }
86  return *this;
87 }
88 
89 template <class IT, class NT>
91 {
92  FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>::operator= (victim); // to update glen and commGrid
93  ind.swap(victim.ind);
94  num.swap(victim.num);
95 }
96 
97 template <class IT, class NT>
99 {
100  NT val;
101  IT locind;
102  int owner = Owner(indx, locind);
103  int found = 0;
104  if(commGrid->GetRank() == owner)
105  {
106  typename vector<IT>::const_iterator it = lower_bound(ind.begin(), ind.end(), locind); // ind is a sorted vector
107  if(it != ind.end() && locind == (*it)) // found
108  {
109  val = num[it-ind.begin()];
110  found = 1;
111  }
112  else
113  {
114  val = NT(); // return NULL
115  found = 0;
116  }
117  }
118  MPI_Bcast(&found, 1, MPI_INT, owner, commGrid->GetWorld());
119  MPI_Bcast(&val, 1, MPIType<NT>(), owner, commGrid->GetWorld());
120  wasFound = found;
121  return val;
122 }
123 
125 template <class IT, class NT>
126 void FullyDistSpVec<IT,NT>::SetElement (IT indx, NT numx)
127 {
128  if(glen == 0)
129  SpParHelper::Print("WARNING: SetElement() called on a vector with zero length\n");
130 
131  IT locind;
132  int owner = Owner(indx, locind);
133  if(commGrid->GetRank() == owner)
134  {
135  typename vector<IT>::iterator iter = lower_bound(ind.begin(), ind.end(), locind);
136  if(iter == ind.end()) // beyond limits, insert from back
137  {
138  ind.push_back(locind);
139  num.push_back(numx);
140  }
141  else if (locind < *iter) // not found, insert in the middle
142  {
143  // the order of insertions is crucial
144  // if we first insert to ind, then ind.begin() is invalidated !
145  num.insert(num.begin() + (iter-ind.begin()), numx);
146  ind.insert(iter, locind);
147  }
148  else // found
149  {
150  *(num.begin() + (iter-ind.begin())) = numx;
151  }
152  }
153 }
154 
155 template <class IT, class NT>
157 {
158  IT locind;
159  int owner = Owner(indx, locind);
160  if(commGrid->GetRank() == owner)
161  {
162  typename vector<IT>::iterator iter = lower_bound(ind.begin(), ind.end(), locind);
163  if(iter != ind.end() && !(locind < *iter))
164  {
165  num.erase(num.begin() + (iter-ind.begin()));
166  ind.erase(iter);
167  }
168  }
169 }
170 
179 template <class IT, class NT>
181 {
182  MPI_Comm World = commGrid->GetWorld();
183  FullyDistVec<IT,NT> Indexed(ri.commGrid, ri.glen, NT()); // NT() is the initial value
184  int nprocs;
185  MPI_Comm_size(World, &nprocs);
186  unordered_map<IT, IT> revr_map; // inverted index that maps indices of *this to indices of output
187  vector< vector<IT> > data_req(nprocs);
188  IT locnnz = ri.LocArrSize();
189 
190  // ABAB: Input sanity check
191  int local = 1;
192  int whole = 1;
193  for(IT i=0; i < locnnz; ++i)
194  {
195  if(ri.arr[i] >= glen || ri.arr[i] < 0)
196  {
197  local = 0;
198  }
199  }
200  MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, World);
201  if(whole == 0)
202  {
203  throw outofrangeexception();
204  }
205 
206  for(IT i=0; i < locnnz; ++i)
207  {
208  IT locind;
209  int owner = Owner(ri.arr[i], locind); // numerical values in ri are 0-based
210  data_req[owner].push_back(locind);
211  revr_map.insert(typename unordered_map<IT, IT>::value_type(locind, i));
212  }
213  IT * sendbuf = new IT[locnnz];
214  int * sendcnt = new int[nprocs];
215  int * sdispls = new int[nprocs];
216  for(int i=0; i<nprocs; ++i)
217  sendcnt[i] = data_req[i].size();
218 
219  int * rdispls = new int[nprocs];
220  int * recvcnt = new int[nprocs];
221  MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the request counts
222 
223  sdispls[0] = 0;
224  rdispls[0] = 0;
225  for(int i=0; i<nprocs-1; ++i)
226  {
227  sdispls[i+1] = sdispls[i] + sendcnt[i];
228  rdispls[i+1] = rdispls[i] + recvcnt[i];
229  }
230  IT totrecv = accumulate(recvcnt,recvcnt+nprocs,0);
231  IT * recvbuf = new IT[totrecv];
232 
233  for(int i=0; i<nprocs; ++i)
234  {
235  copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
236  vector<IT>().swap(data_req[i]);
237  }
238  MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), World); // request data
239 
240  // We will return the requested data,
241  // our return can be at most as big as the request
242  // and smaller if we are missing some elements
243  IT * indsback = new IT[totrecv];
244  NT * databack = new NT[totrecv];
245 
246  int * ddispls = new int[nprocs];
247  copy(rdispls, rdispls+nprocs, ddispls);
248  for(int i=0; i<nprocs; ++i)
249  {
250  // this is not the most efficient method because it scans ind vector nprocs = sqrt(p) times
251  IT * it = set_intersection(recvbuf+rdispls[i], recvbuf+rdispls[i]+recvcnt[i], ind.begin(), ind.end(), indsback+rdispls[i]);
252  recvcnt[i] = (it - (indsback+rdispls[i])); // update with size of the intersection
253 
254  IT vi = 0;
255  for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j) // fetch the numerical values
256  {
257  // indsback is a subset of ind
258  while(indsback[j] > ind[vi])
259  ++vi;
260  databack[j] = num[vi++];
261  }
262  }
263 
264  DeleteAll(recvbuf, ddispls);
265  NT * databuf = new NT[ri.LocArrSize()];
266 
267  MPI_Alltoall(recvcnt, 1, MPI_INT, sendcnt, 1, MPI_INT, World); // share the response counts, overriding request counts
268  MPI_Alltoallv(indsback, recvcnt, rdispls, MPIType<IT>(), sendbuf, sendcnt, sdispls, MPIType<IT>(), World); // send indices
269  MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), World); // send data
270  DeleteAll(rdispls, recvcnt, indsback, databack);
271 
272  // Now create the output from databuf (holds numerical values) and sendbuf (holds indices)
273  // arr is already resized during its construction
274  for(int i=0; i<nprocs; ++i)
275  {
276  // data will come globally sorted from processors
277  // i.e. ind owned by proc_i is always smaller than
278  // ind owned by proc_j for j < i
279  for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
280  {
281  typename unordered_map<IT,IT>::iterator it = revr_map.find(sendbuf[j]);
282  Indexed.arr[it->second] = databuf[j];
283  // cout << it->second << "(" << sendbuf[j] << "):" << databuf[j] << endl;
284  }
285  }
286  DeleteAll(sdispls, sendcnt, sendbuf, databuf);
287  return Indexed;
288 }
289 
290 template <class IT, class NT>
291 void FullyDistSpVec<IT,NT>::iota(IT globalsize, NT first)
292 {
293  glen = globalsize;
294  IT length = MyLocLength();
295  ind.resize(length);
296  num.resize(length);
297  SpHelper::iota(ind.begin(), ind.end(), 0); // offset'd within processors
298  SpHelper::iota(num.begin(), num.end(), LengthUntil() + first); // global across processors
299 }
300 
301 template <class IT, class NT>
303 {
304  MPI_Comm World = commGrid->GetWorld();
305  FullyDistSpVec<IT,IT> temp(commGrid);
306  IT nnz = getlocnnz();
307  pair<NT,IT> * vecpair = new pair<NT,IT>[nnz];
308 
309  int nprocs, rank;
310  MPI_Comm_size(World, &nprocs);
311  MPI_Comm_rank(World, &rank);
312 
313  IT * dist = new IT[nprocs];
314  dist[rank] = nnz;
315  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
316  IT sizeuntil = accumulate(dist, dist+rank, 0);
317  for(IT i=0; i< nnz; ++i)
318  {
319  vecpair[i].first = num[i]; // we'll sort wrt numerical values
320  vecpair[i].second = ind[i] + sizeuntil;
321  }
322  SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World);
323 
324  vector< IT > nind(nnz);
325  vector< IT > nnum(nnz);
326  for(IT i=0; i< nnz; ++i)
327  {
328  num[i] = vecpair[i].first; // sorted range (change the object itself)
329  nind[i] = ind[i]; // make sure the sparsity distribution is the same
330  nnum[i] = vecpair[i].second; // inverse permutation stored as numerical values
331  }
332  delete [] vecpair;
333  delete [] dist;
334 
335  temp.glen = glen;
336  temp.ind = nind;
337  temp.num = nnum;
338  return temp;
339 }
340 
341 template <class IT, class NT>
343 {
344  if(this != &rhs)
345  {
346  if(glen != rhs.glen)
347  {
348  cerr << "Vector dimensions don't match for addition\n";
349  return *this;
350  }
351  IT lsize = getlocnnz();
352  IT rsize = rhs.getlocnnz();
353 
354  vector< IT > nind;
355  vector< NT > nnum;
356  nind.reserve(lsize+rsize);
357  nnum.reserve(lsize+rsize);
358 
359  IT i=0, j=0;
360  while(i < lsize && j < rsize)
361  {
362  // assignment won't change the size of vector, push_back is necessary
363  if(ind[i] > rhs.ind[j])
364  {
365  nind.push_back( rhs.ind[j] );
366  nnum.push_back( rhs.num[j++] );
367  }
368  else if(ind[i] < rhs.ind[j])
369  {
370  nind.push_back( ind[i] );
371  nnum.push_back( num[i++] );
372  }
373  else
374  {
375  nind.push_back( ind[i] );
376  nnum.push_back( num[i++] + rhs.num[j++] );
377  }
378  }
379  while( i < lsize) // rhs was depleted first
380  {
381  nind.push_back( ind[i] );
382  nnum.push_back( num[i++] );
383  }
384  while( j < rsize) // *this was depleted first
385  {
386  nind.push_back( rhs.ind[j] );
387  nnum.push_back( rhs.num[j++] );
388  }
389  ind.swap(nind); // ind will contain the elements of nind with capacity shrunk-to-fit size
390  num.swap(nnum);
391  }
392  else
393  {
394  typename vector<NT>::iterator it;
395  for(it = num.begin(); it != num.end(); ++it)
396  (*it) *= 2;
397  }
398  return *this;
399 };
400 template <class IT, class NT>
402 {
403  if(this != &rhs)
404  {
405  if(glen != rhs.glen)
406  {
407  cerr << "Vector dimensions don't match for addition\n";
408  return *this;
409  }
410  IT lsize = getlocnnz();
411  IT rsize = rhs.getlocnnz();
412  vector< IT > nind;
413  vector< NT > nnum;
414  nind.reserve(lsize+rsize);
415  nnum.reserve(lsize+rsize);
416 
417  IT i=0, j=0;
418  while(i < lsize && j < rsize)
419  {
420  // assignment won't change the size of vector, push_back is necessary
421  if(ind[i] > rhs.ind[j])
422  {
423  nind.push_back( rhs.ind[j] );
424  nnum.push_back( -static_cast<NT>(rhs.num[j++]) );
425  }
426  else if(ind[i] < rhs.ind[j])
427  {
428  nind.push_back( ind[i] );
429  nnum.push_back( num[i++] );
430  }
431  else
432  {
433  nind.push_back( ind[i] );
434  nnum.push_back( num[i++] - rhs.num[j++] ); // ignore numerical cancellations
435  }
436  }
437  while( i < lsize) // rhs was depleted first
438  {
439  nind.push_back( ind[i] );
440  nnum.push_back( num[i++] );
441  }
442  while( j < rsize) // *this was depleted first
443  {
444  nind.push_back( rhs.ind[j] );
445  nnum.push_back( NT() - (rhs.num[j++]) );
446  }
447  ind.swap(nind); // ind will contain the elements of nind with capacity shrunk-to-fit size
448  num.swap(nnum);
449  }
450  else
451  {
452  ind.clear();
453  num.clear();
454  }
455  return *this;
456 };
457 
459 template <class IT, class NT>
460 template <class HANDLER>
461 ifstream& FullyDistSpVec<IT,NT>::ReadDistribute (ifstream& infile, int master, HANDLER handler)
462 {
463  IT total_nnz;
464  MPI_Comm World = commGrid->GetWorld();
465  int neighs = commGrid->GetSize(); // number of neighbors (including oneself)
466  int buffperneigh = MEMORYINBYTES / (neighs * (sizeof(IT) + sizeof(NT)));
467 
468  int * displs = new int[neighs];
469  for (int i=0; i<neighs; ++i)
470  displs[i] = i*buffperneigh;
471 
472  int * curptrs = NULL;
473  int recvcount = 0;
474  IT * inds = NULL;
475  NT * vals = NULL;
476  int rank = commGrid->GetRank();
477  if(rank == master) // 1 processor only
478  {
479  inds = new IT [ buffperneigh * neighs ];
480  vals = new NT [ buffperneigh * neighs ];
481  curptrs = new int[neighs];
482  fill_n(curptrs, neighs, 0); // fill with zero
483  if (infile.is_open())
484  {
485  infile.clear();
486  infile.seekg(0);
487  IT numrows, numcols;
488  bool indIsRow = true;
489  //infile >> glen >> total_nnz;
490  infile >> numrows >> numcols >> total_nnz;
491  if (numcols == 1)
492  { // column vector, read vector indices from the row index
493  indIsRow = true;
494  glen = numrows;
495  }
496  else
497  { // row vector, read vector indices from the column index
498  indIsRow = false;
499  glen = numcols;
500  }
501  MPI_Bcast(&glen, 1, MPIType<IT>(), master, World);
502 
503  IT tempind;
504  IT temprow, tempcol;
505  IT cnz = 0;
506  while ( (!infile.eof()) && cnz < total_nnz)
507  {
508  //infile >> tempind;
509  infile >> temprow >> tempcol;
510  if (indIsRow)
511  tempind = temprow;
512  else
513  tempind = tempcol;
514  tempind--;
515  IT locind;
516  int rec = Owner(tempind, locind); // recipient (owner) processor
517  inds[ rec * buffperneigh + curptrs[rec] ] = locind;
518  vals[ rec * buffperneigh + curptrs[rec] ] = handler.read(infile, tempind);
519  ++ (curptrs[rec]);
520 
521  if(curptrs[rec] == buffperneigh || (cnz == (total_nnz-1)) ) // one buffer is full, or file is done !
522  {
523  // first, send the receive counts ...
524  MPI_Scatter(curptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, master, World);
525 
526  // generate space for own recv data ... (use arrays because vector<bool> is cripled, if NT=bool)
527  IT * tempinds = new IT[recvcount];
528  NT * tempvals = new NT[recvcount];
529 
530  // then, send all buffers that to their recipients ...
531  MPI_Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount, MPIType<IT>(), master, World);
532  MPI_Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), master, World);
533 
534  // now push what is ours to tuples
535  for(IT i=0; i< recvcount; ++i)
536  {
537  ind.push_back( tempinds[i] ); // already offset'd by the sender
538  num.push_back( tempvals[i] );
539  }
540 
541  // reset current pointers so that we can reuse {inds,vals} buffers
542  fill_n(curptrs, neighs, 0);
543  DeleteAll(tempinds, tempvals);
544  }
545  ++ cnz;
546  }
547  assert (cnz == total_nnz);
548 
549  // Signal the end of file to other processors along the diagonal
550  fill_n(curptrs, neighs, numeric_limits<int>::max());
551  MPI_Scatter(curptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, master, World);
552  }
553  else // input file does not exist !
554  {
555  glen = 0;
556  MPI_Bcast(&glen, 1, MPIType<IT>(), master, World);
557  }
558  DeleteAll(inds,vals, curptrs);
559  }
560  else // all other processors
561  {
562  MPI_Bcast(&glen, 1, MPIType<IT>(), master, World);
563  while(glen > 0) // otherwise, input file do not exist
564  {
565  // first receive the receive counts ...
566  MPI_Scatter(curptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, master, World);
567 
568  if( recvcount == numeric_limits<int>::max())
569  break;
570 
571  // create space for incoming data ...
572  IT * tempinds = new IT[recvcount];
573  NT * tempvals = new NT[recvcount];
574 
575  // receive actual data ... (first 4 arguments are ignored in the receiver side)
576  MPI_Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount, MPIType<IT>(), master, World);
577  MPI_Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), master, World);
578 
579  // now push what is ours to tuples
580  for(IT i=0; i< recvcount; ++i)
581  {
582  ind.push_back( tempinds[i] );
583  num.push_back( tempvals[i] );
584  }
585  DeleteAll(tempinds, tempvals);
586  }
587  }
588  delete [] displs;
589  MPI_Barrier(World);
590  return infile;
591 }
592 
593 template <class IT, class NT>
594 template <class HANDLER>
595 void FullyDistSpVec<IT,NT>::SaveGathered(ofstream& outfile, int master, HANDLER handler, bool printProcSplits)
596 {
597  int rank, nprocs;
598  MPI_Comm World = commGrid->GetWorld();
599  MPI_Comm_rank(World, &rank);
600  MPI_Comm_size(World, &nprocs);
601  MPI_File thefile;
602  MPI_File_open(World, "temp_fullydistspvec", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
603 
604  IT * dist = new IT[nprocs];
605  dist[rank] = getlocnnz();
606  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
607  IT sizeuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
608  IT totalLength = TotalLength();
609  IT totalNNZ = getnnz();
610 
611  struct mystruct
612  {
613  IT ind;
614  NT num;
615  };
616 
617  MPI_Datatype datatype;
618  MPI_Type_contiguous(sizeof(mystruct), MPI_CHAR, &datatype );
619  MPI_Type_commit(&datatype);
620  int dsize;
621  MPI_Type_size(datatype, &dsize);
622 
623  // The disp displacement argument specifies the position
624  // (absolute offset in bytes from the beginning of the file)
625  MPI_File_set_view(thefile, static_cast<int>(sizeuntil * dsize), datatype, datatype, "native", MPI_INFO_NULL);
626 
627  int count = ind.size();
628  mystruct * packed = new mystruct[count];
629  for(int i=0; i<count; ++i)
630  {
631  packed[i].ind = ind[i] + sizeuntil;
632  packed[i].num = num[i];
633  }
634  MPI_File_write(thefile, packed, count, datatype, NULL);
635  MPI_Barrier(World);
636  MPI_File_close(&thefile);
637  delete [] packed;
638 
639  // Now let processor-0 read the file and print
640  if(rank == 0)
641  {
642  FILE * f = fopen("temp_fullydistspvec", "r");
643  if(!f)
644  {
645  cerr << "Problem reading binary input file\n";
646  return;
647  }
648  IT maxd = *max_element(dist, dist+nprocs);
649  mystruct * data = new mystruct[maxd];
650 
651  streamsize oldPrecision = outfile.precision();
652  outfile.precision(21);
653  outfile << totalLength << "\t1\t" << totalNNZ << endl;
654  for(int i=0; i<nprocs; ++i)
655  {
656  // read n_per_proc integers and print them
657  fread(data, dsize, dist[i], f);
658 
659  if (printProcSplits)
660  outfile << "Elements stored on proc " << i << ":" << endl;
661 
662  for (int j = 0; j < dist[i]; j++)
663  {
664  outfile << data[j].ind+1 << "\t1\t";
665  handler.save(outfile, data[j].num, data[j].ind);
666  outfile << endl;
667  }
668  }
669  outfile.precision(oldPrecision);
670  fclose(f);
671  remove("temp_fullydistspvec");
672  delete [] data;
673  delete [] dist;
674  }
675  MPI_Barrier(World);
676 }
677 
678 
679 template <class IT, class NT>
680 template <typename _Predicate>
681 IT FullyDistSpVec<IT,NT>::Count(_Predicate pred) const
682 {
683  IT local = count_if( num.begin(), num.end(), pred );
684  IT whole = 0;
685  MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
686  return whole;
687 }
688 
689 
690 template <class IT, class NT>
691 template <typename _BinaryOperation>
692 NT FullyDistSpVec<IT,NT>::Reduce(_BinaryOperation __binary_op, NT init)
693 {
694  // std::accumulate returns init for empty sequences
695  // the semantics are init + num[0] + ... + num[n]
696  NT localsum = std::accumulate( num.begin(), num.end(), init, __binary_op);
697 
698  NT totalsum = init;
699  MPI_Allreduce( &localsum, &totalsum, 1, MPIType<NT>(), MPIOp<_BinaryOperation, NT>::op(), commGrid->GetWorld());
700  return totalsum;
701 }
702 
703 template <class IT, class NT>
704 template <typename OUT, typename _BinaryOperation, typename _UnaryOperation>
705 OUT FullyDistSpVec<IT,NT>::Reduce(_BinaryOperation __binary_op, OUT default_val, _UnaryOperation __unary_op)
706 {
707  // std::accumulate returns identity for empty sequences
708  OUT localsum = default_val;
709 
710  if (num.size() > 0)
711  {
712  typename vector< NT >::const_iterator iter = num.begin();
713  //localsum = __unary_op(*iter);
714  //iter++;
715  while (iter < num.end())
716  {
717  localsum = __binary_op(localsum, __unary_op(*iter));
718  iter++;
719  }
720  }
721 
722  OUT totalsum = default_val;
723  MPI_Allreduce( &localsum, &totalsum, 1, MPIType<OUT>(), MPIOp<_BinaryOperation, OUT>::op(), commGrid->GetWorld());
724  return totalsum;
725 }
726 
727 template <class IT, class NT>
728 void FullyDistSpVec<IT,NT>::PrintInfo(string vectorname) const
729 {
730  IT nznz = getnnz();
731  if (commGrid->GetRank() == 0)
732  cout << "As a whole, " << vectorname << " has: " << nznz << " nonzeros and length " << glen << endl;
733 }
734 
735 template <class IT, class NT>
737 {
738  int rank, nprocs;
739  MPI_Comm World = commGrid->GetWorld();
740  MPI_Comm_rank(World, &rank);
741  MPI_Comm_size(World, &nprocs);
742  MPI_File thefile;
743  char tfilename[32] = "temp_fullydistspvec";
744  MPI_File_open(World, tfilename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
745 
746  IT * dist = new IT[nprocs];
747  dist[rank] = getlocnnz();
748  MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
749  IT sizeuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
750 
751  struct mystruct
752  {
753  IT ind;
754  NT num;
755  };
756 
757  MPI_Datatype datatype;
758  MPI_Type_contiguous(sizeof(mystruct), MPI_CHAR, &datatype );
759  MPI_Type_commit(&datatype);
760  int dsize;
761  MPI_Type_size(datatype, &dsize);
762 
763  // The disp displacement argument specifies the position
764  // (absolute offset in bytes from the beginning of the file)
765  char openmode[32] = "native";
766  MPI_File_set_view(thefile, static_cast<int>(sizeuntil * dsize), datatype, datatype, openmode, MPI_INFO_NULL);
767 
768  int count = ind.size();
769  mystruct * packed = new mystruct[count];
770  for(int i=0; i<count; ++i)
771  {
772  packed[i].ind = ind[i];
773  packed[i].num = num[i];
774  }
775  MPI_File_write(thefile, packed, count, datatype, NULL);
776  MPI_Barrier(World);
777  MPI_File_close(&thefile);
778  delete [] packed;
779 
780  // Now let processor-0 read the file and print
781  if(rank == 0)
782  {
783  FILE * f = fopen("temp_fullydistspvec", "r");
784  if(!f)
785  {
786  cerr << "Problem reading binary input file\n";
787  return;
788  }
789  IT maxd = *max_element(dist, dist+nprocs);
790  mystruct * data = new mystruct[maxd];
791 
792  for(int i=0; i<nprocs; ++i)
793  {
794  // read n_per_proc integers and print them
795  fread(data, dsize, dist[i],f);
796 
797  cout << "Elements stored on proc " << i << ": {";
798  for (int j = 0; j < dist[i]; j++)
799  {
800  cout << "(" << data[j].ind << "," << data[j].num << "), ";
801  }
802  cout << "}" << endl;
803  }
804  fclose(f);
805  remove("temp_fullydistspvec");
806  delete [] data;
807  delete [] dist;
808  }
809  MPI_Barrier(World);
810 }