35 template <
class IT,
class NT>
37 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid)
40 template <
class IT,
class NT>
42 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid,globallen)
45 template <
class IT,
class NT>
47 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>()
50 template <
class IT,
class NT>
52 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(globallen)
56 template <
class IT,
class NT>
68 template <
class IT,
class NT>
74 template <
class IT,
class NT>
80 for(IT i=0; i< vecsize; ++i)
84 num.push_back(rhs.arr[i]);
89 template <
class IT,
class NT>
97 template <
class IT,
class NT>
102 int owner = Owner(indx, locind);
104 if(commGrid->GetRank() == owner)
106 typename vector<IT>::const_iterator it = lower_bound(ind.begin(), ind.end(), locind);
107 if(it != ind.end() && locind == (*it))
109 val = num[it-ind.begin()];
118 MPI_Bcast(&found, 1, MPI_INT, owner, commGrid->GetWorld());
119 MPI_Bcast(&val, 1, MPIType<NT>(), owner, commGrid->GetWorld());
125 template <
class IT,
class NT>
132 int owner = Owner(indx, locind);
133 if(commGrid->GetRank() == owner)
135 typename vector<IT>::iterator iter = lower_bound(ind.begin(), ind.end(), locind);
136 if(iter == ind.end())
138 ind.push_back(locind);
141 else if (locind < *iter)
145 num.insert(num.begin() + (iter-ind.begin()), numx);
146 ind.insert(iter, locind);
150 *(num.begin() + (iter-ind.begin())) = numx;
155 template <
class IT,
class NT>
159 int owner = Owner(indx, locind);
160 if(commGrid->GetRank() == owner)
162 typename vector<IT>::iterator iter = lower_bound(ind.begin(), ind.end(), locind);
163 if(iter != ind.end() && !(locind < *iter))
165 num.erase(num.begin() + (iter-ind.begin()));
179 template <
class IT,
class NT>
182 MPI_Comm World = commGrid->GetWorld();
185 MPI_Comm_size(World, &nprocs);
186 unordered_map<IT, IT> revr_map;
187 vector< vector<IT> > data_req(nprocs);
193 for(IT i=0; i < locnnz; ++i)
195 if(ri.arr[i] >= glen || ri.arr[i] < 0)
200 MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, World);
206 for(IT i=0; i < locnnz; ++i)
209 int owner = Owner(ri.arr[i], locind);
210 data_req[owner].push_back(locind);
211 revr_map.insert(
typename unordered_map<IT, IT>::value_type(locind, i));
213 IT * sendbuf =
new IT[locnnz];
214 int * sendcnt =
new int[nprocs];
215 int * sdispls =
new int[nprocs];
216 for(
int i=0; i<nprocs; ++i)
217 sendcnt[i] = data_req[i].
size();
219 int * rdispls =
new int[nprocs];
220 int * recvcnt =
new int[nprocs];
221 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World);
225 for(
int i=0; i<nprocs-1; ++i)
227 sdispls[i+1] = sdispls[i] + sendcnt[i];
228 rdispls[i+1] = rdispls[i] + recvcnt[i];
230 IT totrecv = accumulate(recvcnt,recvcnt+nprocs,0);
231 IT * recvbuf =
new IT[totrecv];
233 for(
int i=0; i<nprocs; ++i)
235 copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
236 vector<IT>().swap(data_req[i]);
238 MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), World);
243 IT * indsback =
new IT[totrecv];
244 NT * databack =
new NT[totrecv];
246 int * ddispls =
new int[nprocs];
247 copy(rdispls, rdispls+nprocs, ddispls);
248 for(
int i=0; i<nprocs; ++i)
251 IT * it = set_intersection(recvbuf+rdispls[i], recvbuf+rdispls[i]+recvcnt[i], ind.begin(), ind.end(), indsback+rdispls[i]);
252 recvcnt[i] = (it - (indsback+rdispls[i]));
255 for(
int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j)
258 while(indsback[j] > ind[vi])
260 databack[j] = num[vi++];
267 MPI_Alltoall(recvcnt, 1, MPI_INT, sendcnt, 1, MPI_INT, World);
268 MPI_Alltoallv(indsback, recvcnt, rdispls, MPIType<IT>(), sendbuf, sendcnt, sdispls, MPIType<IT>(), World);
269 MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), World);
270 DeleteAll(rdispls, recvcnt, indsback, databack);
274 for(
int i=0; i<nprocs; ++i)
279 for(
int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
281 typename unordered_map<IT,IT>::iterator it = revr_map.find(sendbuf[j]);
282 Indexed.arr[it->second] = databuf[j];
286 DeleteAll(sdispls, sendcnt, sendbuf, databuf);
290 template <
class IT,
class NT>
294 IT length = MyLocLength();
301 template <
class IT,
class NT>
304 MPI_Comm World = commGrid->GetWorld();
306 IT nnz = getlocnnz();
307 pair<NT,IT> * vecpair =
new pair<NT,IT>[nnz];
310 MPI_Comm_size(World, &nprocs);
311 MPI_Comm_rank(World, &rank);
313 IT * dist =
new IT[nprocs];
315 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
316 IT sizeuntil = accumulate(dist, dist+rank, 0);
317 for(IT i=0; i< nnz; ++i)
319 vecpair[i].first = num[i];
320 vecpair[i].second = ind[i] + sizeuntil;
324 vector< IT > nind(nnz);
325 vector< IT > nnum(nnz);
326 for(IT i=0; i< nnz; ++i)
328 num[i] = vecpair[i].first;
330 nnum[i] = vecpair[i].second;
341 template <
class IT,
class NT>
348 cerr <<
"Vector dimensions don't match for addition\n";
351 IT lsize = getlocnnz();
356 nind.reserve(lsize+rsize);
357 nnum.reserve(lsize+rsize);
360 while(i < lsize && j < rsize)
363 if(ind[i] > rhs.ind[j])
365 nind.push_back( rhs.ind[j] );
366 nnum.push_back( rhs.num[j++] );
368 else if(ind[i] < rhs.ind[j])
370 nind.push_back( ind[i] );
371 nnum.push_back( num[i++] );
375 nind.push_back( ind[i] );
376 nnum.push_back( num[i++] + rhs.num[j++] );
381 nind.push_back( ind[i] );
382 nnum.push_back( num[i++] );
386 nind.push_back( rhs.ind[j] );
387 nnum.push_back( rhs.num[j++] );
394 typename vector<NT>::iterator it;
395 for(it = num.begin(); it != num.end(); ++it)
400 template <
class IT,
class NT>
407 cerr <<
"Vector dimensions don't match for addition\n";
410 IT lsize = getlocnnz();
414 nind.reserve(lsize+rsize);
415 nnum.reserve(lsize+rsize);
418 while(i < lsize && j < rsize)
421 if(ind[i] > rhs.ind[j])
423 nind.push_back( rhs.ind[j] );
424 nnum.push_back( -static_cast<NT>(rhs.num[j++]) );
426 else if(ind[i] < rhs.ind[j])
428 nind.push_back( ind[i] );
429 nnum.push_back( num[i++] );
433 nind.push_back( ind[i] );
434 nnum.push_back( num[i++] - rhs.num[j++] );
439 nind.push_back( ind[i] );
440 nnum.push_back( num[i++] );
444 nind.push_back( rhs.ind[j] );
445 nnum.push_back( NT() - (rhs.num[j++]) );
459 template <
class IT,
class NT>
460 template <
class HANDLER>
464 MPI_Comm World = commGrid->GetWorld();
465 int neighs = commGrid->GetSize();
466 int buffperneigh = MEMORYINBYTES / (neighs * (
sizeof(IT) +
sizeof(NT)));
468 int * displs =
new int[neighs];
469 for (
int i=0; i<neighs; ++i)
470 displs[i] = i*buffperneigh;
472 int * curptrs = NULL;
476 int rank = commGrid->GetRank();
479 inds =
new IT [ buffperneigh * neighs ];
480 vals =
new NT [ buffperneigh * neighs ];
481 curptrs =
new int[neighs];
482 fill_n(curptrs, neighs, 0);
483 if (infile.is_open())
488 bool indIsRow =
true;
490 infile >> numrows >> numcols >> total_nnz;
501 MPI_Bcast(&glen, 1, MPIType<IT>(), master, World);
506 while ( (!infile.eof()) && cnz < total_nnz)
509 infile >> temprow >> tempcol;
516 int rec = Owner(tempind, locind);
517 inds[ rec * buffperneigh + curptrs[rec] ] = locind;
518 vals[ rec * buffperneigh + curptrs[rec] ] = handler.read(infile, tempind);
521 if(curptrs[rec] == buffperneigh || (cnz == (total_nnz-1)) )
524 MPI_Scatter(curptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, master, World);
527 IT * tempinds =
new IT[recvcount];
528 NT * tempvals =
new NT[recvcount];
531 MPI_Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount, MPIType<IT>(), master, World);
532 MPI_Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), master, World);
535 for(IT i=0; i< recvcount; ++i)
537 ind.push_back( tempinds[i] );
538 num.push_back( tempvals[i] );
542 fill_n(curptrs, neighs, 0);
547 assert (cnz == total_nnz);
550 fill_n(curptrs, neighs, numeric_limits<int>::max());
551 MPI_Scatter(curptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, master, World);
556 MPI_Bcast(&glen, 1, MPIType<IT>(), master, World);
562 MPI_Bcast(&glen, 1, MPIType<IT>(), master, World);
566 MPI_Scatter(curptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, master, World);
568 if( recvcount == numeric_limits<int>::max())
572 IT * tempinds =
new IT[recvcount];
573 NT * tempvals =
new NT[recvcount];
576 MPI_Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount, MPIType<IT>(), master, World);
577 MPI_Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), master, World);
580 for(IT i=0; i< recvcount; ++i)
582 ind.push_back( tempinds[i] );
583 num.push_back( tempvals[i] );
593 template <
class IT,
class NT>
594 template <
class HANDLER>
598 MPI_Comm World = commGrid->GetWorld();
599 MPI_Comm_rank(World, &rank);
600 MPI_Comm_size(World, &nprocs);
602 MPI_File_open(World,
"temp_fullydistspvec", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
604 IT * dist =
new IT[nprocs];
605 dist[rank] = getlocnnz();
606 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
607 IT sizeuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
608 IT totalLength = TotalLength();
609 IT totalNNZ = getnnz();
617 MPI_Datatype datatype;
618 MPI_Type_contiguous(
sizeof(mystruct), MPI_CHAR, &datatype );
619 MPI_Type_commit(&datatype);
621 MPI_Type_size(datatype, &dsize);
625 MPI_File_set_view(thefile, static_cast<int>(sizeuntil * dsize), datatype, datatype,
"native", MPI_INFO_NULL);
627 int count = ind.size();
628 mystruct * packed =
new mystruct[count];
629 for(
int i=0; i<count; ++i)
631 packed[i].ind = ind[i] + sizeuntil;
632 packed[i].num = num[i];
634 MPI_File_write(thefile, packed, count, datatype, NULL);
636 MPI_File_close(&thefile);
642 FILE * f = fopen(
"temp_fullydistspvec",
"r");
645 cerr <<
"Problem reading binary input file\n";
648 IT maxd = *max_element(dist, dist+nprocs);
649 mystruct * data =
new mystruct[maxd];
651 streamsize oldPrecision = outfile.precision();
652 outfile.precision(21);
653 outfile << totalLength <<
"\t1\t" << totalNNZ << endl;
654 for(
int i=0; i<nprocs; ++i)
657 fread(data, dsize, dist[i], f);
660 outfile <<
"Elements stored on proc " << i <<
":" << endl;
662 for (
int j = 0; j < dist[i]; j++)
664 outfile << data[j].ind+1 <<
"\t1\t";
665 handler.save(outfile, data[j].num, data[j].ind);
669 outfile.precision(oldPrecision);
671 remove(
"temp_fullydistspvec");
679 template <
class IT,
class NT>
680 template <
typename _Predicate>
683 IT local = count_if( num.begin(), num.end(), pred );
685 MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
690 template <
class IT,
class NT>
691 template <
typename _BinaryOperation>
696 NT localsum = std::accumulate( num.begin(), num.end(), init, __binary_op);
703 template <
class IT,
class NT>
704 template <
typename OUT,
typename _BinaryOperation,
typename _UnaryOperation>
708 OUT localsum = default_val;
712 typename vector< NT >::const_iterator iter = num.begin();
715 while (iter < num.end())
717 localsum = __binary_op(localsum, __unary_op(*iter));
722 OUT totalsum = default_val;
727 template <
class IT,
class NT>
731 if (commGrid->GetRank() == 0)
732 cout <<
"As a whole, " << vectorname <<
" has: " << nznz <<
" nonzeros and length " << glen << endl;
735 template <
class IT,
class NT>
739 MPI_Comm World = commGrid->GetWorld();
740 MPI_Comm_rank(World, &rank);
741 MPI_Comm_size(World, &nprocs);
743 char tfilename[32] =
"temp_fullydistspvec";
744 MPI_File_open(World, tfilename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
746 IT * dist =
new IT[nprocs];
747 dist[rank] = getlocnnz();
748 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
749 IT sizeuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
757 MPI_Datatype datatype;
758 MPI_Type_contiguous(
sizeof(mystruct), MPI_CHAR, &datatype );
759 MPI_Type_commit(&datatype);
761 MPI_Type_size(datatype, &dsize);
765 char openmode[32] =
"native";
766 MPI_File_set_view(thefile, static_cast<int>(sizeuntil * dsize), datatype, datatype, openmode, MPI_INFO_NULL);
768 int count = ind.size();
769 mystruct * packed =
new mystruct[count];
770 for(
int i=0; i<count; ++i)
772 packed[i].ind = ind[i];
773 packed[i].num = num[i];
775 MPI_File_write(thefile, packed, count, datatype, NULL);
777 MPI_File_close(&thefile);
783 FILE * f = fopen(
"temp_fullydistspvec",
"r");
786 cerr <<
"Problem reading binary input file\n";
789 IT maxd = *max_element(dist, dist+nprocs);
790 mystruct * data =
new mystruct[maxd];
792 for(
int i=0; i<nprocs; ++i)
795 fread(data, dsize, dist[i],f);
797 cout <<
"Elements stored on proc " << i <<
": {";
798 for (
int j = 0; j < dist[i]; j++)
800 cout <<
"(" << data[j].ind <<
"," << data[j].num <<
"), ";
805 remove(
"temp_fullydistspvec");