33 template <
class IT,
class NT>
35 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>()
39 template <
class IT,
class NT>
41 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(globallen)
43 arr.resize(MyLocLength(), initval);
46 template <
class IT,
class NT>
48 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid)
51 template <
class IT,
class NT>
53 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(grid,globallen)
55 arr.resize(MyLocLength(), initval);
58 template <
class IT,
class NT>
64 template <
class IT,
class NT>
65 template <
class ITRHS,
class NTRHS>
67 :
FullyDist<IT,NT,typename CombBLAS::disable_if< CombBLAS::is_boolean<NT>::value, NT >::type>(rhs.commGrid, static_cast<IT>(rhs.glen))
69 arr.resize(static_cast<IT>(rhs.arr.size()), NT());
71 for(IT i=0; (unsigned)i < arr.size(); ++i)
73 arr[i] =
static_cast<NT
>(rhs.arr[
static_cast<ITRHS
>(i)]);
77 template <
class IT,
class NT>
78 template <
typename _BinaryOperation>
82 NT localsum = std::accumulate( arr.begin(), arr.end(), identity, __binary_op);
84 NT totalsum = identity;
89 template <
class IT,
class NT>
90 template <
typename OUT,
typename _BinaryOperation,
typename _UnaryOperation>
94 OUT localsum = default_val;
98 typename vector< NT >::const_iterator iter = arr.begin();
101 while (iter < arr.end())
103 localsum = __binary_op(localsum, __unary_op(*iter));
108 OUT totalsum = default_val;
115 template<
class IT,
class NT>
122 IT length = TotalLength();
123 vector<double> loccands(length);
124 vector<NT> loccandints(length);
125 MPI_Comm World = commGrid->GetWorld();
126 int myrank = commGrid->GetRank();
129 for(
int i=0; i<length; ++i)
130 loccands[i] = M.
rand();
131 transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
133 for(
int i=0; i<length; ++i)
134 loccandints[i] = static_cast<NT>(loccands[i]);
136 MPI_Bcast(&(loccandints[0]), length, MPIType<NT>(),0, World);
137 for(IT i=0; i<length; ++i)
138 SetElement(i,loccandints[i]);
141 template <
class IT,
class NT>
142 template <
class ITRHS,
class NTRHS>
145 if(static_cast<const void*>(
this) != static_cast<const void*>(&rhs))
148 glen =
static_cast<IT
>(rhs.glen);
149 commGrid = rhs.commGrid;
151 arr.resize(rhs.arr.size(), NT());
152 for(IT i=0; (unsigned)i < arr.size(); ++i)
154 arr[i] =
static_cast<NT
>(rhs.arr[
static_cast<ITRHS
>(i)]);
160 template <
class IT,
class NT>
171 template <
class IT,
class NT>
175 arr.resize(rhs.MyLocLength());
176 std::fill(arr.begin(), arr.end(), NT());
179 for(IT i=0; i< spvecsize; ++i)
183 arr[rhs.ind[i]] = rhs.num[i];
188 template <
class IT,
class NT>
191 if(*commGrid != *rhs.commGrid)
193 cout <<
"Grids are not comparable elementwise addition" << endl;
199 arr.resize(MyLocLength());
200 fill(arr.begin(), arr.end(), NT());
202 int * sendcnts = NULL;
206 int proccols = commGrid->GetGridCols();
208 sendcnts =
new int[proccols];
209 fill(sendcnts, sendcnts+proccols-1, n_perproc);
210 sendcnts[proccols-1] = rhs.
getLocalLength() - (n_perproc * (proccols-1));
211 dpls =
new int[proccols]();
212 partial_sum(sendcnts, sendcnts+proccols-1, dpls+1);
215 int rowroot = commGrid->GetDiagOfProcRow();
216 MPI_Scatterv(&(rhs.arr[0]),sendcnts, dpls, MPIType<NT>(), &(arr[0]), arr.size(), MPIType<NT>(),rowroot, commGrid->GetRowWorld());
225 template <
class IT,
class NT>
229 arr.swap(victim.arr);
233 template <
class IT,
class NT>
238 #pragma omp parallel for
240 for(IT i=0; i< spvecsize; ++i)
242 if(arr[rhs.ind[i]] == NT())
243 arr[rhs.ind[i]] = rhs.num[i];
245 arr[rhs.ind[i]] += rhs.num[i];
250 template <
class IT,
class NT>
255 #pragma omp parallel for
257 for(IT i=0; i< spvecsize; ++i)
259 arr[rhs.ind[i]] = rhs.num[i];
264 template <
class IT,
class NT>
268 for(IT i=0; i< spvecsize; ++i)
270 arr[rhs.ind[i]] -= rhs.num[i];
280 template <
class IT,
class NT>
281 template <
typename _BinaryOperation>
284 transform ( arr.begin(), arr.end(), rhs.arr.begin(), arr.begin(), __binary_op );
288 template <
class IT,
class NT>
293 if(!(*commGrid == *rhs.commGrid))
295 cout <<
"Grids are not comparable elementwise addition" << endl;
300 EWise(rhs, std::plus<NT>());
306 template <
class IT,
class NT>
311 if(!(*commGrid == *rhs.commGrid))
313 cout <<
"Grids are not comparable elementwise addition" << endl;
318 EWise(rhs, std::minus<NT>());
324 template <
class IT,
class NT>
329 local = (int) std::equal(arr.begin(), arr.end(), rhs.arr.begin(), epsilonequal );
331 MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
332 return static_cast<bool>(whole);
335 template <
class IT,
class NT>
336 template <
typename _Predicate>
339 IT local = count_if( arr.begin(), arr.end(), pred );
341 MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
347 template <
class IT,
class NT>
348 template <
typename _Predicate>
352 MPI_Comm World = commGrid->GetWorld();
353 int nprocs = commGrid->GetSize();
354 int rank = commGrid->GetRank();
356 IT sizelocal = LocArrSize();
357 IT sizesofar = LengthUntil();
358 for(IT i=0; i<sizelocal; ++i)
362 found.arr.push_back(i+sizesofar);
365 IT * dist =
new IT[nprocs];
366 IT nsize = found.arr.size();
368 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
369 IT lengthuntil = accumulate(dist, dist+rank, static_cast<IT>(0));
370 found.glen = accumulate(dist, dist+nprocs, static_cast<IT>(0));
376 int * sendcnt =
new int[nprocs];
377 fill(sendcnt, sendcnt+nprocs, 0);
378 for(IT i=0; i<nsize; ++i)
381 int owner = found.Owner(i+lengthuntil, locind);
384 int * recvcnt =
new int[nprocs];
385 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World);
387 int * sdispls =
new int[nprocs];
388 int * rdispls =
new int[nprocs];
391 for(
int i=0; i<nprocs-1; ++i)
393 sdispls[i+1] = sdispls[i] + sendcnt[i];
394 rdispls[i+1] = rdispls[i] + recvcnt[i];
396 IT totrecv = accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
397 vector<IT> recvbuf(totrecv);
400 MPI_Alltoallv(&(found.arr[0]), sendcnt, sdispls, MPIType<IT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<IT>(), World);
401 found.arr.swap(recvbuf);
403 DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
411 template <
class IT,
class NT>
412 template <
typename _Predicate>
416 size_t size = arr.size();
417 for(
size_t i=0; i<size; ++i)
421 found.ind.push_back( (IT) i);
422 found.num.push_back(arr[i]);
429 template <
class IT,
class NT>
430 template <
class HANDLER>
440 template <
class IT,
class NT>
441 template <
class HANDLER>
445 tmpSpVec.
SaveGathered(outfile, master, handler, printProcSplits);
448 template <
class IT,
class NT>
451 int rank = commGrid->GetRank();
455 cout <<
"FullyDistVec::SetElement can't be called on an empty vector." << endl;
459 int owner = Owner(indx, locind);
460 if(commGrid->GetRank() == owner)
462 if (locind > (LocArrSize() -1))
464 cout <<
"FullyDistVec::SetElement cannot expand array" << endl;
468 cout <<
"FullyDistVec::SetElement local index < 0" << endl;
477 template <
class IT,
class NT>
481 MPI_Comm World = commGrid->GetWorld();
482 int rank = commGrid->GetRank();
486 cout <<
"FullyDistVec::GetElement can't be called on an empty vector." << endl;
491 int owner = Owner(indx, locind);
492 if(commGrid->GetRank() == owner)
494 if (locind > (LocArrSize() -1))
496 cout <<
"FullyDistVec::GetElement local index > size" << endl;
502 cout <<
"FullyDistVec::GetElement local index < 0" << endl;
510 MPI_Bcast(&ret, 1, MPIType<NT>(), owner, World);
515 template <
class IT,
class NT>
519 MPI_Comm World = commGrid->GetWorld();
520 MPI_Comm_rank(World, &rank);
521 MPI_Comm_size(World, &nprocs);
523 MPI_File_open(World,
"temp_fullydistvec", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
524 IT lengthuntil = LengthUntil();
528 MPI_File_set_view(thefile,
int64_t(lengthuntil *
sizeof(NT)), MPIType<NT>(), MPIType<NT>(),
"native", MPI_INFO_NULL);
530 IT count = LocArrSize();
531 MPI_File_write(thefile, &(arr[0]), count, MPIType<NT>(), NULL);
532 MPI_File_close(&thefile);
535 IT * counts =
new IT[nprocs];
536 MPI_Gather(&count, 1, MPIType<IT>(), counts, 1, MPIType<IT>(), 0, World);
539 FILE * f = fopen(
"temp_fullydistvec",
"r");
542 cerr <<
"Problem reading binary input file\n";
545 IT maxd = *max_element(counts, counts+nprocs);
546 NT * data =
new NT[maxd];
548 for(
int i=0; i<nprocs; ++i)
551 size_t result = fread(data,
sizeof(NT), counts[i],f);
552 if (result != (
unsigned)counts[i]) { cout <<
"Error in fread, only " << result <<
" entries read" << endl; }
554 cout <<
"Elements stored on proc " << i <<
": {" ;
555 for (
int j = 0; j < counts[i]; j++)
557 cout << data[j] <<
",";
566 template <
class IT,
class NT>
567 template <
typename _UnaryOperation,
typename IRRELEVANT_NT>
570 typename vector< IT >::const_iterator miter = mask.ind.begin();
571 while (miter < mask.ind.end())
574 arr[index] = __unary_op(arr[index]);
578 template <
class IT,
class NT>
579 template <
typename _BinaryOperation,
typename _BinaryPredicate,
class NT2>
582 if(*(commGrid) == *(other.commGrid))
584 if(glen != other.glen)
587 outs <<
"Vector dimensions don't match (" << glen <<
" vs " << other.glen <<
") for FullyDistVec::EWiseApply\n";
593 typename vector< NT >::iterator thisIter = arr.begin();
594 typename vector< NT2 >::const_iterator otherIter = other.arr.begin();
595 while (thisIter < arr.end())
597 if (_do_op(*thisIter, *otherIter,
false,
false))
598 *thisIter = __binary_op(*thisIter, *otherIter,
false,
false);
607 outs <<
"Grids are not comparable for FullyDistVec<IT,NT>::EWiseApply" << endl;
613 template <
class IT,
class NT>
614 template <
typename _BinaryOperation,
typename _BinaryPredicate,
class NT2>
617 if(*(commGrid) == *(other.commGrid))
619 if(glen != other.glen)
621 cerr <<
"Vector dimensions don't match (" << glen <<
" vs " << other.glen <<
") for FullyDistVec::EWiseApply\n";
626 typename vector< IT >::const_iterator otherInd = other.ind.begin();
627 typename vector< NT2 >::const_iterator otherNum = other.num.begin();
631 for(IT i=0; (unsigned)i < arr.size(); ++i)
633 if (otherInd == other.ind.end() || i < *otherInd)
635 if (_do_op(arr[i], nullValue,
false,
true))
636 arr[i] = __binary_op(arr[i], nullValue,
false,
true);
640 if (_do_op(arr[i], *otherNum,
false,
false))
641 arr[i] = __binary_op(arr[i], *otherNum,
false,
false);
649 while (otherInd < other.ind.end())
651 if (_do_op(arr[*otherInd], *otherNum,
false,
false))
652 arr[*otherInd] = __binary_op(arr[*otherInd], *otherNum,
false,
false);
661 cout <<
"Grids are not comparable elementwise apply" << endl;
667 template <
class IT,
class NT>
670 MPI_Comm World = commGrid->GetWorld();
672 IT nnz = LocArrSize();
673 pair<NT,IT> * vecpair =
new pair<NT,IT>[nnz];
674 int nprocs = commGrid->GetSize();
675 int rank = commGrid->GetRank();
677 IT * dist =
new IT[nprocs];
679 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
680 IT sizeuntil = LengthUntil();
681 for(IT i=0; i< nnz; ++i)
683 vecpair[i].first = arr[i];
684 vecpair[i].second = i + sizeuntil;
688 vector< IT > narr(nnz);
689 for(IT i=0; i< nnz; ++i)
691 arr[i] = vecpair[i].first;
692 narr[i] = vecpair[i].second;
704 template <
class IT,
class NT>
707 MPI_Comm World = commGrid->GetWorld();
708 IT
size = LocArrSize();
709 pair<double,NT> * vecpair =
new pair<double,NT>[size];
710 int nprocs = commGrid->GetSize();
711 int rank = commGrid->GetRank();
712 IT * dist =
new IT[nprocs];
714 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
720 for(
int i=0; i<size; ++i)
722 vecpair[i].first = M.
rand();
723 vecpair[i].second = arr[i];
727 vector< NT > nnum(size);
728 for(
int i=0; i<size; ++i)
729 nnum[i] = vecpair[i].second;
739 template <
class IT,
class NT>
743 IT length = MyLocLength();
749 template <
class IT,
class NT>
752 if(!(*commGrid == *ri.commGrid))
754 cout <<
"Grids are not comparable for dense vector subsref" << endl;
758 MPI_Comm World = commGrid->GetWorld();
760 int nprocs = commGrid->GetSize();
761 vector< vector< IT > > data_req(nprocs);
762 vector< vector< IT > > revr_map(nprocs);
765 for(IT i=0; i < riloclen; ++i)
768 int owner = Owner(ri.arr[i], locind);
769 data_req[owner].push_back(locind);
770 revr_map[owner].push_back(i);
772 IT * sendbuf =
new IT[riloclen];
773 int * sendcnt =
new int[nprocs];
774 int * sdispls =
new int[nprocs];
775 for(
int i=0; i<nprocs; ++i)
776 sendcnt[i] = (
int) data_req[i].size();
778 int * rdispls =
new int[nprocs];
779 int * recvcnt =
new int[nprocs];
780 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World);
783 for(
int i=0; i<nprocs-1; ++i)
785 sdispls[i+1] = sdispls[i] + sendcnt[i];
786 rdispls[i+1] = rdispls[i] + recvcnt[i];
788 IT totrecv = accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
789 for(
int i=0; i<nprocs; ++i)
791 copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
792 vector<IT>().swap(data_req[i]);
795 IT * reversemap =
new IT[riloclen];
796 for(
int i=0; i<nprocs; ++i)
798 copy(revr_map[i].begin(), revr_map[i].end(), reversemap+sdispls[i]);
799 vector<IT>().swap(revr_map[i]);
802 IT * recvbuf =
new IT[totrecv];
803 MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), World);
810 NT * databack =
new NT[totrecv];
811 for(
int i=0; i<nprocs; ++i)
813 for(
int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j)
815 databack[j] = arr[recvbuf[j]];
819 NT * databuf =
new NT[riloclen];
822 MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), World);
827 for(
int i=0; i<nprocs; ++i)
829 for(
int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
831 Indexed.arr[reversemap[j]] = databuf[j];
834 DeleteAll(sdispls, sendcnt, databuf,reversemap);
838 template <
class IT,
class NT>
841 IT totl = TotalLength();
842 if (commGrid->GetRank() == 0)
843 cout <<
"As a whole, " << vectorname <<
" has length " << totl << endl;