36 template <
class IT,
class NT>
39 zero =
static_cast<NT
>(0);
40 commGrid.reset(
new CommGrid(MPI_COMM_WORLD, 0, 0));
42 if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
49 template<
class IT,
class NT>
52 zero =
static_cast<NT
>(0);
53 commGrid.reset(
new CommGrid(MPI_COMM_WORLD, 0, 0));
54 if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
61 int nprocs = commGrid->GetDiagSize();
62 int ndrank = commGrid->GetDiagRank();
64 IT typical = globallength/nprocs;
65 if(ndrank == nprocs - 1)
66 arr.resize(globallength - ndrank*typical, zero);
68 arr.resize(typical, zero);
72 template <
class IT,
class NT>
75 commGrid.reset(
new CommGrid(MPI_COMM_WORLD, 0, 0));
77 if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
83 arr.resize(locallength, initval);
86 template <
class IT,
class NT>
90 if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
96 template <
class IT,
class NT>
99 if(commGrid->GetRankInProcRow() == commGrid->GetRankInProcCol())
105 arr.resize(locallength, initval);
108 template <
class IT,
class NT>
109 template <
typename _BinaryOperation>
113 NT localsum = std::accumulate( arr.begin(), arr.end(), identity, __binary_op);
115 NT totalsum = identity;
120 template <
class IT,
class NT>
123 arr.resize(rhs.length);
124 std::fill(arr.begin(), arr.end(), zero);
126 IT spvecsize = rhs.ind.size();
127 for(IT i=0; i< spvecsize; ++i)
129 arr[rhs.ind[i]] = rhs.num[i];
135 template <
class IT,
class NT>
140 commGrid.reset(
new CommGrid(*(rhs.commGrid)));
142 diagonal = rhs.diagonal;
147 template <
class IT,
class NT>
150 commGrid.reset(
new CommGrid(*(victim.commGrid)));
151 arr.swap(victim.arr);
152 diagonal = victim.diagonal;
158 template <
class IT,
class NT>
161 IT spvecsize = rhs.ind.size();
162 for(IT i=0; i< spvecsize; ++i)
164 if(arr[rhs.ind[i]] == zero)
165 arr[rhs.ind[i]] = rhs.num[i];
167 arr[rhs.ind[i]] += rhs.num[i];
172 template <
class IT,
class NT>
175 IT spvecsize = rhs.ind.size();
176 for(IT i=0; i< spvecsize; ++i)
178 arr[rhs.ind[i]] -= rhs.num[i];
187 template <
class IT,
class NT>
188 template <
typename _BinaryOperation>
193 transform ( arr.begin(), arr.end(), rhs.arr.begin(), arr.begin(), __binary_op );
197 cout <<
"DenseParVec objects have different identity (zero) elements..." << endl;
198 cout <<
"Operation didn't happen !" << endl;
203 template <
class IT,
class NT>
208 if(!(*commGrid == *rhs.commGrid))
210 cout <<
"Grids are not comparable elementwise addition" << endl;
215 EWise(rhs, std::plus<NT>());
221 template <
class IT,
class NT>
226 if(!(*commGrid == *rhs.commGrid))
228 cout <<
"Grids are not comparable elementwise addition" << endl;
233 EWise(rhs, std::minus<NT>());
239 template <
class IT,
class NT>
246 local = (int) std::equal(arr.begin(), arr.end(), rhs.arr.begin(), epsilonequal );
248 vector<NT> diff(arr.size());
249 transform(arr.begin(), arr.end(), rhs.arr.begin(), diff.begin(), minus<NT>());
250 typename vector<NT>::iterator maxitr;
251 maxitr = max_element(diff.begin(), diff.end());
252 cout << maxitr-diff.begin() <<
": " << *maxitr <<
" where lhs: " << *(arr.begin()+(maxitr-diff.begin()))
253 <<
" and rhs: " << *(rhs.arr.begin()+(maxitr-diff.begin())) << endl;
257 MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
258 return static_cast<bool>(whole);
261 template <
class IT,
class NT>
262 template <
typename _Predicate>
268 local = count_if( arr.begin(), arr.end(), pred );
271 MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
278 template <
class IT,
class NT>
279 template <
typename _Predicate>
283 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
284 if(DiagWorld != MPI_COMM_NULL)
287 MPI_Comm_rank(DiagWorld, &dgrank);
288 MPI_Comm_size(DiagWorld, &nprocs);
289 IT old_n_perproc = getTypicalLocLength();
291 IT
size = arr.size();
292 for(IT i=0; i<size; ++i)
296 found.arr.push_back(i+old_n_perproc*dgrank);
299 MPI_Barrier(DiagWorld);
305 if(dgrank != nprocs-1)
307 int arrsize = found.arr.size();
308 MPI_Gather(&arrsize, 1, MPI_INT, NULL, 1, MPI_INT, nprocs-1, DiagWorld);
309 MPI_Gatherv(&(found.arr[0]), arrsize, MPIType<IT>(), NULL, NULL, NULL, MPIType<IT>(), nprocs-1, DiagWorld);
313 int * allnnzs =
new int[nprocs];
314 allnnzs[dgrank] = found.arr.size();
315 MPI_Gather(MPI_IN_PLACE, 1, MPI_INT, allnnzs, 1, MPI_INT, nprocs-1, DiagWorld);
317 int * rdispls =
new int[nprocs];
319 for(
int i=0; i<nprocs-1; ++i)
320 rdispls[i+1] = rdispls[i] + allnnzs[i];
322 IT totrecv = accumulate(allnnzs, allnnzs+nprocs, 0);
323 vector<IT> recvbuf(totrecv);
324 MPI_Gatherv(MPI_IN_PLACE, 1, MPI_INT, &(recvbuf[0]), allnnzs, rdispls, MPIType<IT>(), nprocs-1, DiagWorld);
326 found.arr.swap(recvbuf);
331 IT lengthuntil = dgrank * n_perproc;
334 IT nsize = found.arr.size();
335 int * sendcnt =
new int[nprocs];
336 fill(sendcnt, sendcnt+nprocs, 0);
337 for(IT i=0; i<nsize; ++i)
340 int owner = std::min(static_cast<int>( (i+lengthuntil) / n_perproc), nprocs-1);
344 int * recvcnt =
new int[nprocs];
345 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, DiagWorld);
347 int * sdispls =
new int[nprocs];
348 int * rdispls =
new int[nprocs];
351 for(
int i=0; i<nprocs-1; ++i)
353 sdispls[i+1] = sdispls[i] + sendcnt[i];
354 rdispls[i+1] = rdispls[i] + recvcnt[i];
357 IT totrecv = accumulate(recvcnt,recvcnt+nprocs, (IT) 0);
358 vector<IT> recvbuf(totrecv);
361 MPI_Alltoallv(&(found.arr[0]), sendcnt, sdispls, MPIType<IT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<IT>(), DiagWorld);
362 found.arr.swap(recvbuf);
363 DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
372 template <
class IT,
class NT>
373 template <
typename _Predicate>
379 IT
size = arr.size();
380 for(IT i=0; i<size; ++i)
384 found.ind.push_back(i);
385 found.num.push_back(arr[i]);
393 template <
class IT,
class NT>
403 template <
class IT,
class NT>
406 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
407 if(DiagWorld != MPI_COMM_NULL)
410 MPI_Comm_rank(DiagWorld, &dgrank);
411 MPI_Comm_size(DiagWorld, &nprocs);
412 IT n_perproc = getTypicalLocLength();
413 IT offset = dgrank * n_perproc;
415 if (n_perproc == 0) {
416 cout <<
"DenseParVec::SetElement can't be called on an empty vector." << endl;
419 IT owner = std::min(static_cast<int>(indx / n_perproc), nprocs-1);
422 IT locindx = indx-offset;
424 if (locindx > arr.size()-1)
426 cout <<
"DenseParVec::SetElement cannot expand array" << endl;
428 else if (locindx < 0)
430 cout <<
"DenseParVec::SetElement local index < 0" << endl;
440 template <
class IT,
class NT>
445 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
446 if(DiagWorld != MPI_COMM_NULL)
449 MPI_Comm_rank(DiagWorld, &dgrank);
450 MPI_Comm_size(DiagWorld, &nprocs);
451 IT n_perproc = getTypicalLocLength();
452 IT offset = dgrank * n_perproc;
454 if (n_perproc == 0 && dgrank == 0) {
455 cout <<
"DenseParVec::GetElement can't be called on an empty vector." << endl;
456 return numeric_limits<NT>::min();
458 owner = std::min(static_cast<int>(indx / n_perproc), nprocs-1);
461 IT locindx = indx-offset;
462 if (locindx > arr.size()-1)
464 cout <<
"DenseParVec::GetElement cannot expand array" << endl;
466 else if (locindx < 0)
468 cout <<
"DenseParVec::GetElement local index < 0" << endl;
476 int worldowner = commGrid->GetRank(owner);
477 MPI_Bcast(&worldowner, 1, MPIType<int>(), 0, commGrid->GetWorld());
478 MPI_Bcast(&ret, 1, MPIType<NT>(), worldowner, commGrid->GetWorld());
482 template <
class IT,
class NT>
491 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
492 if(DiagWorld != MPI_COMM_NULL)
495 MPI_Comm_rank(DiagWorld, &dgrank);
496 MPI_Comm_size(DiagWorld, &nprocs);
500 all_nnzs[dgrank] = arr.size();
504 for (
int i = 0; i < nprocs; i++)
508 cerr << arr.size() <<
" elements stored on proc " << dgrank <<
"," << dgrank <<
":" ;
510 for (
int j = 0; j < arr.size(); j++)
512 cerr <<
"\n[" << (j+offset) <<
"] = " << arr[j] ;
516 offset += all_nnzs[i];
517 MPI_Barrier(DiagWorld);
519 MPI_Barrier(DiagWorld);
521 cerr <<
"total size: " << offset << endl;
522 MPI_Barrier(DiagWorld);
526 template <
class IT,
class NT>
527 template <
typename _UnaryOperation>
530 typename vector< IT >::const_iterator miter = mask.ind.begin();
531 while (miter < mask.ind.end())
534 arr[index] = __unary_op(arr[index]);
539 template <
class IT,
class NT>
542 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
543 if(DiagWorld != MPI_COMM_NULL)
545 IT
size = arr.size();
546 pair<double,IT> * vecpair =
new pair<double,IT>[size];
549 MPI_Comm_rank(DiagWorld, &dgrank);
550 MPI_Comm_size(DiagWorld, &nprocs);
552 long * dist =
new long[nprocs];
554 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<long>(), dist, 1, MPIType<long>(), DiagWorld);
555 IT lengthuntil = accumulate(dist, dist+dgrank, 0);
558 for(
int i=0; i<size; ++i)
560 vecpair[i].first = M.
rand();
561 vecpair[i].second = arr[i];
567 vector< NT > nnum(size);
568 for(
int i=0; i<size; ++i)
569 nnum[i] = vecpair[i].second;
578 template <
class IT,
class NT>
581 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
582 if(DiagWorld != MPI_COMM_NULL)
585 MPI_Comm_rank(DiagWorld, &dgrank);
586 MPI_Comm_size(DiagWorld, &nprocs);
587 IT n_perproc = size / nprocs;
589 IT length = (dgrank != nprocs-1) ? n_perproc: (size - (n_perproc * (nprocs-1)));
591 SpHelper::iota(arr.begin(), arr.end(), (dgrank * n_perproc) + first);
595 template <
class IT,
class NT>
598 if(!(*commGrid == *ri.commGrid))
600 cout <<
"Grids are not comparable for dense vector subsref" << endl;
604 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
606 if(DiagWorld != MPI_COMM_NULL)
609 MPI_Comm_rank(DiagWorld, &dgrank);
610 MPI_Comm_size(DiagWorld, &nprocs);
611 IT n_perproc = getTypicalLocLength();
612 vector< vector< IT > > data_req(nprocs);
613 vector< vector< IT > > revr_map(nprocs);
614 for(IT i=0; i < ri.arr.size(); ++i)
616 int owner = ri.arr[i] / n_perproc;
617 owner = std::min(owner, nprocs-1);
618 data_req[owner].push_back(ri.arr[i] - (n_perproc * owner));
619 revr_map[owner].push_back(i);
621 IT * sendbuf =
new IT[ri.arr.size()];
622 int * sendcnt =
new int[nprocs];
623 int * sdispls =
new int[nprocs];
624 for(
int i=0; i<nprocs; ++i)
625 sendcnt[i] = data_req[i].
size();
627 int * rdispls =
new int[nprocs];
628 int * recvcnt =
new int[nprocs];
629 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, DiagWorld);
633 for(
int i=0; i<nprocs-1; ++i)
635 sdispls[i+1] = sdispls[i] + sendcnt[i];
636 rdispls[i+1] = rdispls[i] + recvcnt[i];
638 IT totrecv = accumulate(recvcnt,recvcnt+nprocs,zero);
639 IT * recvbuf =
new IT[totrecv];
641 for(
int i=0; i<nprocs; ++i)
643 copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
644 vector<IT>().swap(data_req[i]);
647 IT * reversemap =
new IT[ri.arr.size()];
648 for(
int i=0; i<nprocs; ++i)
650 copy(revr_map[i].begin(), revr_map[i].end(), reversemap+sdispls[i]);
651 vector<IT>().swap(revr_map[i]);
653 MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), DiagWorld);
659 NT * databack =
new NT[totrecv];
661 for(
int i=0; i<nprocs; ++i)
663 for(
int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j)
665 databack[j] = arr[recvbuf[j]];
670 NT * databuf =
new NT[ri.arr.size()];
673 MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), DiagWorld);
677 Indexed.arr.resize(ri.arr.size());
678 for(
int i=0; i<nprocs; ++i)
680 for(
int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
682 Indexed.arr[reversemap[j]] = databuf[j];
685 DeleteAll(sdispls, sendcnt, databuf,reversemap);
690 template <
class IT,
class NT>
694 if (comm != MPI_COMM_NULL)
696 IT locnnz = arr.size();
697 MPI_Allreduce( &locnnz, & totnnz, 1, MPIType<IT>(), MPI_SUM, comm);
702 template <
class IT,
class NT>
706 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
707 if(DiagWorld != MPI_COMM_NULL)
710 MPI_Comm_rank(DiagWorld, &dgrank);
711 MPI_Comm_size(DiagWorld, &nprocs);
712 n_perproc = arr.size();
713 if (dgrank == nprocs-1 && nprocs > 1)
717 MPI_Recv(&n_perproc, 1, MPIType<IT>(), 0, 1, DiagWorld, NULL);
719 else if (dgrank == 0 && nprocs > 1)
721 MPI_Send(&n_perproc, 1, MPIType<IT>(), nprocs-1, 1, DiagWorld);
729 template <
class IT,
class NT>
732 IT totl = getTotalLength(commGrid->GetDiagWorld());
733 if (commGrid->GetRank() == 0)
734 cout <<
"As a whole, " << vectorname <<
" has length " << totl << endl;