42 template <
class IT,
class NT,
class DER>
45 assert( (
sizeof(IT) >=
sizeof(
typename DER::LocalIT)) );
48 perror(
"Input file doesn't exist\n");
51 commGrid.reset(
new CommGrid(world, 0, 0));
55 template <
class IT,
class NT,
class DER>
58 assert( (
sizeof(IT) >=
sizeof(
typename DER::LocalIT)) );
59 commGrid.reset(
new CommGrid(world, 0, 0));
62 template <
class IT,
class NT,
class DER>
65 assert( (
sizeof(IT) >=
sizeof(
typename DER::LocalIT)) );
69 template <
class IT,
class NT,
class DER>
72 assert( (
sizeof(IT) >=
sizeof(
typename DER::LocalIT)) );
81 template <
class IT,
class NT,
class DER>
85 assert( (
sizeof(IT) >=
sizeof(
typename DER::LocalIT)) );
87 commGrid.reset(
new CommGrid(MPI_COMM_WORLD, 0, 0));
90 template <
class IT,
class NT,
class DER>
93 if(spSeq != NULL)
delete spSeq;
96 template <
class IT,
class NT,
class DER>
99 if(spSeq != NULL)
delete spSeq;
104 template <
class IT,
class NT,
class DER>
107 MPI_Comm World = commGrid->GetWorld();
108 int rank = commGrid->GetRank;
109 int nprocs = commGrid->GetSize();
112 MPI_File_open(World, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
114 int rankinrow = commGrid->GetRankInProcRow();
115 int rankincol = commGrid->GetRankInProcCol();
116 int rowneighs = commGrid->GetGridCols();
117 int colneighs = commGrid->GetGridRows();
119 IT * colcnts =
new IT[rowneighs];
120 IT * rowcnts =
new IT[colneighs];
121 rowcnts[rankincol] = getlocalrows();
122 colcnts[rankinrow] = getlocalcols();
124 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), colcnts, 1, MPIType<IT>(), commGrid->GetRowWorld());
125 IT coloffset = accumulate(colcnts, colcnts+rankinrow, static_cast<IT>(0));
127 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), rowcnts, 1, MPIType<IT>(), commGrid->GetColWorld());
128 IT rowoffset = accumulate(rowcnts, rowcnts+rankincol, static_cast<IT>(0));
131 IT * prelens =
new IT[nprocs];
132 prelens[rank] = 2*getlocalnnz();
133 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), prelens, 1, MPIType<IT>(), commGrid->GetWorld());
134 IT lengthuntil = accumulate(prelens, prelens+rank, static_cast<IT>(0));
138 MPI_Offset disp = lengthuntil *
sizeof(uint32_t);
139 MPI_File_set_view(thefile, disp, MPI_UNSIGNED, MPI_UNSIGNED,
"native", MPI_INFO_NULL);
140 uint32_t * gen_edges =
new uint32_t[prelens[rank]];
143 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
145 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
147 gen_edges[k++] = (uint32_t) (nzit.rowid() + rowoffset);
148 gen_edges[k++] = (uint32_t) (colit.colid() + coloffset);
151 assert(k == prelens[rank]);
152 MPI_File_write(thefile, gen_edges, prelens[rank], MPI_UNSIGNED, NULL);
153 MPI_File_close(&thefile);
160 template <
class IT,
class NT,
class DER>
163 if(rhs.spSeq != NULL)
164 spSeq =
new DER(*(rhs.spSeq));
166 commGrid = rhs.commGrid;
169 template <
class IT,
class NT,
class DER>
176 if(spSeq != NULL)
delete spSeq;
177 if(rhs.spSeq != NULL)
178 spSeq =
new DER(*(rhs.spSeq));
180 commGrid = rhs.commGrid;
185 template <
class IT,
class NT,
class DER>
190 if(*commGrid == *rhs.commGrid)
192 (*spSeq) += (*(rhs.spSeq));
196 cout <<
"Grids are not comparable for parallel addition (A+B)" << endl;
201 cout<<
"Missing feature (A+A): Use multiply with 2 instead !"<<endl;
206 template <
class IT,
class NT,
class DER>
209 IT totnnz = getnnz();
211 IT localnnz = (IT) spSeq->getnnz();
212 MPI_Allreduce( &localnnz, &maxnnz, 1, MPIType<IT>(), MPI_MAX, commGrid->GetWorld());
213 if(totnnz == 0)
return 1;
214 return static_cast<float>((commGrid->GetSize() * maxnnz)) /
static_cast<float>(totnnz);
217 template <
class IT,
class NT,
class DER>
221 IT localnnz = spSeq->getnnz();
222 MPI_Allreduce( &localnnz, &totalnnz, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
226 template <
class IT,
class NT,
class DER>
230 IT localrows = spSeq->getnrow();
231 MPI_Allreduce( &localrows, &totalrows, 1, MPIType<IT>(), MPI_SUM, commGrid->GetColWorld());
235 template <
class IT,
class NT,
class DER>
239 IT localcols = spSeq->getncol();
240 MPI_Allreduce( &localcols, &totalcols, 1, MPIType<IT>(), MPI_SUM, commGrid->GetRowWorld());
244 template <
class IT,
class NT,
class DER>
245 template <
typename _BinaryOperation>
249 if(!(*commGrid == *(x.commGrid)))
251 cout <<
"Grids are not comparable for SpParMat::DimApply" << endl;
255 MPI_Comm World = x.commGrid->GetWorld();
256 MPI_Comm ColWorld = x.commGrid->GetColWorld();
257 MPI_Comm RowWorld = x.commGrid->GetRowWorld();
264 int diagneigh = x.commGrid->GetComplementRank();
266 MPI_Sendrecv(&xsize, 1, MPI_INT, diagneigh,
TRX, &trxsize, 1, MPI_INT, diagneigh,
TRX, World, &status);
268 NT * trxnums =
new NT[trxsize];
269 MPI_Sendrecv(const_cast<NT*>(
SpHelper::p2a(x.arr)), xsize, MPIType<NT>(), diagneigh,
TRX, trxnums, trxsize, MPIType<NT>(), diagneigh,
TRX, World, &status);
271 int colneighs, colrank;
272 MPI_Comm_size(ColWorld, &colneighs);
273 MPI_Comm_rank(ColWorld, &colrank);
274 int * colsize =
new int[colneighs];
275 colsize[colrank] = trxsize;
277 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, colsize, 1, MPI_INT, ColWorld);
278 int * dpls =
new int[colneighs]();
279 std::partial_sum(colsize, colsize+colneighs-1, dpls+1);
280 int accsize = std::accumulate(colsize, colsize+colneighs, 0);
281 NT * scaler =
new NT[accsize];
283 MPI_Allgatherv(trxnums, trxsize, MPIType<NT>(), scaler, colsize, dpls, MPIType<NT>(), ColWorld);
286 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
288 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
290 nzit.value() = __binary_op(nzit.value(), scaler[colit.colid()]);
299 int rowneighs, rowrank;
300 MPI_Comm_size(RowWorld, &rowneighs);
301 MPI_Comm_rank(RowWorld, &rowrank);
302 int * rowsize =
new int[rowneighs];
303 rowsize[rowrank] = xsize;
304 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, rowsize, 1, MPI_INT, RowWorld);
305 int * dpls =
new int[rowneighs]();
306 std::partial_sum(rowsize, rowsize+rowneighs-1, dpls+1);
307 int accsize = std::accumulate(rowsize, rowsize+rowneighs, 0);
308 NT * scaler =
new NT[accsize];
310 MPI_Allgatherv(const_cast<NT*>(
SpHelper::p2a(x.arr)), xsize, MPIType<NT>(), scaler, rowsize, dpls, MPIType<NT>(), RowWorld);
313 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
315 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
317 nzit.value() = __binary_op(nzit.value(), scaler[nzit.rowid()]);
325 cout <<
"Unknown scaling dimension, returning..." << endl;
331 template <
class IT,
class NT,
class DER>
332 template <
typename _BinaryOperation,
typename _UnaryOperation >
336 Reduce(parvec, dim, __binary_op,
id, __unary_op);
341 template <
class IT,
class NT,
class DER>
342 template <
typename _BinaryOperation>
350 template <
class IT,
class NT,
class DER>
351 template <
typename VT,
typename _BinaryOperation>
357 template <
class IT,
class NT,
class DER>
358 template <
typename VT,
typename GIT,
typename _BinaryOperation>
364 template <
class IT,
class NT,
class DER>
365 template <
typename VT,
typename GIT,
typename _BinaryOperation,
typename _UnaryOperation>
368 if(*rvec.commGrid != *commGrid)
378 IT n_thiscol = getlocalcols();
379 int colneighs = commGrid->GetGridRows();
380 int colrank = commGrid->GetRankInProcCol();
382 GIT * loclens =
new GIT[colneighs];
383 GIT * lensums =
new GIT[colneighs+1]();
385 GIT n_perproc = n_thiscol / colneighs;
386 if(colrank == colneighs-1)
387 loclens[colrank] = n_thiscol - (n_perproc*colrank);
389 loclens[colrank] = n_perproc;
391 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<GIT>(), loclens, 1, MPIType<GIT>(), commGrid->GetColWorld());
392 partial_sum(loclens, loclens+colneighs, lensums+1);
395 typename DER::SpColIter colit = spSeq->begcol();
396 for(
int i=0; i< colneighs; ++i)
398 VT * sendbuf =
new VT[loclens[i]];
399 fill(sendbuf, sendbuf+loclens[i],
id);
401 for(; colit != spSeq->endcol() && colit.colid() < lensums[i+1]; ++colit)
403 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
405 sendbuf[colit.colid()-lensums[i]] = __binary_op(static_cast<VT>(__unary_op(nzit.value())), sendbuf[colit.colid()-lensums[i]]);
411 trarr.resize(loclens[i]);
420 GIT trlen = trarr.size();
421 int diagneigh = commGrid->GetComplementRank();
423 MPI_Sendrecv(&trlen, 1, MPIType<IT>(), diagneigh,
TRNNZ, &reallen, 1, MPIType<IT>(), diagneigh,
TRNNZ, commGrid->GetWorld(), &status);
425 rvec.arr.resize(reallen);
426 MPI_Sendrecv(
SpHelper::p2a(trarr), trlen, MPIType<VT>(), diagneigh,
TRX,
SpHelper::p2a(rvec.arr), reallen, MPIType<VT>(), diagneigh,
TRX, commGrid->GetWorld(), &status);
427 rvec.glen = getncol();
433 rvec.glen = getnrow();
434 int rowneighs = commGrid->GetGridCols();
435 int rowrank = commGrid->GetRankInProcRow();
436 GIT * loclens =
new GIT[rowneighs];
437 GIT * lensums =
new GIT[rowneighs+1]();
438 loclens[rowrank] = rvec.MyLocLength();
439 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<GIT>(), loclens, 1, MPIType<GIT>(), commGrid->GetRowWorld());
440 partial_sum(loclens, loclens+rowneighs, lensums+1);
443 rvec.arr.resize(loclens[rowrank],
id);
447 #define MAXCOLUMNBATCH 5 * 1024 * 1024
448 typename DER::SpColIter begfinger = spSeq->begcol();
451 int numreducecalls = (int) ceil(static_cast<float>(spSeq->getnzc()) / static_cast<float>(
MAXCOLUMNBATCH));
453 MPI_Allreduce( &numreducecalls, &maxreducecalls, 1, MPI_INT, MPI_MAX, commGrid->GetRowWorld());
455 for(
int k=0; k< maxreducecalls; ++k)
457 vector<typename DER::SpColIter::NzIter> nziters;
458 typename DER::SpColIter curfinger = begfinger;
459 for(; curfinger != spSeq->endcol() && nziters.size() <
MAXCOLUMNBATCH ; ++curfinger)
461 nziters.push_back(spSeq->begnz(curfinger));
463 for(
int i=0; i< rowneighs; ++i)
465 VT * sendbuf =
new VT[loclens[i]];
466 fill(sendbuf, sendbuf+loclens[i],
id);
468 typename DER::SpColIter colit = begfinger;
470 for(; colit != curfinger; ++colit, ++colcnt)
472 typename DER::SpColIter::NzIter nzit = nziters[colcnt];
473 for(; nzit != spSeq->endnz(colit) && nzit.rowid() < lensums[i+1]; ++nzit)
475 sendbuf[nzit.rowid()-lensums[i]] = __binary_op(static_cast<VT>(__unary_op(nzit.value())), sendbuf[nzit.rowid()-lensums[i]]);
477 nziters[colcnt] = nzit;
483 for(
int j=0; j< loclens[i]; ++j)
485 sendbuf[j] = __binary_op(rvec.arr[j], sendbuf[j]);
492 begfinger = curfinger;
496 catch (length_error& le)
498 cerr <<
"Length error: " << le.what() << endl;
504 cout <<
"Unknown reduction dimension, returning empty vector" << endl;
517 template <
class IT,
class NT,
class DER>
518 template <
typename VT,
typename _BinaryOperation,
typename _UnaryOperation>
524 outs <<
"SpParMat::Reduce(): Return vector's zero is different than set id" << endl;
525 outs <<
"Setting rvec.zero to id (" <<
id <<
") instead" << endl;
529 if(*rvec.commGrid != *commGrid)
538 VT * sendbuf =
new VT[getlocalcols()];
539 fill(sendbuf, sendbuf+getlocalcols(),
id);
541 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
543 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
545 sendbuf[colit.colid()] = __binary_op(static_cast<VT>(__unary_op(nzit.value())), sendbuf[colit.colid()]);
549 int root = commGrid->GetDiagOfProcCol();
552 rvec.arr.resize(getlocalcols());
561 VT * sendbuf =
new VT[getlocalrows()];
562 fill(sendbuf, sendbuf+getlocalrows(),
id);
564 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
566 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
568 sendbuf[nzit.rowid()] = __binary_op(static_cast<VT>(__unary_op(nzit.value())), sendbuf[nzit.rowid()]);
572 int root = commGrid->GetDiagOfProcRow();
575 rvec.arr.resize(getlocalrows());
584 cout <<
"Unknown reduction dimension, returning empty vector" << endl;
590 template <
class IT,
class NT,
class DER>
591 template <
typename NNT,
typename NDER>
594 NDER * convert =
new NDER(*spSeq);
599 template <
class IT,
class NT,
class DER>
600 template <
typename NIT,
typename NNT,
typename NDER>
603 NDER * convert =
new NDER(*spSeq);
611 template <
class IT,
class NT,
class DER>
615 DER * tempseq =
new DER((*spSeq)(ri, ci));
626 template <
class IT,
class NT,
class DER>
627 template <
typename PTNTBOOL,
typename PTBOOLNT>
633 if((*(ri.commGrid) != *(commGrid)) || (*(ci.commGrid) != *(commGrid)))
643 locmax_ri = *max_element(ri.arr.begin(), ri.arr.end());
645 locmax_ci = *max_element(ci.arr.begin(), ci.arr.end());
647 IT total_m = getnrow();
648 IT total_n = getncol();
649 if(locmax_ri > total_m || locmax_ci > total_n)
657 IT roffset = ri.RowLenUntil();
658 IT rrowlen = ri.MyRowLength();
659 IT coffset = ci.RowLenUntil();
660 IT crowlen = ci.MyRowLength();
668 IT rowneighs = commGrid->GetGridCols();
669 IT totalm = getnrow();
670 IT totaln = getncol();
671 IT m_perproccol = totalm / rowneighs;
672 IT n_perproccol = totaln / rowneighs;
675 IT diagneigh = commGrid->GetComplementRank();
676 IT mylocalrows = getlocalrows();
677 IT mylocalcols = getlocalcols();
680 MPI_Sendrecv(&mylocalrows, 1, MPIType<IT>(), diagneigh,
TRROWX, &trlocalrows, 1, MPIType<IT>(), diagneigh,
TRROWX, commGrid->GetWorld(), &status);
683 vector< vector<IT> > rowid(rowneighs);
684 vector< vector<IT> > colid(rowneighs);
687 IT locvec = ri.arr.size();
688 for(
typename vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
692 IT rowrec = (m_perproccol!=0) ? std::min(ri.arr[i] / m_perproccol, rowneighs-1) : (rowneighs-1);
695 rowid[rowrec].push_back( i + roffset);
696 colid[rowrec].push_back(ri.arr[i] - (rowrec * m_perproccol));
699 int * sendcnt =
new int[rowneighs];
700 int * recvcnt =
new int[rowneighs];
701 for(IT i=0; i<rowneighs; ++i)
702 sendcnt[i] = rowid[i].
size();
704 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, commGrid->GetRowWorld());
705 int * sdispls =
new int[rowneighs]();
706 int * rdispls =
new int[rowneighs]();
707 partial_sum(sendcnt, sendcnt+rowneighs-1, sdispls+1);
708 partial_sum(recvcnt, recvcnt+rowneighs-1, rdispls+1);
709 IT p_nnz = accumulate(recvcnt,recvcnt+rowneighs, static_cast<IT>(0));
712 IT * p_rows =
new IT[p_nnz];
713 IT * p_cols =
new IT[p_nnz];
714 IT * senddata =
new IT[locvec];
715 for(
int i=0; i<rowneighs; ++i)
717 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
718 vector<IT>().swap(rowid[i]);
720 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_rows, recvcnt, rdispls, MPIType<IT>(), commGrid->GetRowWorld());
722 for(
int i=0; i<rowneighs; ++i)
724 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
725 vector<IT>().swap(colid[i]);
727 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_cols, recvcnt, rdispls, MPIType<IT>(), commGrid->GetRowWorld());
730 tuple<IT,IT,bool> * p_tuples =
new tuple<IT,IT,bool>[p_nnz];
731 for(IT i=0; i< p_nnz; ++i)
733 p_tuples[i] = make_tuple(p_rows[i], p_cols[i], 1);
737 DER_IT * PSeq =
new DER_IT();
738 PSeq->Create( p_nnz, rrowlen, trlocalrows, p_tuples);
743 DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
753 *
this = Mult_AnXBn_DoubleBuff<PTBOOLNT, NT, DER>(P, *
this,
false,
true);
761 *
this = Mult_AnXBn_DoubleBuff<PTNTBOOL, NT, DER>(*
this, P,
true,
true);
766 PA = Mult_AnXBn_DoubleBuff<PTBOOLNT, NT, DER>(P,*
this);
768 return Mult_AnXBn_DoubleBuff<PTNTBOOL, NT, DER>(PA, P);
778 PA = Mult_AnXBn_DoubleBuff<PTBOOLNT, NT, DER>(P, *
this);
785 locvec = ci.arr.size();
786 for(
typename vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
789 IT rowrec = (n_perproccol!=0) ? std::min(ci.arr[i] / n_perproccol, rowneighs-1) : (rowneighs-1);
792 rowid[rowrec].push_back( i + coffset);
793 colid[rowrec].push_back(ci.arr[i] - (rowrec * n_perproccol));
796 for(IT i=0; i<rowneighs; ++i)
797 sendcnt[i] = rowid[i].
size();
799 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, commGrid->GetRowWorld());
800 fill(sdispls, sdispls+rowneighs, 0);
801 fill(rdispls, rdispls+rowneighs, 0);
802 partial_sum(sendcnt, sendcnt+rowneighs-1, sdispls+1);
803 partial_sum(recvcnt, recvcnt+rowneighs-1, rdispls+1);
804 IT q_nnz = accumulate(recvcnt,recvcnt+rowneighs, static_cast<IT>(0));
807 IT * q_rows =
new IT[q_nnz];
808 IT * q_cols =
new IT[q_nnz];
809 senddata =
new IT[locvec];
810 for(
int i=0; i<rowneighs; ++i)
812 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
813 vector<IT>().swap(rowid[i]);
815 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), q_rows, recvcnt, rdispls, MPIType<IT>(), commGrid->GetRowWorld());
817 for(
int i=0; i<rowneighs; ++i)
819 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
820 vector<IT>().swap(colid[i]);
822 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), q_cols, recvcnt, rdispls, MPIType<IT>(), commGrid->GetRowWorld());
823 DeleteAll(senddata, sendcnt, recvcnt, sdispls, rdispls);
825 tuple<IT,IT,bool> * q_tuples =
new tuple<IT,IT,bool>[q_nnz];
826 for(IT i=0; i< q_nnz; ++i)
828 q_tuples[i] = make_tuple(q_rows[i], q_cols[i], 1);
831 DER_IT * QSeq =
new DER_IT();
832 QSeq->Create( q_nnz, crowlen, mylocalcols, q_tuples);
840 *
this = Mult_AnXBn_DoubleBuff<PTNTBOOL, NT, DER>(PA, Q,
true,
true);
845 return Mult_AnXBn_DoubleBuff<PTNTBOOL, NT, DER>(PA, Q);
850 template <
class IT,
class NT,
class DER>
855 if((*(ri.commGrid) != *(B.commGrid)) || (*(ci.commGrid) != *(B.commGrid)))
860 IT total_m_A = getnrow();
861 IT total_n_A = getncol();
865 if(total_m_B != ri.TotalLength())
867 SpParHelper::Print(
"First dimension of B does NOT match the length of ri, SpAsgn fails !");
870 if(total_n_B != ci.TotalLength())
872 SpParHelper::Print(
"Second dimension of B does NOT match the length of ci, SpAsgn fails !");
879 rvec->
iota(total_m_B, 0);
886 qvec->
iota(total_n_B, 0);
893 template <
class IT,
class NT,
class DER>
898 if((*(ri.commGrid) != *(commGrid)) || (*(ci.commGrid) != *(commGrid)))
908 locmax_ri = *max_element(ri.arr.begin(), ri.arr.end());
910 locmax_ci = *max_element(ci.arr.begin(), ci.arr.end());
912 IT total_m = getnrow();
913 IT total_n = getncol();
914 if(locmax_ri > total_m || locmax_ci > total_n)
929 template <
class IT,
class NT,
class DER>
932 if(*commGrid == *rhs.commGrid)
934 spSeq->EWiseMult(*(rhs.spSeq), exclude);
938 cout <<
"Grids are not comparable, EWiseMult() fails !" << endl;
944 template <
class IT,
class NT,
class DER>
947 if(*commGrid == *rhs.commGrid)
949 spSeq->EWiseScale(rhs.array, rhs.m, rhs.n);
953 cout <<
"Grids are not comparable, EWiseScale() fails !" << endl;
958 template <
class IT,
class NT,
class DER>
959 template <
typename _BinaryOperation>
962 if(*commGrid == *rhs.commGrid)
964 if(getlocalrows() == rhs.m && getlocalcols() == rhs.n)
966 spSeq->UpdateDense(rhs.array, __binary_op);
970 cout <<
"Matrices have different dimensions, UpdateDense() fails !" << endl;
976 cout <<
"Grids are not comparable, UpdateDense() fails !" << endl;
981 template <
class IT,
class NT,
class DER>
988 if (commGrid->myrank == 0)
989 cout <<
"As a whole: " << mm <<
" rows and "<< nn <<
" columns and "<< nznz <<
" nonzeros" << endl;
992 IT allprocs = commGrid->grrows * commGrid->grcols;
993 for(IT i=0; i< allprocs; ++i)
995 if (commGrid->myrank == i)
997 cout <<
"Processor (" << commGrid->GetRankInProcRow() <<
"," << commGrid->GetRankInProcCol() <<
")'s data: " << endl;
1000 MPI_Barrier(commGrid->GetWorld());
1005 template <
class IT,
class NT,
class DER>
1008 int local =
static_cast<int>((*spSeq) == (*(rhs.spSeq)));
1010 MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
1011 return static_cast<bool>(whole);
1019 template <
class IT,
class NT,
class DER>
1022 int nprocs = commGrid->GetSize();
1023 int * sendcnt =
new int[nprocs];
1024 int * recvcnt =
new int[nprocs];
1025 for(
int i=0; i<nprocs; ++i)
1026 sendcnt[i] = data[i].
size();
1028 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, commGrid->GetWorld());
1029 int * sdispls =
new int[nprocs]();
1030 int * rdispls =
new int[nprocs]();
1031 partial_sum(sendcnt, sendcnt+nprocs-1, sdispls+1);
1032 partial_sum(recvcnt, recvcnt+nprocs-1, rdispls+1);
1034 tuple<IT,IT,NT> * senddata =
new tuple<IT,IT,NT>[locsize];
1035 for(
int i=0; i<nprocs; ++i)
1037 copy(data[i].begin(), data[i].end(), senddata+sdispls[i]);
1038 vector< tuple<IT,IT,NT> >().swap(data[i]);
1040 MPI_Datatype MPI_triple;
1041 MPI_Type_contiguous(
sizeof(tuple<IT,IT,NT>), MPI_CHAR, &MPI_triple);
1042 MPI_Type_commit(&MPI_triple);
1044 IT totrecv = accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
1045 tuple<IT,IT,NT> * recvdata =
new tuple<IT,IT,NT>[totrecv];
1046 MPI_Alltoallv(senddata, sendcnt, sdispls, MPI_triple, recvdata, recvcnt, rdispls, MPI_triple, commGrid->GetWorld());
1048 DeleteAll(senddata, sendcnt, recvcnt, sdispls, rdispls);
1049 MPI_Type_free(&MPI_triple);
1051 int r = commGrid->GetGridRows();
1052 int s = commGrid->GetGridCols();
1053 IT m_perproc = total_m / r;
1054 IT n_perproc = total_n / s;
1055 int myprocrow = commGrid->GetRankInProcCol();
1056 int myproccol = commGrid->GetRankInProcRow();
1057 IT locrows, loccols;
1058 if(myprocrow != r-1) locrows = m_perproc;
1059 else locrows = total_m - myprocrow * m_perproc;
1060 if(myproccol != s-1) loccols = n_perproc;
1061 else loccols = total_n - myproccol * n_perproc;
1064 spSeq =
new DER(A,
false);
1069 template <
class IT,
class NT,
class DER>
1073 if((*(distrows.commGrid) != *(distcols.commGrid)) || (*(distcols.commGrid) != *(distvals.commGrid)))
1078 if((distrows.TotalLength() != distcols.TotalLength()) || (distcols.TotalLength() != distvals.TotalLength()))
1084 commGrid = distrows.commGrid;
1085 int nprocs = commGrid->GetSize();
1086 vector< vector < tuple<IT,IT,NT> > > data(nprocs);
1089 for(IT i=0; i<locsize; ++i)
1092 int owner = Owner(total_m, total_n, distrows.arr[i], distcols.arr[i], lrow, lcol);
1093 data[owner].push_back(make_tuple(lrow,lcol,distvals.arr[i]));
1095 SparseCommon(data, locsize, total_m, total_n);
1100 template <
class IT,
class NT,
class DER>
1104 if((*(distrows.commGrid) != *(distcols.commGrid)) )
1109 if((distrows.TotalLength() != distcols.TotalLength()) )
1114 commGrid = distrows.commGrid;
1115 int nprocs = commGrid->GetSize();
1116 vector< vector < tuple<IT,IT,NT> > > data(nprocs);
1119 for(IT i=0; i<locsize; ++i)
1122 int owner = Owner(total_m, total_n, distrows.arr[i], distcols.arr[i], lrow, lcol);
1123 data[owner].push_back(make_tuple(lrow,lcol,val));
1125 SparseCommon(data, locsize, total_m, total_n);
1128 template <
class IT,
class NT,
class DER>
1129 template <
class DELIT>
1132 commGrid = DEL.commGrid;
1133 typedef typename DER::LocalIT LIT;
1135 int nprocs = commGrid->GetSize();
1136 int r = commGrid->GetGridRows();
1137 int s = commGrid->GetGridCols();
1138 vector< vector<LIT> > data(nprocs);
1143 if(
sizeof(LIT) <
sizeof(DELIT))
1146 outs <<
"Warning: Using smaller indices for the matrix than DistEdgeList\n";
1147 outs <<
"Local matrices are " << m_perproc <<
"-by-" << n_perproc << endl;
1155 int64_t perstage = DEL.nedges / stages;
1157 vector<LIT> alledges;
1161 for(LIT s=0; s< stages; ++s)
1164 int64_t n_after= ((s==(stages-1))? DEL.nedges : ((s+1)*perstage));
1171 for (
int64_t i = n_befor; i < n_after; i++)
1173 int64_t fr = get_v0_from_edge(&(DEL.pedges[i]));
1174 int64_t to = get_v1_from_edge(&(DEL.pedges[i]));
1176 if(fr >= 0 && to >= 0)
1178 int rowowner = min(static_cast<int>(fr / m_perproc), maxr);
1179 int colowner = min(static_cast<int>(to / n_perproc), maxs);
1180 int owner = commGrid->GetRank(rowowner, colowner);
1181 LIT rowid = fr - (rowowner * m_perproc);
1182 LIT colid = to - (colowner * n_perproc);
1183 data[owner].push_back(rowid);
1184 data[owner].push_back(colid);
1191 for (
int64_t i = n_befor; i < n_after; i++)
1193 if(DEL.edges[2*i+0] >= 0 && DEL.edges[2*i+1] >= 0)
1195 int rowowner = min(static_cast<int>(DEL.edges[2*i+0] / m_perproc), maxr);
1196 int colowner = min(static_cast<int>(DEL.edges[2*i+1] / n_perproc), maxs);
1197 int owner = commGrid->GetRank(rowowner, colowner);
1198 LIT rowid = DEL.edges[2*i+0]- (rowowner * m_perproc);
1199 LIT colid = DEL.edges[2*i+1]- (colowner * n_perproc);
1200 data[owner].push_back(rowid);
1201 data[owner].push_back(colid);
1207 LIT * sendbuf =
new LIT[2*realedges];
1208 int * sendcnt =
new int[nprocs];
1209 int * sdispls =
new int[nprocs];
1210 for(
int i=0; i<nprocs; ++i)
1211 sendcnt[i] = data[i].
size();
1213 int * rdispls =
new int[nprocs];
1214 int * recvcnt =
new int[nprocs];
1215 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT,commGrid->GetWorld());
1219 for(
int i=0; i<nprocs-1; ++i)
1221 sdispls[i+1] = sdispls[i] + sendcnt[i];
1222 rdispls[i+1] = rdispls[i] + recvcnt[i];
1224 for(
int i=0; i<nprocs; ++i)
1225 copy(data[i].begin(), data[i].end(), sendbuf+sdispls[i]);
1228 for(
int i=0; i<nprocs; ++i)
1229 vector<LIT>().swap(data[i]);
1233 IT thisrecv = accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
1234 LIT * recvbuf =
new LIT[thisrecv];
1235 totrecv += thisrecv;
1237 MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<LIT>(), recvbuf, recvcnt, rdispls, MPIType<LIT>(), commGrid->GetWorld());
1238 DeleteAll(sendcnt, recvcnt, sdispls, rdispls,sendbuf);
1239 copy (recvbuf,recvbuf+thisrecv,back_inserter(alledges));
1243 int myprocrow = commGrid->GetRankInProcCol();
1244 int myproccol = commGrid->GetRankInProcRow();
1245 LIT locrows, loccols;
1246 if(myprocrow != r-1) locrows = m_perproc;
1247 else locrows = DEL.
getGlobalV() - myprocrow * m_perproc;
1248 if(myproccol != s-1) loccols = n_perproc;
1249 else loccols = DEL.
getGlobalV() - myproccol * n_perproc;
1252 spSeq =
new DER(A,
false);
1255 template <
class IT,
class NT,
class DER>
1258 MPI_Comm DiagWorld = commGrid->GetDiagWorld();
1261 if(DiagWorld != MPI_COMM_NULL)
1266 spSeq =
new DER(tuples,
false);
1268 MPI_Allreduce( &removed, & totrem, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
1272 template <
class IT,
class NT,
class DER>
1273 template <
typename LIT,
typename OT>
1276 if(spSeq->getnsplit() > 0)
1278 SpParHelper::Print(
"Can not declare preallocated buffers for multithreaded execution");
1283 typename DER::LocalIT mA = spSeq->getnrow();
1284 typename DER::LocalIT p_c = commGrid->GetGridCols();
1285 vector<bool> isthere(mA,
false);
1286 typename DER::LocalIT perproc = mA / p_c;
1287 vector<int> maxlens(p_c,0);
1289 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
1291 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
1293 typename DER::LocalIT rowid = nzit.rowid();
1296 typename DER::LocalIT owner = min(nzit.rowid() / perproc, p_c-1);
1298 isthere[rowid] =
true;
1303 optbuf.
Set(maxlens,mA);
1306 template <
class IT,
class NT,
class DER>
1309 spSeq->RowSplit(numsplits);
1317 template <
class IT,
class NT,
class DER>
1318 template <
typename SR>
1322 shared_ptr<CommGrid> Grid =
ProductGrid(commGrid.get(), commGrid.get(), stages, dummy, dummy);
1324 IT AA_m = spSeq->getnrow();
1325 IT AA_n = spSeq->getncol();
1327 DER seqTrn = spSeq->TransposeConst();
1329 MPI_Barrier(commGrid->GetWorld());
1331 IT ** NRecvSizes = SpHelper::allocate2D<IT>(DER::esscount, stages);
1332 IT ** TRecvSizes = SpHelper::allocate2D<IT>(DER::esscount, stages);
1340 vector< SpTuples<IT,NT> *> tomerge;
1342 int Nself = commGrid->GetRankInProcRow();
1343 int Tself = commGrid->GetRankInProcCol();
1345 for(
int i = 0; i < stages; ++i)
1354 ess.resize(DER::esscount);
1355 for(
int j=0; j< DER::esscount; ++j)
1357 ess[j] = NRecvSizes[j][i];
1371 ess.resize(DER::esscount);
1372 for(
int j=0; j< DER::esscount; ++j)
1374 ess[j] = TRecvSizes[j][i];
1380 SpTuples<IT,NT> * AA_cont = MultiplyReturnTuples<SR, NT>(*NRecv, *TRecv,
false,
true);
1381 if(!AA_cont->isZero())
1382 tomerge.push_back(AA_cont);
1398 spSeq =
new DER(MergeAll<SR>(tomerge, AA_m, AA_n),
false);
1399 for(
unsigned int i=0; i<tomerge.size(); ++i)
1406 template <
class IT,
class NT,
class DER>
1409 if(commGrid->myproccol == commGrid->myprocrow)
1415 typedef typename DER::LocalIT LIT;
1417 LIT locnnz = Atuples.
getnnz();
1418 LIT * rows =
new LIT[locnnz];
1419 LIT * cols =
new LIT[locnnz];
1420 NT * vals =
new NT[locnnz];
1421 for(LIT i=0; i < locnnz; ++i)
1427 LIT locm = getlocalcols();
1428 LIT locn = getlocalrows();
1431 LIT remotem, remoten, remotennz;
1433 int diagneigh = commGrid->GetComplementRank();
1436 MPI_Sendrecv(&locnnz, 1, MPIType<LIT>(), diagneigh,
TRTAGNZ, &remotennz, 1, MPIType<LIT>(), diagneigh,
TRTAGNZ, commGrid->GetWorld(), &status);
1437 MPI_Sendrecv(&locn, 1, MPIType<LIT>(), diagneigh,
TRTAGM, &remotem, 1, MPIType<LIT>(), diagneigh,
TRTAGM, commGrid->GetWorld(), &status);
1438 MPI_Sendrecv(&locm, 1, MPIType<LIT>(), diagneigh,
TRTAGN, &remoten, 1, MPIType<LIT>(), diagneigh,
TRTAGN, commGrid->GetWorld(), &status);
1440 LIT * rowsrecv =
new LIT[remotennz];
1441 MPI_Sendrecv(rows, locnnz, MPIType<LIT>(), diagneigh,
TRTAGROWS, rowsrecv, remotennz, MPIType<LIT>(), diagneigh,
TRTAGROWS, commGrid->GetWorld(), &status);
1444 LIT * colsrecv =
new LIT[remotennz];
1445 MPI_Sendrecv(cols, locnnz, MPIType<LIT>(), diagneigh,
TRTAGCOLS, colsrecv, remotennz, MPIType<LIT>(), diagneigh,
TRTAGCOLS, commGrid->GetWorld(), &status);
1448 NT * valsrecv =
new NT[remotennz];
1449 MPI_Sendrecv(vals, locnnz, MPIType<NT>(), diagneigh,
TRTAGVALS, valsrecv, remotennz, MPIType<NT>(), diagneigh,
TRTAGVALS, commGrid->GetWorld(), &status);
1452 tuple<LIT,LIT,NT> * arrtuples =
new tuple<LIT,LIT,NT>[remotennz];
1453 for(LIT i=0; i< remotennz; ++i)
1455 arrtuples[i] = make_tuple(rowsrecv[i], colsrecv[i], valsrecv[i]);
1457 DeleteAll(rowsrecv, colsrecv, valsrecv);
1459 sort(arrtuples , arrtuples+remotennz, collexicogcmp );
1462 spSeq->Create( remotennz, remotem, remoten, arrtuples);
1467 template <
class IT,
class NT,
class DER>
1468 template <
class HANDLER>
1471 int proccols = commGrid->GetGridCols();
1472 int procrows = commGrid->GetGridRows();
1473 IT totalm = getnrow();
1474 IT totaln = getncol();
1475 IT totnnz = getnnz();
1478 if(commGrid->GetRank() == 0)
1481 std::stringstream strm;
1482 strm << totalm <<
" " << totaln <<
" " << totnnz << endl;
1484 out.open(filename.c_str(),ios_base::trunc);
1485 flinelen = s.length();
1486 out.write(s.c_str(), flinelen);
1489 int colrank = commGrid->GetRankInProcCol();
1490 int colneighs = commGrid->GetGridRows();
1491 IT * locnrows =
new IT[colneighs];
1492 locnrows[colrank] = (IT) getlocalrows();
1493 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(),locnrows, 1, MPIType<IT>(), commGrid->GetColWorld());
1494 IT roffset = accumulate(locnrows, locnrows+colrank, 0);
1497 MPI_Datatype datatype;
1498 MPI_Type_contiguous(
sizeof(pair<IT,NT>), MPI_CHAR, &datatype);
1499 MPI_Type_commit(&datatype);
1501 for(
int i = 0; i < procrows; i++)
1503 if(commGrid->GetRankInProcCol() == i)
1505 IT localrows = spSeq->getnrow();
1506 vector< vector< pair<IT,NT> > > csr(localrows);
1507 if(commGrid->GetRankInProcRow() == 0)
1509 IT localcols = spSeq->getncol();
1510 MPI_Bcast(&localcols, 1, MPIType<IT>(), 0, commGrid->GetRowWorld());
1511 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
1513 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
1515 csr[nzit.rowid()].push_back( make_pair(colit.colid(), nzit.value()) );
1522 MPI_Bcast(&n_perproc, 1, MPIType<IT>(), 0, commGrid->GetRowWorld());
1523 IT noffset = commGrid->GetRankInProcRow() * n_perproc;
1524 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
1526 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
1528 csr[nzit.rowid()].push_back( make_pair(colit.colid() + noffset, nzit.value()) );
1532 pair<IT,NT> * ents = NULL;
1533 int * gsizes = NULL, * dpls = NULL;
1534 if(commGrid->GetRankInProcRow() == 0)
1536 out.open(filename.c_str(),std::ios_base::app);
1537 gsizes =
new int[proccols];
1538 dpls =
new int[proccols]();
1540 for(
int j = 0; j < localrows; ++j)
1543 sort(csr[j].begin(), csr[j].end());
1544 int mysize = csr[j].size();
1545 MPI_Gather(&mysize, 1, MPI_INT, gsizes, 1, MPI_INT, 0, commGrid->GetRowWorld());
1546 if(commGrid->GetRankInProcRow() == 0)
1548 rowcnt = std::accumulate(gsizes, gsizes+proccols, static_cast<IT>(0));
1549 std::partial_sum(gsizes, gsizes+proccols-1, dpls+1);
1550 ents =
new pair<IT,NT>[rowcnt];
1555 MPI_Gatherv(
SpHelper::p2a(csr[j]), mysize, datatype, ents, gsizes, dpls, datatype, 0, commGrid->GetRowWorld());
1556 if(commGrid->GetRankInProcRow() == 0)
1558 for(
int k=0; k< rowcnt; ++k)
1563 out << j + roffset + 1 <<
"\t" << ents[k].first + 1 <<
"\t";
1566 out << ents[k].first + 1 <<
"\t" << j + roffset + 1 <<
"\t";
1567 handler.save(out, ents[k].second, j + roffset, ents[k].first);
1573 if(commGrid->GetRankInProcRow() == 0)
1579 MPI_Barrier(commGrid->GetWorld());
1589 template <
class IT,
class NT,
class DER>
1592 int proccols = commGrid->GetGridCols();
1593 int procrows = commGrid->GetGridRows();
1597 IT totalm = getnrow();
1598 IT totaln = getncol();
1599 IT totnnz = getnnz();
1601 if(commGrid->GetRank() == 0)
1604 std::stringstream strm;
1605 strm << 0 <<
" " << totalm <<
" " << totaln <<
" " << totnnz << endl;
1607 std::ofstream out(filename.c_str(),std::ios_base::app);
1608 flinelen = s.length();
1609 out.write(s.c_str(), flinelen);
1614 for(
int i = 0; i < proccols; i++)
1616 if(commGrid->GetRankInProcRow() == i)
1618 if(commGrid->GetRankInProcCol() == 0)
1620 std::ofstream out(filename.c_str(),std::ios_base::app);
1621 std::ostream_iterator<IT> dest(out,
" ");
1623 gsizes =
new IT[procrows];
1624 IT localrows = spSeq->getnrow();
1625 MPI_Bcast(&localrows, 1, MPIType<IT>(), 0, commGrid->GetColWorld());
1627 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
1631 while(netid <= colit.colid())
1633 if(netid < colit.colid())
1639 mysize = colit.nnz();
1640 vertices =
new IT[mysize];
1642 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
1644 vertices[j] = nzit.rowid();
1649 MPI_Gather(&mysize, 1, MPIType<IT>(), gsizes, 1, MPIType<IT>(), 0, commGrid->GetColWorld());
1650 IT colcnt = std::accumulate(gsizes, gsizes+procrows, 0);
1651 IT * ents =
new IT[colcnt];
1652 IT * dpls =
new IT[procrows]();
1653 std::partial_sum(gsizes, gsizes+procrows-1, dpls+1);
1657 MPI_Gatherv(vertices, mysize, MPIType<IT>(), ents, gsizes, dpls, MPIType<IT>(), 0, commGrid->GetColWorld());
1660 std::copy(ents, ents+colcnt, dest);
1664 delete [] ents;
delete [] dpls;
1666 if(netid == colit.colid())
delete [] vertices;
1670 while(netid < spSeq->getncol())
1673 MPI_Gather(&mysize, 1, MPIType<IT>(), gsizes, 1, MPIType<IT>(), 0, commGrid->GetColWorld());
1674 IT colcnt = std::accumulate(gsizes, gsizes+procrows, 0);
1675 IT * ents =
new IT[colcnt];
1676 IT * dpls =
new IT[procrows]();
1677 std::partial_sum(gsizes, gsizes+procrows-1, dpls+1);
1679 MPI_Gatherv(NULL, mysize, MPIType<IT>(), ents, gsizes, dpls, MPIType<IT>(), 0, commGrid->GetColWorld());
1682 std::copy(ents, ents+colcnt, dest);
1686 delete [] ents;
delete [] dpls;
1695 MPI_Bcast(&m_perproc, 1, MPIType<IT>(), 0, commGrid->GetColWorld());
1696 IT moffset = commGrid->GetRankInProcCol() * m_perproc;
1698 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
1702 while(netid <= colit.colid())
1704 if(netid < colit.colid())
1710 mysize = colit.nnz();
1711 vertices =
new IT[mysize];
1713 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
1715 vertices[j] = nzit.rowid() + moffset;
1719 MPI_Gather(&mysize, 1, MPIType<IT>(), gsizes, 1, MPIType<IT>(), 0, commGrid->GetColWorld());
1722 MPI_Gatherv(vertices, mysize, MPIType<IT>(), NULL, NULL, NULL, MPIType<IT>(), 0, commGrid->GetColWorld());
1724 if(netid == colit.colid())
delete [] vertices;
1728 while(netid < spSeq->getncol())
1731 MPI_Gather(&mysize, 1, MPIType<IT>(), gsizes, 1, MPIType<IT>(), 0, commGrid->GetColWorld());
1732 MPI_Gatherv(NULL, mysize, MPIType<IT>(), NULL, NULL, NULL, MPIType<IT>(), 0, commGrid->GetColWorld());
1738 MPI_Barrier(commGrid->GetWorld());
1739 if((i == proccols-1) && (commGrid->GetRankInProcCol() == 0))
1742 MPI_Reduce(&nzc, &totalnzc, 1, MPIType<IT>(), MPI_SUM, 0, commGrid->GetRowWorld());
1744 if(commGrid->GetRank() == 0)
1748 std::ofstream out(filename.c_str(),std::ios_base::in | std::ios_base::out | std::ios_base::binary);
1749 out.seekp(0, std::ios_base::beg);
1752 std::stringstream strm;
1753 strm << 0 <<
" " << totalm <<
" " << totalnzc <<
" " << totnnz;
1755 s.resize(flinelen-1,
' ');
1757 out.write(s.c_str(), flinelen-1);
1767 template <
class IT,
class NT,
class DER>
1768 template <
class HANDLER>
1772 TAU_PROFILE_TIMER(rdtimer,
"ReadDistribute",
"void SpParMat::ReadDistribute (const string & , int, bool, HANDLER, bool)", TAU_DEFAULT);
1773 TAU_PROFILE_START(rdtimer);
1780 if(commGrid->GetRank() == master)
1782 hfile =
ParseHeader(filename, binfile, seeklength);
1784 MPI_Bcast(&seeklength, 1, MPI_INT, master, commGrid->commWorld);
1786 IT total_m, total_n, total_nnz;
1787 IT m_perproc = 0, n_perproc = 0;
1789 int colneighs = commGrid->GetGridRows();
1790 int rowneighs = commGrid->GetGridCols();
1792 IT buffpercolneigh = MEMORYINBYTES / (colneighs * (2 *
sizeof(IT) +
sizeof(NT)));
1793 IT buffperrowneigh = MEMORYINBYTES / (rowneighs * (2 *
sizeof(IT) +
sizeof(NT)));
1800 buffpercolneigh /= colneighs;
1802 SpParHelper::Print(
"COMBBLAS: Parallel I/O requested but binary header is corrupted\n");
1808 buffperrowneigh = std::max(buffperrowneigh, buffpercolneigh);
1809 if(std::max(buffpercolneigh * colneighs, buffperrowneigh * rowneighs) > numeric_limits<int>::max())
1811 SpParHelper::Print(
"COMBBLAS: MPI doesn't support sending int64_t send/recv counts or displacements\n");
1814 int * cdispls =
new int[colneighs];
1815 for (IT i=0; i<colneighs; ++i) cdispls[i] = i*buffpercolneigh;
1816 int * rdispls =
new int[rowneighs];
1817 for (IT i=0; i<rowneighs; ++i) rdispls[i] = i*buffperrowneigh;
1819 int *ccurptrs = NULL, *rcurptrs = NULL;
1826 int rankincol = commGrid->GetRankInProcCol(master);
1827 int rankinrow = commGrid->GetRankInProcRow(master);
1828 vector< tuple<IT, IT, NT> > localtuples;
1830 if(commGrid->GetRank() == master)
1835 total_n = 0; total_m = 0;
1836 BcastEssentials(commGrid->commWorld, total_m, total_n, total_nnz, master);
1842 total_n = 0; total_m = 0;
1843 BcastEssentials(commGrid->commWorld, total_m, total_n, total_nnz, master);
1848 infile.open(filename.c_str());
1850 infile.getline(comment,256);
1851 while(comment[0] ==
'%')
1853 infile.getline(comment,256);
1856 ss << string(comment);
1857 ss >> total_m >> total_n >> total_nnz;
1860 SpParHelper::Print(
"COMBBLAS: Trying to read binary headerless file in parallel, aborting\n");
1861 total_n = 0; total_m = 0;
1862 BcastEssentials(commGrid->commWorld, total_m, total_n, total_nnz, master);
1870 total_nnz = hfile.
nnz;
1872 m_perproc = total_m / colneighs;
1873 n_perproc = total_n / rowneighs;
1874 BcastEssentials(commGrid->commWorld, total_m, total_n, total_nnz, master);
1875 AllocateSetBuffers(rows, cols, vals, rcurptrs, ccurptrs, rowneighs, colneighs, buffpercolneigh);
1877 if(seeklength > 0 && pario)
1879 IT entriestoread = total_nnz / colneighs;
1882 commGrid->OpenDebugFile(
"Read", oput);
1883 oput <<
"Total nnz: " << total_nnz <<
" entries to read: " << entriestoread << endl;
1886 ReadAllMine(binfile, rows, cols, vals, localtuples, rcurptrs, ccurptrs, rdispls, cdispls, m_perproc, n_perproc,
1887 rowneighs, colneighs, buffperrowneigh, buffpercolneigh, entriestoread, handler, rankinrow, transpose);
1891 IT temprow, tempcol;
1893 IT ntrow = 0, ntcol = 0;
1895 bool nonumline = nonum;
1897 for(; cnz < total_nnz; ++cnz)
1901 stringstream linestream;
1905 infile.getline(line, 1024);
1907 linestream >> temprow >> tempcol;
1911 linestream >> skipws;
1912 nonumline = linestream.eof();
1921 handler.binaryfill(binfile, temprow , tempcol, tempval);
1929 colrec = std::min(static_cast<int>(temprow / m_perproc), colneighs-1);
1930 commonindex = colrec * buffpercolneigh + ccurptrs[colrec];
1932 rows[ commonindex ] = temprow;
1933 cols[ commonindex ] = tempcol;
1936 vals[ commonindex ] = nonumline ? handler.getNoNum(ntrow, ntcol) : handler.read(linestream, ntrow, ntcol);
1940 vals[ commonindex ] = tempval;
1942 ++ (ccurptrs[colrec]);
1943 if(ccurptrs[colrec] == buffpercolneigh || (cnz == (total_nnz-1)) )
1945 MPI_Scatter(ccurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankincol, commGrid->colWorld);
1948 IT * temprows =
new IT[recvcount];
1949 IT * tempcols =
new IT[recvcount];
1950 NT * tempvals =
new NT[recvcount];
1953 MPI_Scatterv(rows, ccurptrs, cdispls, MPIType<IT>(), temprows, recvcount, MPIType<IT>(), rankincol, commGrid->colWorld);
1954 MPI_Scatterv(cols, ccurptrs, cdispls, MPIType<IT>(), tempcols, recvcount, MPIType<IT>(), rankincol, commGrid->colWorld);
1955 MPI_Scatterv(vals, ccurptrs, cdispls, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), rankincol, commGrid->colWorld);
1957 fill_n(ccurptrs, colneighs, 0);
1960 HorizontalSend(rows, cols, vals,temprows, tempcols, tempvals, localtuples, rcurptrs, rdispls,
1961 buffperrowneigh, rowneighs, recvcount, m_perproc, n_perproc, rankinrow);
1963 if( cnz != (total_nnz-1) )
1966 rows =
new IT [ buffpercolneigh * colneighs ];
1967 cols =
new IT [ buffpercolneigh * colneighs ];
1968 vals =
new NT [ buffpercolneigh * colneighs ];
1972 assert (cnz == total_nnz);
1975 fill_n(ccurptrs, colneighs, numeric_limits<int>::max());
1976 MPI_Scatter(ccurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankincol, commGrid->colWorld);
1979 fill_n(rcurptrs, rowneighs, numeric_limits<int>::max());
1980 MPI_Scatter(rcurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankinrow, commGrid->rowWorld);
1983 else if( commGrid->OnSameProcCol(master) )
1985 BcastEssentials(commGrid->commWorld, total_m, total_n, total_nnz, master);
1986 m_perproc = total_m / colneighs;
1987 n_perproc = total_n / rowneighs;
1989 if(seeklength > 0 && pario)
1991 binfile = fopen(filename.c_str(),
"rb");
1992 IT entrysize = handler.entrylength();
1993 int myrankincol = commGrid->GetRankInProcCol();
1994 IT perreader = total_nnz / colneighs;
1995 IT read_offset = entrysize *
static_cast<IT
>(myrankincol) * perreader + seeklength;
1996 IT entriestoread = perreader;
1997 if (myrankincol == colneighs-1)
1998 entriestoread = total_nnz -
static_cast<IT
>(myrankincol) * perreader;
1999 fseek(binfile, read_offset, SEEK_SET);
2003 commGrid->OpenDebugFile(
"Read", oput);
2004 oput <<
"Total nnz: " << total_nnz <<
" OFFSET : " << read_offset <<
" entries to read: " << entriestoread << endl;
2008 AllocateSetBuffers(rows, cols, vals, rcurptrs, ccurptrs, rowneighs, colneighs, buffpercolneigh);
2009 ReadAllMine(binfile, rows, cols, vals, localtuples, rcurptrs, ccurptrs, rdispls, cdispls, m_perproc, n_perproc,
2010 rowneighs, colneighs, buffperrowneigh, buffpercolneigh, entriestoread, handler, rankinrow, transpose);
2014 while(total_n > 0 || total_m > 0)
2024 MPI_Scatter(ccurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankincol, commGrid->colWorld);
2025 if( recvcount == numeric_limits<int>::max())
break;
2028 IT * temprows =
new IT[recvcount];
2029 IT * tempcols =
new IT[recvcount];
2030 NT * tempvals =
new NT[recvcount];
2033 MPI_Scatterv(rows, ccurptrs, cdispls, MPIType<IT>(), temprows, recvcount, MPIType<IT>(), rankincol, commGrid->colWorld);
2034 MPI_Scatterv(cols, ccurptrs, cdispls, MPIType<IT>(), tempcols, recvcount, MPIType<IT>(), rankincol, commGrid->colWorld);
2035 MPI_Scatterv(vals, ccurptrs, cdispls, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), rankincol, commGrid->colWorld);
2038 rcurptrs =
new int[rowneighs];
2039 fill_n(rcurptrs, rowneighs, 0);
2042 HorizontalSend(rows, cols, vals,temprows, tempcols, tempvals, localtuples, rcurptrs, rdispls,
2043 buffperrowneigh, rowneighs, recvcount, m_perproc, n_perproc, rankinrow);
2048 fill_n(rcurptrs, rowneighs, numeric_limits<int>::max());
2049 MPI_Scatter(rcurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankinrow, commGrid->rowWorld);
2054 BcastEssentials(commGrid->commWorld, total_m, total_n, total_nnz, master);
2055 m_perproc = total_m / colneighs;
2056 n_perproc = total_n / rowneighs;
2057 while(total_n > 0 || total_m > 0)
2060 MPI_Scatter(rcurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankinrow, commGrid->rowWorld);
2061 if( recvcount == numeric_limits<int>::max())
2065 IT * temprows =
new IT[recvcount];
2066 IT * tempcols =
new IT[recvcount];
2067 NT * tempvals =
new NT[recvcount];
2069 MPI_Scatterv(rows, rcurptrs, rdispls, MPIType<IT>(), temprows, recvcount, MPIType<IT>(), rankinrow, commGrid->rowWorld);
2070 MPI_Scatterv(cols, rcurptrs, rdispls, MPIType<IT>(), tempcols, recvcount, MPIType<IT>(), rankinrow, commGrid->rowWorld);
2071 MPI_Scatterv(vals, rcurptrs, rdispls, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), rankinrow, commGrid->rowWorld);
2074 IT moffset = commGrid->myprocrow * m_perproc;
2075 IT noffset = commGrid->myproccol * n_perproc;
2077 for(IT i=0; i< recvcount; ++i)
2079 localtuples.push_back( make_tuple(temprows[i]-moffset, tempcols[i]-noffset, tempvals[i]) );
2085 tuple<IT,IT,NT> * arrtuples =
new tuple<IT,IT,NT>[localtuples.size()];
2086 copy(localtuples.begin(), localtuples.end(), arrtuples);
2088 IT localm = (commGrid->myprocrow != (commGrid->grrows-1))? m_perproc: (total_m - (m_perproc * (commGrid->grrows-1)));
2089 IT localn = (commGrid->myproccol != (commGrid->grcols-1))? n_perproc: (total_n - (n_perproc * (commGrid->grcols-1)));
2090 spSeq->Create( localtuples.size(), localm, localn, arrtuples);
2093 TAU_PROFILE_STOP(rdtimer);
2098 template <
class IT,
class NT,
class DER>
2102 rows =
new IT [ buffpercolneigh * colneighs ];
2103 cols =
new IT [ buffpercolneigh * colneighs ];
2104 vals =
new NT [ buffpercolneigh * colneighs ];
2106 ccurptrs =
new int[colneighs];
2107 rcurptrs =
new int[rowneighs];
2108 fill_n(ccurptrs, colneighs, 0);
2109 fill_n(rcurptrs, rowneighs, 0);
2112 template <
class IT,
class NT,
class DER>
2115 MPI_Bcast(&total_m, 1, MPIType<IT>(), master, world);
2116 MPI_Bcast(&total_n, 1, MPIType<IT>(), master, world);
2117 MPI_Bcast(&total_nnz, 1, MPIType<IT>(), master, world);
2124 template <
class IT,
class NT,
class DER>
2125 void SpParMat<IT,NT,DER>::VerticalSend(IT * & rows, IT * & cols, NT * & vals, vector< tuple<IT,IT,NT> > & localtuples,
int * rcurptrs,
int * ccurptrs,
int * rdispls,
int * cdispls,
2126 IT m_perproc, IT n_perproc,
int rowneighs,
int colneighs, IT buffperrowneigh, IT buffpercolneigh,
int rankinrow)
2129 int * colrecvdispls =
new int[colneighs];
2130 int * colrecvcounts =
new int[colneighs];
2131 MPI_Alltoall(ccurptrs, 1, MPI_INT, colrecvcounts, 1, MPI_INT, commGrid->colWorld);
2132 int totrecv = accumulate(colrecvcounts,colrecvcounts+colneighs,0);
2133 colrecvdispls[0] = 0;
2134 for(
int i=0; i<colneighs-1; ++i)
2135 colrecvdispls[i+1] = colrecvdispls[i] + colrecvcounts[i];
2138 IT * temprows =
new IT[totrecv];
2139 IT * tempcols =
new IT[totrecv];
2140 NT * tempvals =
new NT[totrecv];
2143 MPI_Alltoallv(rows, ccurptrs, cdispls, MPIType<IT>(), temprows, colrecvcounts, colrecvdispls, MPIType<IT>(), commGrid->colWorld);
2144 MPI_Alltoallv(cols, ccurptrs, cdispls, MPIType<IT>(), tempcols, colrecvcounts, colrecvdispls, MPIType<IT>(), commGrid->colWorld);
2145 MPI_Alltoallv(vals, ccurptrs, cdispls, MPIType<NT>(), tempvals, colrecvcounts, colrecvdispls, MPIType<NT>(), commGrid->colWorld);
2148 fill_n(ccurptrs, colneighs, 0);
2149 DeleteAll(colrecvdispls, colrecvcounts);
2153 HorizontalSend(rows, cols, vals,temprows, tempcols, tempvals, localtuples, rcurptrs, rdispls, buffperrowneigh, rowneighs, totrecv, m_perproc, n_perproc, rankinrow);
2156 rows =
new IT [ buffpercolneigh * colneighs ];
2157 cols =
new IT [ buffpercolneigh * colneighs ];
2158 vals =
new NT [ buffpercolneigh * colneighs ];
2168 template <
class IT,
class NT,
class DER>
2169 template <
class HANDLER>
2170 void SpParMat<IT,NT,DER>::ReadAllMine(FILE * binfile, IT * & rows, IT * & cols, NT * & vals, vector< tuple<IT,IT,NT> > & localtuples,
int * rcurptrs,
int * ccurptrs,
int * rdispls,
int * cdispls,
2171 IT m_perproc, IT n_perproc,
int rowneighs,
int colneighs, IT buffperrowneigh, IT buffpercolneigh, IT entriestoread, HANDLER handler,
int rankinrow,
bool transpose)
2173 assert(entriestoread != 0);
2175 IT temprow, tempcol;
2177 int finishedglobal = 1;
2178 while(cnz < entriestoread && !feof(binfile))
2180 handler.binaryfill(binfile, temprow , tempcol, tempval);
2187 int colrec = std::min(static_cast<int>(temprow / m_perproc), colneighs-1);
2188 size_t commonindex = colrec * buffpercolneigh + ccurptrs[colrec];
2189 rows[ commonindex ] = temprow;
2190 cols[ commonindex ] = tempcol;
2191 vals[ commonindex ] = tempval;
2192 ++ (ccurptrs[colrec]);
2193 if(ccurptrs[colrec] == buffpercolneigh || (cnz == (entriestoread-1)) )
2197 commGrid->OpenDebugFile(
"Read", oput);
2198 oput <<
"To column neighbors: ";
2199 copy(ccurptrs, ccurptrs+colneighs, ostream_iterator<int>(oput,
" ")); oput << endl;
2203 VerticalSend(rows, cols, vals, localtuples, rcurptrs, ccurptrs, rdispls, cdispls, m_perproc, n_perproc,
2204 rowneighs, colneighs, buffperrowneigh, buffpercolneigh, rankinrow);
2206 if(cnz == (entriestoread-1))
2208 int finishedlocal = 1;
2209 MPI_Allreduce( &finishedlocal, &finishedglobal, 1, MPI_INT, MPI_BAND, commGrid->colWorld);
2210 while(!finishedglobal)
2214 commGrid->OpenDebugFile(
"Read", oput);
2215 oput <<
"To column neighbors: ";
2216 copy(ccurptrs, ccurptrs+colneighs, ostream_iterator<int>(oput,
" ")); oput << endl;
2222 VerticalSend(rows, cols, vals, localtuples, rcurptrs, ccurptrs, rdispls, cdispls, m_perproc, n_perproc,
2223 rowneighs, colneighs, buffperrowneigh, buffpercolneigh, rankinrow);
2225 MPI_Allreduce( &finishedlocal, &finishedglobal, 1, MPI_INT, MPI_BAND, commGrid->colWorld);
2230 int finishedlocal = 0;
2231 MPI_Allreduce( &finishedlocal, &finishedglobal, 1, MPI_INT, MPI_BAND, commGrid->colWorld);
2238 fill_n(rcurptrs, rowneighs, numeric_limits<int>::max());
2240 MPI_Scatter(rcurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankinrow, commGrid->rowWorld);
2250 template <
class IT,
class NT,
class DER>
2251 void SpParMat<IT,NT,DER>::HorizontalSend(IT * & rows, IT * & cols, NT * & vals, IT * & temprows, IT * & tempcols, NT * & tempvals, vector < tuple <IT,IT,NT> > & localtuples,
2252 int * rcurptrs,
int * rdispls, IT buffperrowneigh,
int rowneighs,
int recvcount, IT m_perproc, IT n_perproc,
int rankinrow)
2254 rows =
new IT [ buffperrowneigh * rowneighs ];
2255 cols =
new IT [ buffperrowneigh * rowneighs ];
2256 vals =
new NT [ buffperrowneigh * rowneighs ];
2259 for(
int i=0; i< recvcount; ++i)
2261 int rowrec = std::min(static_cast<int>(tempcols[i] / n_perproc), rowneighs-1);
2262 rows[ rowrec * buffperrowneigh + rcurptrs[rowrec] ] = temprows[i];
2263 cols[ rowrec * buffperrowneigh + rcurptrs[rowrec] ] = tempcols[i];
2264 vals[ rowrec * buffperrowneigh + rcurptrs[rowrec] ] = tempvals[i];
2265 ++ (rcurptrs[rowrec]);
2270 commGrid->OpenDebugFile(
"Read", oput);
2271 oput <<
"To row neighbors: ";
2272 copy(rcurptrs, rcurptrs+rowneighs, ostream_iterator<int>(oput,
" ")); oput << endl;
2273 oput <<
"Row displacements were: ";
2274 copy(rdispls, rdispls+rowneighs, ostream_iterator<int>(oput,
" ")); oput << endl;
2278 MPI_Scatter(rcurptrs, 1, MPI_INT, &recvcount, 1, MPI_INT, rankinrow, commGrid->rowWorld);
2282 DeleteAll(temprows, tempcols, tempvals);
2283 temprows =
new IT[recvcount];
2284 tempcols =
new IT[recvcount];
2285 tempvals =
new NT[recvcount];
2288 MPI_Scatterv(rows, rcurptrs, rdispls, MPIType<IT>(), temprows, recvcount, MPIType<IT>(), rankinrow, commGrid->rowWorld);
2289 MPI_Scatterv(cols, rcurptrs, rdispls, MPIType<IT>(), tempcols, recvcount, MPIType<IT>(), rankinrow, commGrid->rowWorld);
2290 MPI_Scatterv(vals, rcurptrs, rdispls, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), rankinrow, commGrid->rowWorld);
2293 IT moffset = commGrid->myprocrow * m_perproc;
2294 IT noffset = commGrid->myproccol * n_perproc;
2296 for(
int i=0; i< recvcount; ++i)
2298 localtuples.push_back( make_tuple(temprows[i]-moffset, tempcols[i]-noffset, tempvals[i]) );
2301 fill_n(rcurptrs, rowneighs, 0);
2302 DeleteAll(rows, cols, vals, temprows, tempcols, tempvals);
2308 template <
class IT,
class NT,
class DER>
2311 if((*(distrows.commGrid) != *(distcols.commGrid)) || (*(distcols.commGrid) != *(distvals.commGrid)))
2316 IT globallen = getnnz();
2323 IT prelen = Atuples.
getnnz();
2326 int rank = commGrid->GetRank();
2327 int nprocs = commGrid->GetSize();
2328 IT * prelens =
new IT[nprocs];
2329 prelens[rank] = prelen;
2330 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), prelens, 1, MPIType<IT>(), commGrid->GetWorld());
2331 IT prelenuntil = accumulate(prelens, prelens+rank, static_cast<IT>(0));
2333 int * sendcnt =
new int[nprocs]();
2334 IT * rows =
new IT[prelen];
2335 IT * cols =
new IT[prelen];
2336 NT * vals =
new NT[prelen];
2338 int rowrank = commGrid->GetRankInProcRow();
2339 int colrank = commGrid->GetRankInProcCol();
2340 int rowneighs = commGrid->GetGridCols();
2341 int colneighs = commGrid->GetGridRows();
2342 IT * locnrows =
new IT[colneighs];
2343 IT * locncols =
new IT[rowneighs];
2344 locnrows[colrank] = getlocalrows();
2345 locncols[rowrank] = getlocalcols();
2347 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(),locnrows, 1, MPIType<IT>(), commGrid->GetColWorld());
2348 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(),locncols, 1, MPIType<IT>(), commGrid->GetRowWorld());
2350 IT roffset = accumulate(locnrows, locnrows+colrank, static_cast<IT>(0));
2351 IT coffset = accumulate(locncols, locncols+rowrank, static_cast<IT>(0));
2354 for(
int i=0; i< prelen; ++i)
2357 int owner = nrows.Owner(prelenuntil+i, locid);
2360 rows[i] = Atuples.
rowindex(i) + roffset;
2361 cols[i] = Atuples.
colindex(i) + coffset;
2365 int * recvcnt =
new int[nprocs];
2366 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, commGrid->GetWorld());
2368 int * sdpls =
new int[nprocs]();
2369 int * rdpls =
new int[nprocs]();
2370 partial_sum(sendcnt, sendcnt+nprocs-1, sdpls+1);
2371 partial_sum(recvcnt, recvcnt+nprocs-1, rdpls+1);
2373 MPI_Alltoallv(rows, sendcnt, sdpls, MPIType<IT>(),
SpHelper::p2a(nrows.arr), recvcnt, rdpls, MPIType<IT>(), commGrid->GetWorld());
2374 MPI_Alltoallv(cols, sendcnt, sdpls, MPIType<IT>(),
SpHelper::p2a(ncols.arr), recvcnt, rdpls, MPIType<IT>(), commGrid->GetWorld());
2375 MPI_Alltoallv(vals, sendcnt, sdpls, MPIType<NT>(),
SpHelper::p2a(nvals.arr), recvcnt, rdpls, MPIType<NT>(), commGrid->GetWorld());
2377 DeleteAll(sendcnt, recvcnt, sdpls, rdpls);
2386 template <
class IT,
class NT,
class DER>
2389 if((*(distrows.commGrid) != *(distcols.commGrid)) )
2394 IT globallen = getnnz();
2400 IT prelen = Atuples.
getnnz();
2402 int rank = commGrid->GetRank();
2403 int nprocs = commGrid->GetSize();
2404 IT * prelens =
new IT[nprocs];
2405 prelens[rank] = prelen;
2406 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), prelens, 1, MPIType<IT>(), commGrid->GetWorld());
2407 IT prelenuntil = accumulate(prelens, prelens+rank, static_cast<IT>(0));
2409 int * sendcnt =
new int[nprocs]();
2410 IT * rows =
new IT[prelen];
2411 IT * cols =
new IT[prelen];
2412 NT * vals =
new NT[prelen];
2414 int rowrank = commGrid->GetRankInProcRow();
2415 int colrank = commGrid->GetRankInProcCol();
2416 int rowneighs = commGrid->GetGridCols();
2417 int colneighs = commGrid->GetGridRows();
2418 IT * locnrows =
new IT[colneighs];
2419 IT * locncols =
new IT[rowneighs];
2420 locnrows[colrank] = getlocalrows();
2421 locncols[rowrank] = getlocalcols();
2423 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(),locnrows, 1, MPIType<IT>(), commGrid->GetColWorld());
2424 MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(),locncols, 1, MPIType<IT>(), commGrid->GetColWorld());
2425 IT roffset = accumulate(locnrows, locnrows+colrank, static_cast<IT>(0));
2426 IT coffset = accumulate(locncols, locncols+rowrank, static_cast<IT>(0));
2429 for(
int i=0; i< prelen; ++i)
2432 int owner = nrows.Owner(prelenuntil+i, locid);
2435 rows[i] = Atuples.
rowindex(i) + roffset;
2436 cols[i] = Atuples.
colindex(i) + coffset;
2439 int * recvcnt =
new int[nprocs];
2440 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, commGrid->GetWorld());
2442 int * sdpls =
new int[nprocs]();
2443 int * rdpls =
new int[nprocs]();
2444 partial_sum(sendcnt, sendcnt+nprocs-1, sdpls+1);
2445 partial_sum(recvcnt, recvcnt+nprocs-1, rdpls+1);
2447 MPI_Alltoallv(rows, sendcnt, sdpls, MPIType<IT>(),
SpHelper::p2a(nrows.arr), recvcnt, rdpls, MPIType<IT>(), commGrid->GetWorld());
2448 MPI_Alltoallv(cols, sendcnt, sdpls, MPIType<IT>(),
SpHelper::p2a(ncols.arr), recvcnt, rdpls, MPIType<IT>(), commGrid->GetWorld());
2450 DeleteAll(sendcnt, recvcnt, sdpls, rdpls);
2456 template <
class IT,
class NT,
class DER>
2459 outfile << (*spSeq) << endl;
2463 template <
class IU,
class NU,
class UDER>
2464 ofstream& operator<<(ofstream& outfile, const SpParMat<IU, NU, UDER> & s)
2466 return s.put(outfile) ;
2477 template <
class IT,
class NT,
class DER>
2480 int procrows = commGrid->GetGridRows();
2481 int proccols = commGrid->GetGridCols();
2482 IT m_perproc = total_m / procrows;
2483 IT n_perproc = total_n / proccols;
2488 own_procrow = std::min(static_cast<int>(grow / m_perproc), procrows-1);
2492 own_procrow = procrows -1;
2497 own_proccol = std::min(static_cast<int>(gcol / n_perproc), proccols-1);
2501 own_proccol = proccols-1;
2503 lrow = grow - (own_procrow * m_perproc);
2504 lcol = gcol - (own_proccol * n_perproc);
2505 return commGrid->GetRank(own_procrow, own_proccol);