2 #include "../CombBLAS.h"
42 const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
43 const unsigned int S[] = {1, 2, 4, 8, 16, 32};
47 for (i = 5; i >= 0; i--)
59 bool from_string(T & t,
const string& s, std::ios_base& (*f)(std::ios_base&))
62 return !(iss >> f >> t).fail();
66 template <
typename PARMAT>
84 return ( y == -1 ) ? x: -1;
88 int main(
int argc,
char* argv[])
91 MPI_Init(&argc, &argv);
92 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
93 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
98 cout <<
"Usage: ./Graph500 <Force,Input> <Scale Forced | Input Name> {FastGen}" << endl;
99 cout <<
"Example: ./Graph500 Force 25 FastGen" << endl;
118 bool scramble =
false;
120 if(
string(argv[1]) ==
string(
"Input"))
126 G->
Reduce(degrees,
Row, plus<int64_t>(), static_cast<int64_t>(0));
131 A.
Reduce(*ColSums,
Column, plus<int64_t>(), static_cast<int64_t>(0));
132 nonisov = ColSums->
FindInds(bind2nd(greater<int64_t>(), 0));
134 A = A(nonisov, nonisov);
135 Aeff = PSpMat_s32p64(A);
139 Aeff.OptimizeForGraph500(optbuf);
142 tinfo <<
"Threading activated with " <<
cblas_splits <<
" threads" << endl;
147 else if(
string(argv[1]) ==
string(
"Binary"))
154 outs <<
"Reading " << argv[2] <<
" with " << n <<
" vertices and " << m <<
" edges" << endl;
160 SpParHelper::Print(
"Permuted Edges\n");
164 SpParHelper::Print(
"Renamed Vertices\n");
169 SpParHelper::Print(
"Created Int64 Sparse Matrix\n");
171 G->
Reduce(degrees,
Row, plus<int64_t>(), static_cast<int64_t>(0));
177 ostringstream loopinfo;
178 loopinfo <<
"Converted to Boolean and removed " << removed <<
" loops" << endl;
179 SpParHelper::Print(loopinfo.str());
184 A.
Reduce(*ColSums,
Column, plus<int64_t>(), static_cast<int64_t>(0));
185 A.
Reduce(*RowSums,
Row, plus<int64_t>(), static_cast<int64_t>(0));
186 ColSums->
EWiseApply(*RowSums, plus<int64_t>());
189 nonisov = ColSums->
FindInds(bind2nd(greater<int64_t>(), 0));
192 SpParHelper::Print(
"Found (and permuted) non-isolated vertices\n");
196 A(nonisov, nonisov,
true);
197 SpParHelper::Print(
"Dropped isolated vertices from input\n");
200 Aeff = PSpMat_s32p64(A);
204 SpParHelper::Print(
"Symmetricized\n");
207 Aeff.OptimizeForGraph500(optbuf);
210 tinfo <<
"Threading activated with " <<
cblas_splits <<
" threads" << endl;
211 SpParHelper::Print(tinfo.str());
217 if(
string(argv[1]) ==
string(
"Force"))
219 scale =
static_cast<unsigned>(atoi(argv[2]));
221 outs <<
"Forcing scale to : " << scale << endl;
224 if(argc > 3 &&
string(argv[3]) == string(
"FastGen"))
226 SpParHelper::Print(
"Using fast vertex permutations; skipping edge permutations (like v2.1)\n");
237 double initiator[4] = {.57, .19, .19, .05};
239 double t01 = MPI_Wtime();
248 tinfo <<
"Generation took " << t02-t01 <<
" seconds" << endl;
265 tinfo <<
"Generation took " << t02-t01 <<
" seconds" << endl;
270 MPI_Barrier(MPI_COMM_WORLD);
271 double t1 = MPI_Wtime();
274 PSpMat_s32p64_Int * G =
new PSpMat_s32p64_Int(*DEL,
false);
278 MPI_Barrier(MPI_COMM_WORLD);
279 double redts = MPI_Wtime();
280 G->Reduce(degrees,
Row, plus<int64_t>(), static_cast<int64_t>(0));
281 MPI_Barrier(MPI_COMM_WORLD);
282 double redtf = MPI_Wtime();
284 ostringstream redtimeinfo;
285 redtimeinfo <<
"Calculated degrees in " << redtf-redts <<
" seconds" << endl;
289 int64_t removed = A.RemoveLoops();
291 ostringstream loopinfo;
292 loopinfo <<
"Converted to Boolean and removed " << removed <<
" loops" << endl;
298 A.
Reduce(*ColSums,
Column, plus<int64_t>(), static_cast<int64_t>(0));
299 A.Reduce(*RowSums,
Row, plus<int64_t>(), static_cast<int64_t>(0));
301 ColSums->
EWiseApply(*RowSums, plus<int64_t>());
306 nonisov = ColSums->
FindInds(bind2nd(greater<int64_t>(), 0));
313 A(nonisov, nonisov,
true);
318 Aeff = PSpMat_s32p64(A);
323 Aeff.OptimizeForGraph500(optbuf);
326 tinfo <<
"Threading activated with " <<
cblas_splits <<
" threads" << endl;
332 MPI_Barrier(MPI_COMM_WORLD);
333 double t2=MPI_Wtime();
335 ostringstream k1timeinfo;
336 k1timeinfo << (t2-t1) - (redtf-redts) <<
" seconds elapsed for Kernel #1" << endl;
340 float balance = Aeff.LoadImbalance();
342 outs <<
"Load balance: " << balance << endl;
345 MPI_Barrier(MPI_COMM_WORLD);
346 double t1 = MPI_Wtime();
350 degrees = degrees(nonisov);
355 double nver = (double) degrees.TotalLength();
363 vector<double> loccands(
ITERS);
364 vector<int64_t> loccandints(
ITERS);
367 for(
int i=0; i<
ITERS; ++i)
368 loccands[i] = M.
rand();
369 copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout,
" ")); cout << endl;
370 transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
372 for(
int i=0; i<
ITERS; ++i)
373 loccandints[i] = static_cast<int64_t>(loccands[i]);
374 copy(loccandints.begin(), loccandints.end(), ostream_iterator<double>(cout,
" ")); cout << endl;
377 for(
int i=0; i<
ITERS; ++i)
383 for(
int trials =0; trials <
MAXTRIALS; trials++)
390 MPI_Pcontrol(1,
"BFS");
393 for(
int i=0; i<
ITERS; ++i)
401 MPI_Barrier(MPI_COMM_WORLD);
402 double t1 = MPI_Wtime();
404 fringe.SetElement(Cands[i], Cands[i]);
406 while(fringe.getnnz() > 0)
408 fringe.setNumToInd();
409 fringe =
SpMV(Aeff, fringe,optbuf);
414 MPI_Barrier(MPI_COMM_WORLD);
415 double t2 = MPI_Wtime();
423 ostringstream outnew;
424 outnew << i <<
"th starting vertex was " << Cands[i] << endl;
425 outnew <<
"Number iterations: " << iterations << endl;
426 outnew <<
"Number of vertices found: " << parentsp.
Reduce(plus<int64_t>(), (
int64_t) 0) << endl;
427 outnew <<
"Number of edges traversed: " << nedges << endl;
428 outnew <<
"BFS time: " << t2-t1 <<
" seconds" << endl;
429 outnew <<
"MTEPS: " <<
static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
433 MTEPS[i] =
static_cast<double>(nedges) / (t2-t1) / 1000000.0;
438 MPI_Pcontrol(-1,
"BFS");
441 os <<
"Per iteration communication times: " << endl;
446 os <<
"Per iteration computation times: " << endl;
450 sort(EDGES, EDGES+ITERS);
451 os <<
"--------------------------" << endl;
452 os <<
"Min nedges: " << EDGES[0] << endl;
453 os <<
"First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
454 os <<
"Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
455 os <<
"Third Quartile nedges: " << (EDGES[(3*ITERS/4) -1 ] + EDGES[3*ITERS/4])/2 << endl;
456 os <<
"Max nedges: " << EDGES[ITERS-1] << endl;
457 double mean = accumulate( EDGES, EDGES+ITERS, 0.0 )/
ITERS;
458 vector<double> zero_mean(ITERS);
459 transform(EDGES, EDGES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
461 double deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
462 deviation = sqrt( deviation / (ITERS-1) );
463 os <<
"Mean nedges: " << mean << endl;
464 os <<
"STDDEV nedges: " << deviation << endl;
465 os <<
"--------------------------" << endl;
467 sort(TIMES,TIMES+ITERS);
468 os <<
"Min time: " << TIMES[0] <<
" seconds" << endl;
469 os <<
"First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 <<
" seconds" << endl;
470 os <<
"Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 <<
" seconds" << endl;
471 os <<
"Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 <<
" seconds" << endl;
472 os <<
"Max time: " << TIMES[ITERS-1] <<
" seconds" << endl;
473 mean = accumulate( TIMES, TIMES+ITERS, 0.0 )/
ITERS;
474 transform(TIMES, TIMES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
475 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
476 deviation = sqrt( deviation / (ITERS-1) );
477 os <<
"Mean time: " << mean <<
" seconds" << endl;
478 os <<
"STDDEV time: " << deviation <<
" seconds" << endl;
479 os <<
"--------------------------" << endl;
481 sort(MTEPS, MTEPS+ITERS);
482 os <<
"Min MTEPS: " << MTEPS[0] << endl;
483 os <<
"First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
484 os <<
"Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
485 os <<
"Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
486 os <<
"Max MTEPS: " << MTEPS[ITERS-1] << endl;
488 double hteps =
static_cast<double>(
ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
489 os <<
"Harmonic mean of MTEPS: " << hteps << endl;
490 transform(INVMTEPS, INVMTEPS+ITERS, zero_mean.begin(), bind2nd(minus<double>(), 1/hteps));
491 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
492 deviation = sqrt( deviation / (ITERS-1) ) * (hteps*hteps);
493 os <<
"Harmonic standard deviation of MTEPS: " << deviation << endl;