COMBINATORIAL_BLAS  1.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SpImpl.cpp
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.2 -------------------------------------------------*/
4 /* date: 10/06/2011 --------------------------------------------*/
5 /* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2011, Aydin Buluc
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 #include "SpImpl.h"
30 
52 template <class SR, class IT, class NUM, class IVT, class OVT>
53 void SpImpl<SR,IT,NUM,IVT,OVT>::SpMXSpV(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
54  vector<int32_t> & indy, vector< OVT > & numy)
55 {
56  int32_t hsize = 0;
57  // colinds dereferences A.ir (valid from colinds[].first to colinds[].second)
58  vector< pair<IT,IT> > colinds( (IT) veclen);
59  Adcsc.FillColInds(indx, (IT) veclen, colinds, NULL, 0); // csize is irrelevant if aux is NULL
60 
61  if(sizeof(NUM) > sizeof(OVT)) // ABAB: include a filtering based runtime choice as well?
62  {
63  HeapEntry<IT, OVT> * wset = new HeapEntry<IT, OVT>[veclen];
64  for(IT j =0; j< veclen; ++j) // create the initial heap
65  {
66  while(colinds[j].first != colinds[j].second ) // iterate until finding the first entry within this column that passes the filter
67  {
68  OVT mrhs = SR::multiply(Adcsc.numx[colinds[j].first], numx[j]);
69  if(SR::returnedSAID())
70  {
71  ++(colinds[j].first); // increment the active row index within the jth column
72  }
73  else
74  {
75  wset[hsize++] = HeapEntry< IT,OVT > ( Adcsc.ir[colinds[j].first], j, mrhs);
76  break; // this column successfully inserted an entry to the heap
77  }
78  }
79  }
80  make_heap(wset, wset+hsize);
81  while(hsize > 0)
82  {
83  pop_heap(wset, wset + hsize); // result is stored in wset[hsize-1]
84  IT locv = wset[hsize-1].runr; // relative location of the nonzero in sparse column vector
85  if((!indy.empty()) && indy.back() == wset[hsize-1].key)
86  {
87  numy.back() = SR::add(numy.back(), wset[hsize-1].num);
88  }
89  else
90  {
91  indy.push_back( (int32_t) wset[hsize-1].key);
92  numy.push_back(wset[hsize-1].num);
93  }
94  bool pushed = false;
95  // invariant: if ++(colinds[locv].first) == colinds[locv].second, then locv will not appear again in the heap
96  while ( (++(colinds[locv].first)) != colinds[locv].second ) // iterate until finding another passing entry
97  {
98  OVT mrhs = SR::multiply(Adcsc.numx[colinds[locv].first], numx[locv]);
99  if(!SR::returnedSAID())
100  {
101  wset[hsize-1].key = Adcsc.ir[colinds[locv].first];
102  wset[hsize-1].num = mrhs;
103  push_heap(wset, wset+hsize); // runr stays the same
104  pushed = true;
105  break;
106  }
107  }
108  if(!pushed) --hsize;
109  }
110  delete [] wset;
111  }
112 
113  else
114  {
115  HeapEntry<IT, NUM> * wset = new HeapEntry<IT, NUM>[veclen];
116  for(IT j =0; j< veclen; ++j) // create the initial heap
117  {
118  if(colinds[j].first != colinds[j].second) // current != end
119  {
120  wset[hsize++] = HeapEntry< IT,NUM > ( Adcsc.ir[colinds[j].first], j, Adcsc.numx[colinds[j].first]); // HeapEntry(key, run, num)
121  }
122  }
123  make_heap(wset, wset+hsize);
124  while(hsize > 0)
125  {
126  pop_heap(wset, wset + hsize); // result is stored in wset[hsize-1]
127  IT locv = wset[hsize-1].runr; // relative location of the nonzero in sparse column vector
128  OVT mrhs = SR::multiply(wset[hsize-1].num, numx[locv]);
129 
130  if (!SR::returnedSAID())
131  {
132  if((!indy.empty()) && indy.back() == wset[hsize-1].key)
133  {
134  numy.back() = SR::add(numy.back(), mrhs);
135  }
136  else
137  {
138  indy.push_back( (int32_t) wset[hsize-1].key);
139  numy.push_back(mrhs);
140  }
141  }
142 
143  if( (++(colinds[locv].first)) != colinds[locv].second) // current != end
144  {
145  // runr stays the same !
146  wset[hsize-1].key = Adcsc.ir[colinds[locv].first];
147  wset[hsize-1].num = Adcsc.numx[colinds[locv].first];
148  push_heap(wset, wset+hsize);
149  }
150  else --hsize;
151  }
152  delete [] wset;
153  }
154 }
155 
156 
162 template <class SR, class IT, class IVT, class OVT>
163 void SpImpl<SR,IT,bool,IVT,OVT>::SpMXSpV(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
164  vector<int32_t> & indy, vector<OVT> & numy)
165 {
166  IT inf = numeric_limits<IT>::min();
167  IT sup = numeric_limits<IT>::max();
168  KNHeap< IT, IVT > sHeap(sup, inf); // max size: flops
169 
170  IT k = 0; // index to indx vector
171  IT i = 0; // index to columns of matrix
172  while(i< Adcsc.nzc && k < veclen)
173  {
174  if(Adcsc.jc[i] < indx[k]) ++i;
175  else if(indx[k] < Adcsc.jc[i]) ++k;
176  else
177  {
178  for(IT j=Adcsc.cp[i]; j < Adcsc.cp[i+1]; ++j) // for all nonzeros in this column
179  {
180  sHeap.insert(Adcsc.ir[j], numx[k]); // row_id, num
181  }
182  ++i;
183  ++k;
184  }
185  }
186 
187  IT row;
188  IVT num;
189  if(sHeap.getSize() > 0)
190  {
191  sHeap.deleteMin(&row, &num);
192  indy.push_back( (int32_t) row);
193  numy.push_back( num );
194  }
195  while(sHeap.getSize() > 0)
196  {
197  sHeap.deleteMin(&row, &num);
198  if(indy.back() == row)
199  {
200  numy.back() = SR::add(numy.back(), num);
201  }
202  else
203  {
204  indy.push_back( (int32_t) row);
205  numy.push_back(num);
206  }
207  }
208 }
209 
210 
217 template <typename SR, typename IT, typename IVT, class OVT>
218 void SpImpl<SR,IT,bool,IVT,OVT>::SpMXSpV(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
219  int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c)
220 {
221  OVT * localy = new OVT[mA];
222  bool * isthere = new bool[mA];
223  fill(isthere, isthere+mA, false);
224  vector< vector<int32_t> > nzinds(p_c); // nonzero indices
225 
226  int32_t perproc = mA / p_c;
227  int32_t k = 0; // index to indx vector
228  IT i = 0; // index to columns of matrix
229  while(i< Adcsc.nzc && k < veclen)
230  {
231  if(Adcsc.jc[i] < indx[k]) ++i;
232  else if(indx[k] < Adcsc.jc[i]) ++k;
233  else
234  {
235  for(IT j=Adcsc.cp[i]; j < Adcsc.cp[i+1]; ++j) // for all nonzeros in this column
236  {
237  int32_t rowid = (int32_t) Adcsc.ir[j];
238  if(!isthere[rowid])
239  {
240  int32_t owner = min(rowid / perproc, static_cast<int32_t>(p_c-1));
241  localy[rowid] = numx[k]; // initial assignment, requires implicit conversion if IVT != OVT
242  nzinds[owner].push_back(rowid);
243  isthere[rowid] = true;
244  }
245  else
246  {
247  localy[rowid] = SR::add(localy[rowid], numx[k]);
248  }
249  }
250  ++i;
251  ++k;
252  }
253  }
254 
255  for(int p = 0; p< p_c; ++p)
256  {
257  sort(nzinds[p].begin(), nzinds[p].end());
258  cnts[p] = nzinds[p].size();
259  int32_t * locnzinds = &nzinds[p][0];
260  int32_t offset = perproc * p;
261  for(int i=0; i< cnts[p]; ++i)
262  {
263  indy[dspls[p]+i] = locnzinds[i] - offset; // conver to local offset
264  numy[dspls[p]+i] = localy[locnzinds[i]];
265  }
266  }
267  delete [] localy;
268  delete [] isthere;
269 }
270 
271 
272 
273 template <typename SR, typename IT, typename IVT, typename OVT>
274 void SpImpl<SR,IT,bool,IVT,OVT>::SpMXSpV_ForThreading(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
275  vector<int32_t> & indy, vector<OVT> & numy, int32_t offset)
276 {
277  OVT * localy = new OVT[mA];
278  bool * isthere = new bool[mA];
279  fill(isthere, isthere+mA, false);
280  vector<int32_t> nzinds; // nonzero indices
281 
282  // The following piece of code is not general, but it's more memory efficient than FillColInds
283  int32_t k = 0; // index to indx vector
284  IT i = 0; // index to columns of matrix
285  while(i< Adcsc.nzc && k < veclen)
286  {
287  if(Adcsc.jc[i] < indx[k]) ++i;
288  else if(indx[k] < Adcsc.jc[i]) ++k;
289  else
290  {
291  for(IT j=Adcsc.cp[i]; j < Adcsc.cp[i+1]; ++j) // for all nonzeros in this column
292  {
293  int32_t rowid = (int32_t) Adcsc.ir[j];
294  if(!isthere[rowid])
295  {
296  localy[rowid] = numx[k]; // initial assignment
297  nzinds.push_back(rowid);
298  isthere[rowid] = true;
299  }
300  else
301  {
302  localy[rowid] = SR::add(localy[rowid], numx[k]);
303  }
304  }
305  ++i;
306  ++k;
307  }
308  }
309 
310  sort(nzinds.begin(), nzinds.end());
311  int nnzy = nzinds.size();
312  indy.resize(nnzy);
313  numy.resize(nnzy);
314  for(int i=0; i< nnzy; ++i)
315  {
316  indy[i] = nzinds[i] + offset; // return column-global index and let gespmv determine the receiver's local index
317  numy[i] = localy[nzinds[i]];
318  }
319  delete [] localy;
320  delete [] isthere;
321 }
322 
323