52 template <
class SR,
class IT,
class NUM,
class IVT,
class OVT>
53 void SpImpl<SR,IT,NUM,IVT,OVT>::SpMXSpV(
const Dcsc<IT,NUM> & Adcsc, int32_t mA,
const int32_t * indx,
const IVT * numx, int32_t veclen,
54 vector<int32_t> & indy, vector< OVT > & numy)
58 vector< pair<IT,IT> > colinds( (IT) veclen);
59 Adcsc.
FillColInds(indx, (IT) veclen, colinds, NULL, 0);
61 if(
sizeof(NUM) >
sizeof(OVT))
64 for(IT j =0; j< veclen; ++j)
66 while(colinds[j].first != colinds[j].second )
80 make_heap(wset, wset+hsize);
83 pop_heap(wset, wset + hsize);
84 IT locv = wset[hsize-1].
runr;
85 if((!indy.empty()) && indy.back() == wset[hsize-1].
key)
87 numy.back() =
SR::add(numy.back(), wset[hsize-1].
num);
91 indy.push_back( (int32_t) wset[hsize-1].key);
92 numy.push_back(wset[hsize-1].num);
96 while ( (++(colinds[locv].first)) != colinds[locv].second )
101 wset[hsize-1].
key = Adcsc.
ir[colinds[locv].first];
102 wset[hsize-1].
num = mrhs;
103 push_heap(wset, wset+hsize);
116 for(IT j =0; j< veclen; ++j)
118 if(colinds[j].first != colinds[j].second)
123 make_heap(wset, wset+hsize);
126 pop_heap(wset, wset + hsize);
127 IT locv = wset[hsize-1].
runr;
132 if((!indy.empty()) && indy.back() == wset[hsize-1].
key)
134 numy.back() =
SR::add(numy.back(), mrhs);
138 indy.push_back( (int32_t) wset[hsize-1].key);
139 numy.push_back(mrhs);
143 if( (++(colinds[locv].first)) != colinds[locv].second)
146 wset[hsize-1].
key = Adcsc.
ir[colinds[locv].first];
147 wset[hsize-1].
num = Adcsc.
numx[colinds[locv].first];
148 push_heap(wset, wset+hsize);
162 template <
class SR,
class IT,
class IVT,
class OVT>
163 void SpImpl<SR,IT,bool,IVT,OVT>::SpMXSpV(
const Dcsc<IT,bool> & Adcsc, int32_t mA,
const int32_t * indx,
const IVT * numx, int32_t veclen,
164 vector<int32_t> & indy, vector<OVT> & numy)
166 IT inf = numeric_limits<IT>::min();
167 IT sup = numeric_limits<IT>::max();
168 KNHeap< IT, IVT > sHeap(sup, inf);
172 while(i< Adcsc.
nzc && k < veclen)
174 if(Adcsc.
jc[i] < indx[k]) ++i;
175 else if(indx[k] < Adcsc.
jc[i]) ++k;
178 for(IT j=Adcsc.
cp[i]; j < Adcsc.
cp[i+1]; ++j)
180 sHeap.insert(Adcsc.
ir[j], numx[k]);
189 if(sHeap.getSize() > 0)
191 sHeap.deleteMin(&row, &num);
192 indy.push_back( (int32_t) row);
193 numy.push_back( num );
195 while(sHeap.getSize() > 0)
197 sHeap.deleteMin(&row, &num);
198 if(indy.back() == row)
200 numy.back() =
SR::add(numy.back(), num);
204 indy.push_back( (int32_t) row);
217 template <
typename SR,
typename IT,
typename IVT,
class OVT>
218 void SpImpl<SR,IT,bool,IVT,OVT>::SpMXSpV(
const Dcsc<IT,bool> & Adcsc, int32_t mA,
const int32_t * indx,
const IVT * numx, int32_t veclen,
219 int32_t * indy, OVT * numy,
int * cnts,
int * dspls,
int p_c)
221 OVT * localy =
new OVT[mA];
222 bool * isthere =
new bool[mA];
223 fill(isthere, isthere+mA,
false);
224 vector< vector<int32_t> > nzinds(p_c);
226 int32_t perproc = mA / p_c;
229 while(i< Adcsc.
nzc && k < veclen)
231 if(Adcsc.
jc[i] < indx[k]) ++i;
232 else if(indx[k] < Adcsc.
jc[i]) ++k;
235 for(IT j=Adcsc.
cp[i]; j < Adcsc.
cp[i+1]; ++j)
237 int32_t rowid = (int32_t) Adcsc.
ir[j];
240 int32_t owner = min(rowid / perproc, static_cast<int32_t>(p_c-1));
241 localy[rowid] = numx[k];
242 nzinds[owner].push_back(rowid);
243 isthere[rowid] =
true;
247 localy[rowid] =
SR::add(localy[rowid], numx[k]);
255 for(
int p = 0; p< p_c; ++p)
257 sort(nzinds[p].begin(), nzinds[p].end());
258 cnts[p] = nzinds[p].size();
259 int32_t * locnzinds = &nzinds[p][0];
260 int32_t offset = perproc * p;
261 for(
int i=0; i< cnts[p]; ++i)
263 indy[dspls[p]+i] = locnzinds[i] - offset;
264 numy[dspls[p]+i] = localy[locnzinds[i]];
273 template <
typename SR,
typename IT,
typename IVT,
typename OVT>
274 void SpImpl<SR,IT,bool,IVT,OVT>::SpMXSpV_ForThreading(
const Dcsc<IT,bool> & Adcsc, int32_t mA,
const int32_t * indx,
const IVT * numx, int32_t veclen,
275 vector<int32_t> & indy, vector<OVT> & numy, int32_t offset)
277 OVT * localy =
new OVT[mA];
278 bool * isthere =
new bool[mA];
279 fill(isthere, isthere+mA,
false);
280 vector<int32_t> nzinds;
285 while(i< Adcsc.
nzc && k < veclen)
287 if(Adcsc.
jc[i] < indx[k]) ++i;
288 else if(indx[k] < Adcsc.
jc[i]) ++k;
291 for(IT j=Adcsc.
cp[i]; j < Adcsc.
cp[i+1]; ++j)
293 int32_t rowid = (int32_t) Adcsc.
ir[j];
296 localy[rowid] = numx[k];
297 nzinds.push_back(rowid);
298 isthere[rowid] =
true;
302 localy[rowid] =
SR::add(localy[rowid], numx[k]);
310 sort(nzinds.begin(), nzinds.end());
311 int nnzy = nzinds.size();
314 for(
int i=0; i< nnzy; ++i)
316 indy[i] = nzinds[i] + offset;
317 numy[i] = localy[nzinds[i]];