COMBINATORIAL_BLAS  1.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TwitterEdge.h
Go to the documentation of this file.
1 #ifndef _TWITTER_EDGE_
2 #define _TWITTER_EDGE_
3 
4 #include <iostream>
5 #include <ctime>
6 #include "../CombBLAS.h"
7 
8 using namespace std;
9 
10 struct DetSymmetricize;
11 
18 {
19 public:
20  TwitterEdge(): count(0), follower(0), latest(0) {};
21  template <typename X>
22  TwitterEdge(X x):count(0), follower(0), latest(0) {}; // any upcasting constructs the default object too
23 
24  TwitterEdge(short mycount, bool myfollow, time_t mylatest):count(mycount), follower(myfollow), latest(mylatest) {};
25  bool isFollower() const { return follower; };
26  bool isRetwitter() const { return (count > 0); };
27  bool TweetWithinInterval (time_t begin, time_t end) const { return ((count > 0) && (begin <= latest && latest <= end)); };
28  bool TweetSince (time_t begin) const { return ((count > 0) && (begin <= latest)); };
29  bool LastTweetBy (time_t end) const { return ((count > 0) && (latest <= end)); };
30 
31  operator bool () const { return true; } ; // Type conversion operator (ABAB: Shoots in the foot by implicitly converting many things)
32 
33  TwitterEdge & operator+=(const TwitterEdge & rhs)
34  {
35  cout << "Error: TwitterEdge::operator+=() shouldn't be executed" << endl;
36  count += rhs.count;
37  follower |= rhs.follower;
38  if(rhs.count > 0) // ensure that addition with additive identity doesn't change "latest"
39  latest = max(latest, rhs.latest);
40  return *this;
41  }
42  bool operator ==(const TwitterEdge & b) const
43  {
44  return ((follower == b.follower) && (latest == b.latest) && (count == b.count));
45  }
46 
47  friend ostream& operator<<( ostream& os, const TwitterEdge & twe);
48  friend TwitterEdge operator*( const TwitterEdge & a, const TwitterEdge & b);
49 
50 private:
51  bool follower; // default constructor sets all to zero
52  time_t latest; // not assigned if no retweets happened
53  short count;
54 
55  template <typename IT>
56  friend class TwitterReadSaveHandler;
57 
58  friend struct DetSymmetricize;
59 };
60 
61 ostream& operator<<(ostream& os, const TwitterEdge & twe )
62 {
63  if( twe.follower == 0 && twe.latest == 0 && twe.count == 0)
64  os << 0;
65  else
66  os << 1;
67  return os;
68 };
69 
71 {
72  // One of the parameters is an upcast from bool (used in Indexing), so return the other one
73  if(a == TwitterEdge()) return b;
74  else return a;
75 }
76 
77 
78 template <class IT>
80 {
81  public:
83  TwitterEdge getNoNum(IT row, IT col) { return TwitterEdge(); }
84 
85  MPI_Datatype getMPIType()
86  {
87  return MPIType<TwitterEdge>(); // utilize the MPI type cache
88  }
89 
90  void binaryfill(FILE * rFile, IT & row, IT & col, TwitterEdge & val)
91  {
92  TwitterInteraction twi;
93  size_t entryLength = fread (&twi,sizeof(TwitterInteraction),1,rFile);
94  row = twi.from - 1 ;
95  col = twi.to - 1;
96  val = TwitterEdge(twi.retweets, twi.follow, twi.twtime);
97  if(entryLength != 1)
98  cout << "Not enough bytes read in binaryfill " << endl;
99  }
100  size_t entrylength() { return sizeof(TwitterInteraction); }
101 
102  template <typename c, typename t>
103  TwitterEdge read(std::basic_istream<c,t>& is, IT row, IT col)
104  {
105  TwitterEdge tw;
106  is >> tw.follower;
107  is >> tw.count;
108  if(tw.count > 0)
109  {
110  string date;
111  string time;
112  is >> date;
113  is >> time;
114 
115  struct tm timeinfo;
116  int year, month, day, hour, min, sec;
117  sscanf (date.c_str(),"%d-%d-%d",&year, &month, &day);
118  sscanf (time.c_str(),"%d:%d:%d",&hour, &min, &sec);
119 
120  memset(&timeinfo, 0, sizeof(struct tm));
121  timeinfo.tm_year = year - 1900; // year is "years since 1900"
122  timeinfo.tm_mon = month - 1 ; // month is in range 0...11
123  timeinfo.tm_mday = day; // range 1...31
124  timeinfo.tm_hour = hour; // range 0...23
125  timeinfo.tm_min = min; // range 0...59
126  timeinfo.tm_sec = sec; // range 0.
127  tw.latest = timegm(&timeinfo);
128  if(tw.latest == -1) { cout << "Can not parse time date" << endl; exit(-1);}
129  }
130  else
131  {
132  tw.latest = 0; // initialized to dummy
133  }
134  //cout << row << " follows " << col << "? : " << tw.follower << " and the retweet count is " << tw.count << endl;
135  return tw;
136  }
137 
138 
139  template <typename c, typename t>
140  void save(std::basic_ostream<c,t>& os, const TwitterEdge & tw, IT row, IT col) // save is NOT compatible with read
141  {
142  os << row << "\t" << col << "\t";
143  os << tw.follower << "\t";
144  os << tw.count << "\t";
145  os << tw.latest << endl;
146  }
147  private:
// In-memory image of one binary input record; binaryfill() reads it with a single
// fread of sizeof(TwitterInteraction) bytes, so the field order must not change.
struct TwitterInteraction
{
    int32_t from;       // source id as stored (binaryfill subtracts 1 to get the row)
    int32_t to;         // destination id as stored (binaryfill subtracts 1 to get the column)
    bool follow;        // becomes the edge's follower flag
    int16_t retweets;   // becomes the edge's retweet count
    time_t twtime;      // becomes the edge's "latest" timestamp
};
156 };
157 
158 
160 {
161  ParentType():id(-1) { };
162  ParentType(int64_t myid):id(myid) { };
163  int64_t id;
164  bool operator ==(const ParentType & rhs) const
165  {
166  return (id == rhs.id);
167  }
168  bool operator !=(const ParentType & rhs) const
169  {
170  return (id != rhs.id);
171  }
172  ParentType & operator+=(const ParentType & rhs)
173  {
174  cout << "Adding parent with id: " << rhs.id << " to this one with id " << id << endl;
175  return *this;
176  }
177  const ParentType operator++(int) // for iota
178  {
179  ParentType temp(*this); // post-fix requirement
180  ++id;
181  return temp;
182  }
183  friend ostream& operator<<(ostream& os, const ParentType & twe );
184 
185  template <typename IT>
186  friend ParentType operator+( const IT & left, const ParentType & right);
187 };
188 
189 ostream& operator<<(ostream& os, const ParentType & twe )
190 {
191  os << "Parent=" << twe.id;
192  return os;
193 };
194 
196 {
197  return ParentType(index);
198 }
199 
200 template <typename IT>
201 ParentType operator+( const IT & left, const ParentType & right)
202 {
203  return ParentType(left+right.id);
204 }
205 
206 // forward declaration
207 template <typename SR, typename T>
208 void select2nd(void * invec, void * inoutvec, int * len, MPI_Datatype *datatype);
209 
210 
211 template <typename SR, typename VECTYPE>
212 static VECTYPE filtered_select2nd(const TwitterEdge & arg1, const VECTYPE & arg2, time_t & sincedate)
213 {
214  if(sincedate == -1) // uninitialized
215  {
216  struct tm timeinfo;
217  memset(&timeinfo, 0, sizeof(struct tm));
218  int year, month, day, hour, min, sec;
219  year = 2009; month = 7; day = 1;
220  hour = 0; min = 0; sec = 0;
221 
222  timeinfo.tm_year = year - 1900; // year is "years since 1900"
223  timeinfo.tm_mon = month - 1 ; // month is in range 0...11
224  timeinfo.tm_mday = day; // range 1...31
225  timeinfo.tm_hour = hour; // range 0...23
226  timeinfo.tm_min = min; // range 0...59
227  timeinfo.tm_sec = sec; // range 0.
228  sincedate = timegm(&timeinfo);
229 
230  ostringstream outs;
231  outs << "Initializing since date (only once) to " << sincedate << endl;
232  SpParHelper::Print(outs.str());
233  }
234 
235  if(arg1.isRetwitter() && arg1.LastTweetBy(sincedate)) // T1 is of type edges for BFS
236  {
237  return arg2;
238  }
239  else
240  {
241  SR::returnedSAID(true);
242  return VECTYPE();
243  // return null-type parent id (for BFS) or
244  // double() for MIS - POD objects are zero initilied
245  }
246 }
247 
248 
249 
256 {
257  static MPI_Op MPI_BFSADD;
258  static ParentType id() { return ParentType(); } // additive identity
259 
// Get-and-set accessor for a function-local flag.
// Returns the value the flag had on entry and then stores setFlagTo in it.
// Because setFlagTo defaults to false, a bare `if (returnedSAID()) {...}`
// both reads the flag and clears it, which is how CombBLAS routines call it.
static bool returnedSAID(bool setFlagTo = false)
{
    static bool flag = false;

    const bool previous = flag;   // remember what callers should see
    flag = setFlagTo;             // overwrite with the requested state
    return previous;
}
271 
272  static ParentType add(const ParentType & arg1, const ParentType & arg2)
273  {
274  return ((arg2 == ParentType()) ? arg1: arg2);
275  }
276 
277  static MPI_Op mpi_op()
278  {
279  MPI_Op_create(select2nd<LatestRetwitterBFS,ParentType>, false, &MPI_BFSADD); // \todo {do this once only, by greating a MPI_Op buffer}
280  return MPI_BFSADD;
281  }
282  static time_t sincedate;
283  static ParentType multiply(const TwitterEdge & arg1, const ParentType & arg2)
284  {
285  return filtered_select2nd<LatestRetwitterBFS>(arg1, arg2, sincedate);
286  }
287  static void axpy(TwitterEdge a, const ParentType & x, ParentType & y)
288  {
289  y = add(y, multiply(a, x));
290  }
291 };
292 
294 
295 // select2nd for doubles
296 template <typename SR, typename T>
297 void select2nd(void * invec, void * inoutvec, int * len, MPI_Datatype *datatype)
298 {
299  T * pinvec = static_cast<T*>(invec);
300  T * pinoutvec = static_cast<T*>(inoutvec);
301  for (int i = 0; i < *len; i++)
302  {
303  pinoutvec[i] = SR::add(pinvec[i], pinoutvec[i]);
304  }
305 }
306 
307 
309 
310 struct getfringe: public std::binary_function<ParentType, ParentType, ParentType>
311 {
312  ParentType operator()(ParentType x, const ParentType & y) const
313  {
314  return x;
315  }
316 
317 };
318 
319 // x: Parent type (always 1 if exits, sparse)
320 // y: degree (dense)
321 struct seldegree: public std::binary_function<ParentType, int64_t, int64_t>
322 {
323  int64_t operator()(ParentType x, const int64_t & y) const
324  {
325  return y;
326  }
327 
328 };
329 
330 // This is like an "isparentset" with the extra parameter that we don't care
331 struct passifthere: public std::binary_function<ParentType, int64_t, bool>
332 {
333  bool operator()(ParentType x, const int64_t & y) const
334  {
335  return (x != ParentType());
336  }
337 
338 };
339 
// DoOp for MIS's EWiseApply: true when the second argument is strictly smaller
// than the first.
// The adaptor typedefs are spelled out directly because std::binary_function
// was removed from the standard library in C++17.
struct is2ndSmaller
{
    typedef double first_argument_type;
    typedef double second_argument_type;
    typedef bool result_type;

    bool operator()(double m, double c) const
    {
        return (c < m);
    }
};
348 
// BinOp for MIS's EWiseApply: ignores both arguments and always yields 1.
// The adaptor typedefs are spelled out directly because std::binary_function
// was removed from the standard library in C++17. operator() is const so the
// functor can be invoked through a const object, like the other functors here.
struct return1_uint8
{
    typedef double first_argument_type;
    typedef double second_argument_type;
    typedef uint8_t result_type;

    uint8_t operator() (double t1, double t2) const
    {
        return static_cast<uint8_t>(1);
    }
};
357 
358 
359 // x: elements from fringe (sparse), y: elements from parents (dense)
360 // return true for edges that are not filtered out, and not previously discovered
361 // if the edge was filtered out, then x would be ParentType()
362 // if y was already discovered its parent would NOT be ParentType()
363 struct keepinfrontier_f: public std::binary_function<ParentType, ParentType, bool>
364 {
365  bool operator()(ParentType x, const ParentType & y) const
366  {
367  return ( x != ParentType() && y == ParentType()) ;
368  }
369 
370 };
371 
372 struct isparentset: public std::unary_function<ParentType, bool>
373 {
374  bool operator()(const ParentType & x) const
375  {
376  return ( x != ParentType() ) ;
377  }
378 
379 };
380 
381 // Matrix type: TwitterEdge
382 // Vector type: double
384 {
// Additive identity of this semiring.
static double id()
{
    return 0.0;
}
386 
// Get-and-set accessor for a function-local flag.
// Returns the value the flag had on entry and then stores setFlagTo in it.
// Because setFlagTo defaults to false, a bare `if (returnedSAID()) {...}`
// both reads the flag and clears it, which is how CombBLAS routines call it.
static bool returnedSAID(bool setFlagTo = false)
{
    static bool flag = false;

    const bool previous = flag;   // remember what callers should see
    flag = setFlagTo;             // overwrite with the requested state
    return previous;
}
398 
// Semiring addition: the smaller of the two values (arg1 when neither is smaller).
static double add(const double & arg1, const double & arg2)
{
    return (arg2 < arg1) ? arg2 : arg1;
}
403 
404  static MPI_Op mpi_op()
405  {
406  return MPI_MIN;
407  }
408  static time_t sincedate;
409  static double multiply(const TwitterEdge & arg1, const double & arg2) // filtered select2nd
410  {
411  return filtered_select2nd<LatestRetwitterMIS>(arg1, arg2, sincedate);
412  }
413  static void axpy(TwitterEdge a, const double & x, double & y)
414  {
415  y = add(y, multiply(a, x));
416  }
417 };
418 
419 // Matrix type: TwitterEdge
420 // Vector type: double
421 struct LatestRetwitterSelect2nd // also used for finding neighbors of the candidate set in MIS
422 {
423  static MPI_Op MPI_SEL2NDADD;
424  static double id() { return 0.0; } // additive identity
425 
426  // the default argument means that this function can be used like this:
427  // if (returnedSAID()) {...}
428  // which is how it is called inside CombBLAS routines. That call conveniently clears the flag for us.
429  static bool returnedSAID(bool setFlagTo = false)
430  {
431  static bool flag = false;
432 
433  bool temp = flag; // save the current flag value to be returned later. Saves an if statement.
434  flag = setFlagTo; // set/clear the flag.
435  return temp;
436  }
437 
438  static double add(const double & arg1, const double & arg2)
439  {
440  return arg2;
441  }
442 
443  static MPI_Op mpi_op()
444  {
445  MPI_Op_create(select2nd<LatestRetwitterSelect2nd,double>, false, &MPI_SEL2NDADD); // \todo {do this once only, by greating a MPI_Op buffer}
446  return MPI_SEL2NDADD;
447  }
448  static time_t sincedate;
449  static double multiply(const TwitterEdge & arg1, const double & arg2) // filtered select2nd
450  {
451  return filtered_select2nd<LatestRetwitterSelect2nd>(arg1, arg2, sincedate);
452  }
453  static void axpy(TwitterEdge a, const double & x, double & y)
454  {
455  y = add(y, multiply(a, x));
456  }
457 };
458 
462 
463 
464 
465 #endif