0001 function [E, Verif] = gendata2(parms, seed)
0002 % GENDATA2 : UCSB SSCA#2 scalable data generator (Integer only)
0003 %
0004 % E = gendata2 (scale)  
0005 %     Generates the edge list data for SSCA#2
0006 %     The generated graph has 2^scale (default 2^10 = 1024) vertices
0007 %
0008 % E = gendata2 (scale, seed)  
0009 %     Uses the specified random seed (default 1)
0010 %     If [] is used for seed, a random seed is used from the clock
0011 %
0012 % [E, Verif] = ... 
0013 %     Also yields graph info for use in validation
0014 %
0015 % John Gilbert, Nov 2, 2005
0016 % Viral Shah, May 22, 2006
0017 
0018 fprintf ('\n** UCSB/SGI SSCA#2 Scalable Data Generator (Integer Only) **\n\n');
0019 
0020 if nargin < 2
0021     seed = 1;
0022 elseif isempty(seed)
0023     seed = sum(100*clock);
0024 end
0025 rand('state',seed);
0026 
0027 Scale = 10;
0028 if nargin == 0
0029     parms = [];
0030 else
0031     if ~isstruct (parms)
0032         Scale = parms;
0033         parms = [];
0034     end
0035 end
0036 
0037 
0038 % Use default parameters if they are not passed.
0039 if ~isstruct (parms)
0040 
0041     % Problem scale
0042     parms.Scale = Scale;
0043     
0044     % Number of vertices
0045     parms.TotVertices = 2.^Scale;    
0046     
0047     % Max clique size
0048     parms.MaxCliqueSize = floor(2.^(Scale/3));  
0049     
0050     % Max number of parallel directed/undirected edges
0051     parms.MaxParallelEdges = 3;
0052     
0053     % Probability of interclique edges for adjacent cliques
0054     parms.ProbInterCliqueEdges = 0.5;
0055     
0056     % Probability an edge is unidirectional
0057     parms.ProbUniDirectional = 0.2;
0058     
0059     % Probability an edge label is integer.
0060     % This parameter is ignored since this is the integer only version.
0061     parms.PercentIntegerLabels = 0.6;
0062     
0063     % Max edge weight.
0064     parms.MaxIntLabel = 2.^Scale;
0065     
0066     % Max length of string for string labels
0067     % This parameter is ignored since this is the integer only version.
0068     parms.MaxStrLen = Scale;
0069 end
0070 
0071 
0072 disp('1: Generate clique sizes and first vertices');
0073 [cliqueID, cliquesizes] = getCliqueSizes (parms);
0074 
0075 disp('2: Generate edges within cliques')
0076 [startVertices, endVertices] = genEdgesWithinCliques (parms, cliquesizes);
0077 nIntraCliqueEdges = length(startVertices);
0078 
0079 disp('3: Generate edges between cliques')
0080 [startVertices, endVertices] = ...
0081     genEdgesBetweenCliques (parms, startVertices, endVertices);
0082 nInterCliqueEdges = length(startVertices) - nIntraCliqueEdges;
0083 
0084 disp('4: Make edges unidirectional or bidirectional');
0085 [startVertices, endVertices] = ...
0086     makeEdgesUniOrBi (parms, startVertices, endVertices);
0087 
0088 disp('5: Generate duplicate edges')
0089 [startVertices, endVertices] = ...
0090     genDuplicateEdges (parms, startVertices, endVertices);
0091 
0092 disp('6: Generate edge labels')
0093 [startVertices, endVertices, intWeights] = ...
0094     genEdgeLabels(parms, startVertices, endVertices);
0095 
0096 if nargout > 1
0097     disp('7: Randomize vertex numbers and triple order')
0098     [startVertices, endVertices, intWeights, vperm] = ...
0099         randomizeVerticesAndTriples (parms, ...
0100         startVertices, endVertices, intWeights);
0101 
0102     Verif.parms = parms;
0103     Verif.totVertices = parms.TotVertices;
0104     Verif.maxCliqueSize = parms.MaxCliqueSize;
0105     Verif.numInterCliqueLinks = nInterCliqueEdges;
0106     Verif.vperm = vperm;
0107     Verif.cliqueID(vperm) = cliqueID;
0108 else
0109     disp('7: Randomize vertex numbers and triple order')
0110     [startVertices, endVertices, intWeights] = ...
0111         randomizeVerticesAndTriples (parms, ...
0112         startVertices, endVertices, intWeights);
0113 end
0114 E.startVertices = startVertices;
0115 E.endVertices = endVertices;
0116 E.intWeights = intWeights;
0117 
0118 
0119 fprintf ('\n==========================================================\n');
0120 fprintf ('Scale                                 : %d\n', parms.Scale);
0121 fprintf ('Number of Vertices                    : %d\n', parms.TotVertices);
0122 fprintf ('Number of Cliques                     : %d\n', max(cliqueID));
0123 fprintf ('Max Clique Size                       : %d\n', max(cliquesizes));
0124 fprintf ('Edges in directed multigraph          : %d\n', ...
0125     length(startVertices));
0126 fprintf ('Intraclique edges in undirected graph : %d\n', nIntraCliqueEdges);
0127 fprintf ('Interclique edges in undirected graph : %d\n', nInterCliqueEdges);
0128 fprintf ('Edges in undirected graph             : %d\n', ...
0129     nIntraCliqueEdges + nInterCliqueEdges);
0130 fprintf ('==========================================================\n\n');
0131 
0132 end % gendata2
0133 
0134 
0135 
0136 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0137 % Generate clique sizes and first vertices
0138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0139 
0140 function [cliqueID, cliquesizes] = getCliqueSizes (parms)
0141 
0142 TotVertices = parms.TotVertices;
0143 MaxCliqueSize = parms.MaxCliqueSize;
0144 
0145 ncliques = ceil(2*TotVertices/MaxCliqueSize);
0146 cliquesizes = 0;
0147 while sum(cliquesizes) < TotVertices
0148     ncliques = round(1.05 * ncliques + 1);
0149     cliquesizes = 1 + fix(MaxCliqueSize * rand(1,ncliques));
0150 end
0151 firstvtx = cumsum([1 cliquesizes]);
0152 ncliques = min(find(firstvtx > TotVertices)) - 1;
0153 firstvtx = firstvtx(1:ncliques);
0154 cliquesizes = diff([firstvtx TotVertices+1]);
0155 cliqueID = zeros(1,TotVertices);
0156 cliqueID(firstvtx) = 1;
0157 cliqueID = cumsum(cliqueID);
0158 
0159 end % getCliqueSizes
0160 
0161 
0162 
0163 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0164 % Generate edges within cliques
0165 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0166 
0167 function [startVertices, endVertices] = ...
0168     genEdgesWithinCliques (parms, cliquesizes)
0169 
0170 MaxCliqueSize = parms.MaxCliqueSize;
0171 
0172 cedges = (cliquesizes .* (cliquesizes-1))/2;   % edges within each clique
0173 totcedges = sum(cedges);
0174 if isa (cedges, 'ddense')
0175     totcedges = totcedges*p;
0176 end
0177 
0178 nonsingletons = find(cedges > 0);
0179 singletons = find(cedges == 0);
0180 firstedge = cumsum([1 cedges(1:end-1)]);
0181 
0182 % index maps a triple into the typical clique
0183 index = ones(1,totcedges);
0184 index(firstedge(nonsingletons)) = [1 1-cedges(nonsingletons(1:end-1))];
0185 index = cumsum(index);
0186 
0187 % offset gives local indexing for triples
0188 offset = zeros(1,totcedges);
0189 offset(firstedge(nonsingletons)) = [0 cliquesizes(nonsingletons(1:end-1))];
0190 offset = cumsum(offset);
0191 
0192 % Fix offsets to account for singletons
0193 singleoffset = sparse(firstedge(singletons), 1, 1, totcedges+1, 1);
0194 singleoffset = cumsum(full(singleoffset))'; 
0195 offset = offset + singleoffset(1:end-1);
0196 
0197 % typical max-size clique
0198 C = ones(MaxCliqueSize,1) * (1:MaxCliqueSize);
0199 CI = C(:)';
0200 C = C';
0201 CJ = C(:)';
0202 f = find(CI > CJ);
0203 CI = CI(f);
0204 CJ = CJ(f);
0205 
0206 % triples for cliques
0207 startVertices = [];
0208 endVertices = [];
0209 startVertices = [startVertices CI(index)+offset];
0210 endVertices = [endVertices CJ(index)+offset];
0211 
0212 end % genEdgesWithinCliques
0213 
0214 
0215 
0216 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0217 % Generate edges between cliques
0218 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0219 
0220 function [startVertices, endVertices] = ...
0221     genEdgesBetweenCliques (parms, startVertices, endVertices)
0222 
0223 Scale = parms.Scale;
0224 TotVertices = parms.TotVertices;
0225 ProbInterCliqueEdges = parms.ProbInterCliqueEdges;
0226 
0227 % Find all pairs of vertices for all distances
0228 I = kron (ones(1,Scale-2), 1:TotVertices);
0229 dist =  kron (2.^(1:(Scale-2)), ones(1,2.^Scale));
0230 prob = ProbInterCliqueEdges .* (2 ./ dist);
0231 J = 1 + mod(I+dist-1, TotVertices);
0232 possibles = find(rand(1,(Scale-2)*TotVertices) < prob);
0233 startVertices = [startVertices I(possibles)];
0234 endVertices = [endVertices J(possibles)];
0235 
0236 % Remove new edges that might have been inserted in cliques. 
0237 % We still do not have a multigraph, so all edges must be unique.
0238 % All edges are also still unidirectional.
0239 
0240 S = sparse (startVertices, endVertices, 1, TotVertices, TotVertices);
0241 [startVertices, endVertices] = find (tril (S|S', -1));
0242 startVertices = startVertices';
0243 endVertices = endVertices';
0244 
0245 end % genEdgesBetweenCliques
0246 
0247 
0248 
0249 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0250 % Make edges unidirectional or bidirectional
0251 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0252 
0253 function [startVertices, endVertices] = ...
0254     makeEdgesUniOrBi (parms, startVertices, endVertices)
0255 
0256 ProbUniDirectional = parms.ProbUniDirectional;
0257 
0258 nedges = length (startVertices);
0259 
0260 R = rand (1, nedges);
0261 BiDir = find (R > ProbUniDirectional);
0262 startBiDir = startVertices(BiDir);
0263 endBiDir = endVertices(BiDir);
0264 
0265 UniDir = find (R <= ProbUniDirectional);
0266 R = rand (1, length(UniDir));
0267 oneway = UniDir(R <= 0.5);
0268 otherway = UniDir(R > 0.5);
0269 
0270 startVerticesNew = [startVertices(oneway) ...
0271     endVertices(otherway) ...
0272     startBiDir ...
0273     endBiDir];
0274 endVerticesNew = [endVertices(oneway) ...
0275     startVertices(otherway) ...
0276     endBiDir ...
0277     startBiDir];
0278 
0279 startVertices = startVerticesNew;
0280 endVertices = endVerticesNew;
0281 
0282 end % makeEdgesUniOrBi
0283 
0284 
0285 
0286 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0287 % Generate duplicate edges
0288 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0289 
0290 function [startVertices, endVertices] = ...
0291     genDuplicateEdges(parms, startVertices, endVertices)
0292 
0293 MaxParallelEdges = parms.MaxParallelEdges;
0294 
0295 offset = 0;
0296 nnew = length (startVertices);
0297 for dups = 2:MaxParallelEdges
0298     dupedges = find (rand (1, nnew) < ...
0299              (MaxParallelEdges-dups+1) / ...
0300              (MaxParallelEdges-dups+2));
0301     dupedges = dupedges + offset;
0302     startVertices = [startVertices startVertices(dupedges)];
0303     endVertices = [endVertices endVertices(dupedges)];
0304     offset = offset + nnew;
0305     nnew = length (dupedges);
0306 end
0307 
0308 end % genDuplicateEdges
0309 
0310 
0311 
0312 
0313 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0314 % Generate edge labels (Integer only version)
0315 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0316 
0317 function [startVertices, endVertices, intWeights] = ...
0318     genEdgeLabels(parms, startVertices, endVertices)
0319 
0320 MaxIntLabel = parms.MaxIntLabel;
0321 
0322 nedges = length(startVertices);
0323 intWeights = 1 + fix(MaxIntLabel*rand(1,nedges));
0324 
0325 end % genEdgeLabels
0326 
0327 
0328 
0329 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0330 % Randomize vertex numbers and triple order
0331 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0332 
0333 function [startVertices, endVertices, intWeights, vperm] = ...
0334     randomizeVerticesAndTriples(parms, ...
0335     startVertices, endVertices, intWeights)
0336 
0337 TotVertices = parms.TotVertices;
0338 
0339 nedges = length(startVertices);
0340 vperm = randperm(TotVertices);
0341 startVertices = vperm(startVertices);
0342 endVertices = vperm(endVertices);
0343 tperm = randperm(nedges);
0344 startVertices = startVertices(tperm);
0345 endVertices = endVertices(tperm);
0346 intWeights = intWeights(tperm);
0347 
0348 end % randomizeVerticesAndTriples