common.c (10948B)
1 /**************************************************************************** 2 * * 3 * OpenMP MicroBenchmark Suite - Version 3.1 * 4 * * 5 * produced by * 6 * * 7 * Mark Bull, Fiona Reid and Nix Mc Donnell * 8 * * 9 * at * 10 * * 11 * Edinburgh Parallel Computing Centre * 12 * * 13 * email: markb@epcc.ed.ac.uk or fiona@epcc.ed.ac.uk * 14 * * 15 * * 16 * This version copyright (c) The University of Edinburgh, 2015. * 17 * * 18 * * 19 * Licensed under the Apache License, Version 2.0 (the "License"); * 20 * you may not use this file except in compliance with the License. * 21 * You may obtain a copy of the License at * 22 * * 23 * http://www.apache.org/licenses/LICENSE-2.0 * 24 * * 25 * Unless required by applicable law or agreed to in writing, software * 26 * distributed under the License is distributed on an "AS IS" BASIS, * 27 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 28 * See the License for the specific language governing permissions and * 29 * limitations under the License. * 30 * * 31 ****************************************************************************/ 32 33 #include <stdlib.h> 34 #include <stdio.h> 35 #include <string.h> 36 #include <math.h> 37 #include <omp.h> 38 39 #include "common.h" 40 41 #define CONF95 1.96 42 43 int nthreads = -1; // Number of OpenMP threads 44 int delaylength = -1; // The number of iterations to delay for 45 int outerreps = -1; // Outer repetitions 46 double delaytime = -1.0; // Length of time to delay for in microseconds 47 double targettesttime = 0.0; // The length of time in microseconds that the test 48 // should run for. 49 unsigned long innerreps; // Inner repetitions 50 double *times; // Array of doubles storing the benchmark times in microseconds 51 double referencetime; // The average reference time in microseconds to perform 52 // outerreps runs 53 double referencesd; // The standard deviation in the reference time in 54 // microseconds for outerreps runs. 
55 double testtime; // The average test time in microseconds for 56 // outerreps runs 57 double testsd; // The standard deviation in the test time in 58 // microseconds for outerreps runs. 59 60 void usage(char *argv[]) { 61 printf("Usage: %s.x \n" 62 "\t--outer-repetitions <outer-repetitions> (default %d)\n" 63 "\t--test-time <target-test-time> (default %0.2f microseconds)\n" 64 "\t--delay-time <delay-time> (default %0.4f microseconds)\n" 65 "\t--delay-length <delay-length> " 66 "(default auto-generated based on processor speed)\n", 67 argv[0], 68 DEFAULT_OUTER_REPS, DEFAULT_TEST_TARGET_TIME, DEFAULT_DELAY_TIME); 69 } 70 71 void parse_args(int argc, char *argv[]) { 72 // Parse the parameters 73 int arg; 74 for (arg = 1; arg < argc; arg++) { 75 if (strcmp(argv[arg], "--delay-time") == 0.0) { 76 delaytime = atof(argv[++arg]); 77 if (delaytime == 0.0) { 78 printf("Invalid float:--delay-time: %s\n", argv[arg]); 79 usage(argv); 80 exit(EXIT_FAILURE); 81 } 82 83 } else if (strcmp(argv[arg], "--outer-repetitions") == 0) { 84 outerreps = atoi(argv[++arg]); 85 if (outerreps == 0) { 86 printf("Invalid integer:--outer-repetitions: %s\n", argv[arg]); 87 usage(argv); 88 exit(EXIT_FAILURE); 89 } 90 91 } else if (strcmp(argv[arg], "--test-time") == 0) { 92 targettesttime = atof(argv[++arg]); 93 if (targettesttime == 0) { 94 printf("Invalid integer:--test-time: %s\n", argv[arg]); 95 usage(argv); 96 exit(EXIT_FAILURE); 97 } 98 99 } else if (strcmp(argv[arg], "-h") == 0) { 100 usage(argv); 101 exit(EXIT_SUCCESS); 102 103 } else { 104 printf("Invalid parameters: %s\n", argv[arg]); 105 usage(argv); 106 exit(EXIT_FAILURE); 107 } 108 } 109 } 110 111 int getdelaylengthfromtime(double delaytime) { 112 int i, reps; 113 double lapsedtime, starttime; // seconds 114 115 reps = 1000; 116 lapsedtime = 0.0; 117 118 delaytime = delaytime/1.0E6; // convert from microseconds to seconds 119 120 // Note: delaytime is local to this function and thus the conversion 121 // does not propagate to the 
main code. 122 123 // Here we want to use the delaytime in microseconds to find the 124 // delaylength in iterations. We start with delaylength=0 and 125 // increase until we get a large enough delaytime, return delaylength 126 // in iterations. 127 128 delaylength = 0; 129 delay(delaylength); 130 131 while (lapsedtime < delaytime) { 132 delaylength = delaylength * 1.1 + 1; 133 starttime = getclock(); 134 for (i = 0; i < reps; i++) { 135 delay(delaylength); 136 } 137 lapsedtime = (getclock() - starttime) / (double) reps; 138 } 139 return delaylength; 140 141 } 142 143 unsigned long getinnerreps(void (*test)(void)) { 144 innerreps = 10L; // some initial value 145 double time = 0.0; 146 147 while (time < targettesttime) { 148 double start = getclock(); 149 test(); 150 time = (getclock() - start) * 1.0e6; 151 innerreps *=2; 152 153 // Test to stop code if compiler is optimising reference time expressions away 154 if (innerreps > (targettesttime*1.0e15)) { 155 printf("Compiler has optimised reference loop away, STOP! \n"); 156 printf("Try recompiling with lower optimisation level \n"); 157 exit(1); 158 } 159 } 160 return innerreps; 161 } 162 163 void printheader(char *name) { 164 printf("\n"); 165 printf("--------------------------------------------------------\n"); 166 printf("Computing %s time using %lu reps\n", name, innerreps); 167 } 168 169 void stats(double *mtp, double *sdp) { 170 171 double meantime, totaltime, sumsq, mintime, maxtime, sd, cutoff; 172 173 int i, nr; 174 175 mintime = 1.0e10; 176 maxtime = 0.; 177 totaltime = 0.; 178 179 for (i = 1; i <= outerreps; i++) { 180 mintime = (mintime < times[i]) ? mintime : times[i]; 181 maxtime = (maxtime > times[i]) ? 
maxtime : times[i]; 182 totaltime += times[i]; 183 } 184 185 meantime = totaltime / outerreps; 186 sumsq = 0; 187 188 for (i = 1; i <= outerreps; i++) { 189 sumsq += (times[i] - meantime) * (times[i] - meantime); 190 } 191 sd = sqrt(sumsq / (outerreps - 1)); 192 193 cutoff = 3.0 * sd; 194 195 nr = 0; 196 197 for (i = 1; i <= outerreps; i++) { 198 if (fabs(times[i] - meantime) > cutoff) 199 nr++; 200 } 201 202 printf("\n"); 203 printf("Sample_size Average Min Max S.D. Outliers\n"); 204 printf(" %d %f %f %f %f %d\n", 205 outerreps, meantime, mintime, maxtime, sd, nr); 206 printf("\n"); 207 208 *mtp = meantime; 209 *sdp = sd; 210 211 } 212 213 void printfooter(char *name, double testtime, double testsd, 214 double referencetime, double refsd) { 215 printf("%s time = %f microseconds +/- %f\n", 216 name, testtime, CONF95*testsd); 217 printf("%s overhead = %f microseconds +/- %f\n", 218 name, testtime-referencetime, CONF95*(testsd+referencesd)); 219 220 } 221 222 void printreferencefooter(char *name, double referencetime, double referencesd) { 223 printf("%s time = %f microseconds +/- %f\n", 224 name, referencetime, CONF95 * referencesd); 225 } 226 227 void init(int argc, char **argv) 228 { 229 #pragma omp parallel 230 { 231 #pragma omp master 232 { 233 nthreads = omp_get_num_threads(); 234 } 235 236 } 237 238 parse_args(argc, argv); 239 240 if (outerreps == -1) { 241 outerreps = DEFAULT_OUTER_REPS; 242 } 243 if (targettesttime == 0.0) { 244 targettesttime = DEFAULT_TEST_TARGET_TIME; 245 } 246 if (delaytime == -1.0) { 247 delaytime = DEFAULT_DELAY_TIME; 248 } 249 delaylength = getdelaylengthfromtime(delaytime); // Always need to compute delaylength in iterations 250 251 times = malloc((outerreps+1) * sizeof(double)); 252 253 printf("Running OpenMP benchmark version 3.0\n" 254 "\t%d thread(s)\n" 255 "\t%d outer repetitions\n" 256 "\t%0.2f test time (microseconds)\n" 257 "\t%d delay length (iterations) \n" 258 "\t%f delay time (microseconds)\n", 259 nthreads, 260 
outerreps, targettesttime, 261 delaylength, delaytime); 262 } 263 264 void finalise(void) { 265 free(times); 266 267 } 268 269 void initreference(char *name) { 270 printheader(name); 271 272 } 273 274 /* Calculate the reference time. */ 275 void reference(char *name, void (*refer)(void)) { 276 int k; 277 double start; 278 279 // Calculate the required number of innerreps 280 innerreps = getinnerreps(refer); 281 282 initreference(name); 283 284 for (k = 0; k <= outerreps; k++) { 285 start = getclock(); 286 refer(); 287 times[k] = (getclock() - start) * 1.0e6 / (double) innerreps; 288 } 289 290 finalisereference(name); 291 292 } 293 294 void finalisereference(char *name) { 295 stats(&referencetime, &referencesd); 296 printreferencefooter(name, referencetime, referencesd); 297 298 } 299 300 void intitest(char *name) { 301 printheader(name); 302 303 } 304 305 void finalisetest(char *name) { 306 stats(&testtime, &testsd); 307 printfooter(name, testtime, testsd, referencetime, referencesd); 308 309 } 310 311 /* Function to run a microbenchmark test*/ 312 void benchmark(char *name, void (*test)(void)) 313 { 314 int k; 315 double start; 316 317 // Calculate the required number of innerreps 318 innerreps = getinnerreps(test); 319 320 intitest(name); 321 322 for (k=0; k<=outerreps; k++) { 323 start = getclock(); 324 test(); 325 times[k] = (getclock() - start) * 1.0e6 / (double) innerreps; 326 } 327 328 finalisetest(name); 329 330 } 331 332 // For the Cray compiler on HECToR we need to turn off optimisation 333 // for the delay and array_delay functions. Other compilers should 334 // not be afffected. 
#pragma _CRI noopt
/*
 * Spin for delaylength iterations, accumulating the loop counter into a
 * float. A non-positive delaylength performs no iterations.
 */
void delay(int delaylength) {

    float sum = 0.;
    int iter = 0;

    while (iter < delaylength) {
        sum += iter;
        iter++;
    }

    // The sum of non-negative counters is never negative, so nothing is
    // printed; the test only gives the accumulated value a use.
    if (sum < 0)
        printf("%f \n", sum);

}

/*
 * Same busy loop as delay(), but the accumulator is the caller's a[0]:
 * it is reset to 1.0 and then each counter 0..delaylength-1 is added to it.
 */
void array_delay(int delaylength, double a[1]) {

    int iter = 0;

    *a = 1.0;
    while (iter < delaylength) {
        *a += iter;
        iter++;
    }

    if (*a < 0)
        printf("%f \n", *a);

}
// Re-enable optimisation for remainder of source.
#pragma _CRI opt

/*
 * Wall-clock time in seconds, measured from some arbitrary but consistent
 * point, as reported by omp_get_wtime().
 */
double getclock() {
    double omp_get_wtime(void);

    return omp_get_wtime();
}

/* Always returns 0. */
int returnfalse() {
    int result = 0;

    return result;
}