cs205-lecture-examples

Example codes used during Harvard CS205 lectures
git clone https://git.0xfab.ch/cs205-lecture-examples.git
Log | Files | Refs | README | LICENSE

common.c (10948B)


      1 /****************************************************************************
      2 *                                                                           *
      3 *             OpenMP MicroBenchmark Suite - Version 3.1                     *
      4 *                                                                           *
      5 *                            produced by                                    *
      6 *                                                                           *
      7 *             Mark Bull, Fiona Reid and Nix Mc Donnell                      *
      8 *                                                                           *
      9 *                                at                                         *
     10 *                                                                           *
     11 *                Edinburgh Parallel Computing Centre                        *
     12 *                                                                           *
     13 *         email: markb@epcc.ed.ac.uk or fiona@epcc.ed.ac.uk                 *
     14 *                                                                           *
     15 *                                                                           *
     16 *      This version copyright (c) The University of Edinburgh, 2015.        *
     17 *                                                                           *
     18 *                                                                           *
     19 *  Licensed under the Apache License, Version 2.0 (the "License");          *
     20 *  you may not use this file except in compliance with the License.         *
     21 *  You may obtain a copy of the License at                                  *
     22 *                                                                           *
     23 *      http://www.apache.org/licenses/LICENSE-2.0                           *
     24 *                                                                           *
     25 *  Unless required by applicable law or agreed to in writing, software      *
     26 *  distributed under the License is distributed on an "AS IS" BASIS,        *
     27 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     28 *  See the License for the specific language governing permissions and      *
     29 *  limitations under the License.                                           *
     30 *                                                                           *
     31 ****************************************************************************/
     32 
     #include <limits.h>
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     #include <omp.h>

     #include "common.h"
     40 
     41 #define CONF95 1.96
     42 
     /* Run configuration. The initialisers are placeholders: init() overwrites
      * nthreads and delaylength, and treats outerreps == -1, delaytime == -1.0
      * and targettesttime == 0.0 as "not set on the command line". */
     int nthreads = -1;           /* number of OpenMP threads */
     int delaylength = -1;        /* number of iterations to delay for */
     int outerreps = -1;          /* outer repetitions */
     double delaytime = -1.0;     /* length of time to delay for, in microseconds */
     double targettesttime = 0.0; /* length of time, in microseconds, that the
                                   * test should run for */

     /* Measurement state shared by the benchmark drivers. */
     unsigned long innerreps;     /* inner repetitions */
     double *times;               /* benchmark times, in microseconds */

     /* Results for outerreps runs, all in microseconds. */
     double referencetime;        /* average reference time */
     double referencesd;          /* standard deviation of the reference time */
     double testtime;             /* average test time */
     double testsd;               /* standard deviation of the test time */
     59 
     60 void usage(char *argv[]) {
     61     printf("Usage: %s.x \n"
     62 	   "\t--outer-repetitions <outer-repetitions> (default %d)\n"
     63 	   "\t--test-time <target-test-time> (default %0.2f microseconds)\n"
     64 	   "\t--delay-time <delay-time> (default %0.4f microseconds)\n"
     65 	   "\t--delay-length <delay-length> "
     66 	   "(default auto-generated based on processor speed)\n",
     67 	   argv[0],
     68 	   DEFAULT_OUTER_REPS, DEFAULT_TEST_TARGET_TIME, DEFAULT_DELAY_TIME);
     69 }
     70 
     71 void parse_args(int argc, char *argv[]) {
     72     // Parse the parameters
     73     int arg;
     74     for (arg = 1; arg < argc; arg++) {
     75 	if (strcmp(argv[arg], "--delay-time") == 0.0) {
     76 	    delaytime = atof(argv[++arg]);
     77 	    if (delaytime == 0.0) {
     78 		printf("Invalid float:--delay-time: %s\n", argv[arg]);
     79 		usage(argv);
     80 		exit(EXIT_FAILURE);
     81 	    }
     82 		
     83 	} else if (strcmp(argv[arg], "--outer-repetitions") == 0) {
     84 	    outerreps = atoi(argv[++arg]);
     85 	    if (outerreps == 0) {
     86 		printf("Invalid integer:--outer-repetitions: %s\n", argv[arg]);
     87 		usage(argv);
     88 		exit(EXIT_FAILURE);
     89 	    }
     90 		
     91 	} else if (strcmp(argv[arg], "--test-time") == 0) {
     92 	    targettesttime = atof(argv[++arg]);
     93 	    if (targettesttime == 0) {
     94 		printf("Invalid integer:--test-time: %s\n", argv[arg]);
     95 		usage(argv);
     96 		exit(EXIT_FAILURE);
     97 	    }
     98 		
     99 	} else if (strcmp(argv[arg], "-h") == 0) {
    100 	    usage(argv);
    101 	    exit(EXIT_SUCCESS);
    102 		
    103 	} else {
    104 	    printf("Invalid parameters: %s\n", argv[arg]);
    105 	    usage(argv);
    106 	    exit(EXIT_FAILURE);
    107 	}
    108     }
    109 }
    110 
    111 int getdelaylengthfromtime(double delaytime) {
    112     int i, reps;
    113     double lapsedtime, starttime; // seconds
    114 
    115     reps = 1000;
    116     lapsedtime = 0.0;
    117 
    118     delaytime = delaytime/1.0E6; // convert from microseconds to seconds
    119 
    120     // Note: delaytime is local to this function and thus the conversion
    121     // does not propagate to the main code. 
    122 
    123     // Here we want to use the delaytime in microseconds to find the 
    124     // delaylength in iterations. We start with delaylength=0 and 
    125     // increase until we get a large enough delaytime, return delaylength 
    126     // in iterations. 
    127 
    128     delaylength = 0;
    129     delay(delaylength);
    130 
    131     while (lapsedtime < delaytime) {
    132 	delaylength = delaylength * 1.1 + 1;
    133 	starttime = getclock();
    134 	for (i = 0; i < reps; i++) {
    135 	    delay(delaylength);
    136 	}
    137 	lapsedtime = (getclock() - starttime) / (double) reps;
    138     }
    139     return delaylength;
    140 
    141 }
    142 
    143 unsigned long getinnerreps(void (*test)(void)) {
    144     innerreps = 10L;  // some initial value
    145     double time = 0.0;
    146 
    147     while (time < targettesttime) {
    148 	double start  = getclock();
    149 	test();
    150 	time = (getclock() - start) * 1.0e6;
    151 	innerreps *=2;
    152 
    153 	// Test to stop code if compiler is optimising reference time expressions away
    154 	if (innerreps > (targettesttime*1.0e15)) {
    155 	    printf("Compiler has optimised reference loop away, STOP! \n");
    156 	    printf("Try recompiling with lower optimisation level \n");
    157 	    exit(1);
    158 	}
    159     }
    160     return innerreps;
    161 }
    162 
    163 void printheader(char *name) {
    164     printf("\n");
    165     printf("--------------------------------------------------------\n");
    166     printf("Computing %s time using %lu reps\n", name, innerreps);
    167 }
    168 
    169 void stats(double *mtp, double *sdp) {
    170 
    171     double meantime, totaltime, sumsq, mintime, maxtime, sd, cutoff;
    172 
    173     int i, nr;
    174 
    175     mintime = 1.0e10;
    176     maxtime = 0.;
    177     totaltime = 0.;
    178 
    179     for (i = 1; i <= outerreps; i++) {
    180 	mintime = (mintime < times[i]) ? mintime : times[i];
    181 	maxtime = (maxtime > times[i]) ? maxtime : times[i];
    182 	totaltime += times[i];
    183     }
    184 
    185     meantime = totaltime / outerreps;
    186     sumsq = 0;
    187 
    188     for (i = 1; i <= outerreps; i++) {
    189 	sumsq += (times[i] - meantime) * (times[i] - meantime);
    190     }
    191     sd = sqrt(sumsq / (outerreps - 1));
    192 
    193     cutoff = 3.0 * sd;
    194 
    195     nr = 0;
    196 
    197     for (i = 1; i <= outerreps; i++) {
    198 	if (fabs(times[i] - meantime) > cutoff)
    199 	    nr++;
    200     }
    201 
    202     printf("\n");
    203     printf("Sample_size       Average     Min         Max          S.D.          Outliers\n");
    204     printf(" %d                %f   %f   %f    %f      %d\n",
    205 	   outerreps, meantime, mintime, maxtime, sd, nr);
    206     printf("\n");
    207 
    208     *mtp = meantime;
    209     *sdp = sd;
    210 
    211 }
    212 
    213 void printfooter(char *name, double testtime, double testsd,
    214 		 double referencetime, double refsd) {
    215     printf("%s time     = %f microseconds +/- %f\n",
    216 	   name, testtime, CONF95*testsd);
    217     printf("%s overhead = %f microseconds +/- %f\n",
    218 	   name, testtime-referencetime, CONF95*(testsd+referencesd));
    219 
    220 }
    221 
    222 void printreferencefooter(char *name, double referencetime, double referencesd) {
    223     printf("%s time     = %f microseconds +/- %f\n",
    224 	   name, referencetime, CONF95 * referencesd);
    225 }
    226 
    227 void init(int argc, char **argv)
    228 {
    229 #pragma omp parallel
    230     {
    231 #pragma omp master
    232 	{
    233 	    nthreads = omp_get_num_threads();
    234 	}
    235 
    236     }
    237 
    238     parse_args(argc, argv);
    239 
    240     if (outerreps == -1) {
    241 	outerreps = DEFAULT_OUTER_REPS;
    242     }
    243     if (targettesttime == 0.0) {
    244 	targettesttime = DEFAULT_TEST_TARGET_TIME;
    245     }
    246     if (delaytime == -1.0) {
    247 	delaytime = DEFAULT_DELAY_TIME; 
    248     }
    249     delaylength = getdelaylengthfromtime(delaytime); // Always need to compute delaylength in iterations 
    250     
    251     times = malloc((outerreps+1) * sizeof(double));
    252 
    253     printf("Running OpenMP benchmark version 3.0\n"
    254 	   "\t%d thread(s)\n"
    255 	   "\t%d outer repetitions\n"
    256 	   "\t%0.2f test time (microseconds)\n"
    257 	   "\t%d delay length (iterations) \n"
    258 	   "\t%f delay time (microseconds)\n",
    259 	   nthreads,
    260 	   outerreps, targettesttime,
    261 	   delaylength, delaytime);
    262 }
    263 
    264 void finalise(void) {
    265     free(times);
    266 
    267 }
    268 
    /*
     * Announce the start of a reference measurement by printing its header.
     *
     * name: label of the reference measurement.
     */
    void initreference(char *name) {
        printheader(name);
    }
    273 
    274 /* Calculate the reference time. */
    275 void reference(char *name, void (*refer)(void)) {
    276     int k;
    277     double start;
    278 
    279     // Calculate the required number of innerreps
    280     innerreps = getinnerreps(refer);
    281 
    282     initreference(name);
    283 
    284     for (k = 0; k <= outerreps; k++) {
    285 	start = getclock();
    286 	refer();
    287 	times[k] = (getclock() - start) * 1.0e6 / (double) innerreps;
    288     }
    289 
    290     finalisereference(name);
    291 
    292 }
    293 
    294 void finalisereference(char *name) {
    295     stats(&referencetime, &referencesd);
    296     printreferencefooter(name, referencetime, referencesd);
    297 
    298 }
    299 
    /*
     * Announce the start of a test measurement by printing its header.
     *
     * name: label of the test.
     */
    void intitest(char *name) {
        printheader(name);
    }
    304 
    305 void finalisetest(char *name) {
    306     stats(&testtime, &testsd);
    307     printfooter(name, testtime, testsd, referencetime, referencesd);
    308 
    309 }
    310 
    311 /* Function to run a microbenchmark test*/
    312 void benchmark(char *name, void (*test)(void))
    313 {
    314     int k;
    315     double start;
    316 
    317     // Calculate the required number of innerreps
    318     innerreps = getinnerreps(test);
    319 
    320     intitest(name);
    321 
    322     for (k=0; k<=outerreps; k++) {
    323 	start = getclock();
    324 	test();
    325 	times[k] = (getclock() - start) * 1.0e6 / (double) innerreps;
    326     }
    327 
    328     finalisetest(name);
    329 
    330 }
    331 
    332 // For the Cray compiler on HECToR we need to turn off optimisation 
    333 // for the delay and array_delay functions. Other compilers should
    334 // not be affected.
    335 #pragma _CRI noopt
    /*
     * Busy-work loop used to simulate a fixed amount of computation.
     *
     * delaylength: number of loop iterations to perform.
     *
     * The running sum is tested (and printed if negative) after the loop so
     * that the result is used and the loop is not trivially dead code.
     */
    void delay(int delaylength) {
        float sum = 0.;
        int step = 0;

        while (step < delaylength) {
            sum += step;
            step++;
        }

        if (sum < 0)
            printf("%f \n", sum);
    }
    347 
    /*
     * Busy-work loop that accumulates into caller-provided storage.
     *
     * delaylength: number of loop iterations to perform.
     * a:           a[0] is reset to 1.0 and then has 0, 1, ..., delaylength - 1
     *              added to it.
     *
     * The result is tested (and printed if negative) after the loop so that it
     * is used.
     */
    void array_delay(int delaylength, double a[1]) {
        int step = 0;

        a[0] = 1.0;
        while (step < delaylength) {
            a[0] += step;
            step++;
        }

        if (a[0] < 0)
            printf("%f \n", a[0]);
    }
    358 // Re-enable optimisation for remainder of source. 
    359 #pragma _CRI opt
    360 
    /*
     * Return the current wall-clock reading from omp_get_wtime(): a value in
     * seconds measured from some arbitrary but consistent point, so only
     * differences between two readings are meaningful.
     */
    double getclock() {
        double omp_get_wtime(void);

        return omp_get_wtime();
    }
    369 
    /*
     * Always return 0 (false).
     */
    int returnfalse() {
        const int never = 0;

        return never;
    }
    374