test.cpp (1854B)
1 #include "papi.h" 2 #include <iostream> 3 4 // Broadwell CPU on cluster, you can get one with 5 // salloc -N1 -c32 -t 01:00:00 6 // 7 // Model name: Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz 8 // L1d cache: 32K 9 // L1i cache: 32K 10 // L2 cache: 256K 11 // L3 cache: 40960K 12 #define L1_SIZE_KB 32 13 #define L2_SIZE_KB 256 14 #define L3_SIZE_KB 40960 15 16 void test(char *A); 17 18 int main() 19 { 20 char A[100] = {0}; 21 22 // Initialize PAPI 23 int event_set = PAPI_NULL; 24 int events[4] = {PAPI_LST_INS, PAPI_L1_DCM, PAPI_L2_DCM, PAPI_L3_DCA}; 25 long long int counters[4]; 26 PAPI_library_init(PAPI_VER_CURRENT); 27 PAPI_create_eventset(&event_set); 28 PAPI_add_events(event_set, events, 4); 29 30 // start PAPI measurement 31 PAPI_start(event_set); 32 33 // run code to be measured 34 test(A); 35 36 // stop PAPI and get counter values 37 PAPI_stop(event_set, counters); 38 39 // clang-format off 40 const long long total_lst = counters[0]; // total load/store 41 const long long total_l1m = counters[1]; // total L1 load misses 42 const long long total_l2m = counters[2]; // total L2 load misses 43 const long long total_l3m = counters[3]; // total L3 load misses 44 // clang-format on 45 46 int sum = 0; 47 for (int i = 0; i < 100; ++i) { 48 sum += A[i]; 49 } 50 51 std::cout << "Result: " << sum << '\n'; 52 std::cout << "L1 cache size: " << L1_SIZE_KB << " KB\n"; 53 std::cout << "L2 cache size: " << L2_SIZE_KB << " KB\n"; 54 std::cout << "L3 cache size: " << L3_SIZE_KB << " KB\n"; 55 std::cout << "Total L1 data misses: " << total_l1m << '\n'; 56 std::cout << "Total L2 data misses: " << total_l2m << '\n'; 57 std::cout << "Total L3 data accesses: " << total_l3m << '\n'; 58 std::cout << "Total load/store: " << total_lst << '\n'; 59 60 return 0; 61 }