cs205-lecture-examples

Example code used during Harvard CS205 lectures
git clone https://git.0xfab.ch/cs205-lecture-examples.git
Log | Files | Refs | README | LICENSE

saxpy_SSE.c (337B)


      1 #include <x86intrin.h>
      2 
      3 void saxpy_SSE(float *x, float *y, float a, size_t n)
      4 {
      5     const __m128 a4 = _mm_set1_ps(a);
      6     // assumes n % 4 == 0
      7     for (size_t i = 0; i < n; i += 4) {
      8         __m128 r0 = _mm_load_ps(x + i);
      9         __m128 r1 = _mm_load_ps(y + i);
     10         _mm_store_ps(y + i, _mm_add_ps(_mm_mul_ps(r0, a4), r1));
     11     }
     12 }