/* * * $g++ -O3 -msse2 sseex1.c * $./a.out 51200 * */ #include #include #include void normal(float* a, int N){ for (int i = 0; i < N; ++i) a[i] = sqrt(a[i]); } void sse(float* a, int N){ int nb_iters = N / 4;//N is multiple of 4 __m128* ptr = (__m128*)a; for (int i = 0; i < nb_iters; ++i, ++ptr, a += 4) _mm_store_ps(a, _mm_sqrt_ps(*ptr)); } int main(int argc, char** argv){ if (argc != 2) return 1; int N = atoi(argv[1]); float* a; posix_memalign((void**)&a, 16, N * sizeof(float)); system("date"); for (int i = 0; i < N; ++i) a[i] = 3141592.65358; for (int i = 0; i < N; ++i) normal(a, N); system("date"); for (int i = 0; i < N; ++i) a[i] = 3141592.65358; for (int i = 0; i < N; ++i) sse(a, N); system("date"); }