/*
 * Micro-benchmark: times several ways of writing and transforming the
 * 8-bit channels of an image stored as 32-bit pixels (byte-wise access,
 * shifts, a temporary byte table, a union, and memcpy).
 *
 * Each test function walks the whole `width` x `height` image held in the
 * global `img` buffer.  When _4BPP is defined all four bytes of a pixel are
 * processed, otherwise one byte per pixel is skipped.
 */

/*
 * NOTE(review): the header names were missing from the reviewed text.  The
 * list below is what the code in this file requires; the two OpenCV paths
 * are the legacy C API headers and should be confirmed against the build.
 * The reviewed text showed seven unguarded #include lines; the two not
 * listed here could not be identified.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/time.h>

#ifdef HAVE_OPENCV
#include <opencv/cv.h>
#include <opencv/highgui.h>
#endif

#define LOOPS 100 /* calls of the tested function per timed run */
#define RUNS 10   /* timed runs that count (two extra runs are discarded) */
#undef _4BPP

uint32_t *img;
int width = 1024;
int height = 768;
int depth = 4;

/* Per-channel constants used by every test. */
int K = 0x17, L = 0xe3, M = 0x7f, N = 0x48;

static uint64_t gettime(void);
static void measure(const char *, void(*)(void));
#ifdef HAVE_OPENCV
static void display(const char *, void *, int, int, int);
#endif

static void set8(void);
static void set32(void);
static void mul8(void);
static void mul32shift(void);
static void mul32tab(void);
static void mul32union(void);
static void mul32cpy(void);
static void mul32cpy2(void);
static void comp8(void);
static void comp32shift(void);

/*
 * Allocate the image, time every test in turn and, when built with
 * HAVE_OPENCV, show the resulting image until a key is pressed.
 *
 * Returns 0 on success, EXIT_FAILURE if the image cannot be allocated.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    img = malloc((size_t)width * height * depth);
    if(!img)
    {
        /* Every test dereferences img, so there is nothing to measure. */
        fprintf(stderr, "cannot allocate %dx%d image\n", width, height);
        return EXIT_FAILURE;
    }

    /* The set* tests run first, so later tests read initialised pixels. */
    measure("set8", set8);
    measure("set32", set32);
    measure("mul8", mul8);
    measure("mul32shift", mul32shift);
    measure("mul32tab", mul32tab);
    measure("mul32union", mul32union);
    measure("mul32cpy", mul32cpy);
    measure("mul32cpy2", mul32cpy2);
    measure("comp8", comp8);
    measure("comp32shift", comp32shift);

#ifdef HAVE_OPENCV
    display("title" , img, width, height, depth);
    printf("\nFinish\n\tPress any key to quit...\n");
    cvWaitKey(0);
#endif

    free(img);
    img = NULL;

    return 0;
}

/*
 * Helper functions
 */

/* Return the current time of day in microseconds. */
static uint64_t gettime(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return (uint64_t)tv.tv_sec * 1000000 + (uint64_t)tv.tv_usec;
}

/*
 * Time the function `f` and print the result to stderr, prefixed by `msg`.
 *
 * RUNS + 2 runs of LOOPS calls each are timed; the fastest and the slowest
 * run are dropped and the sum of the remaining RUNS runs is printed in
 * seconds.
 */
static void measure(const char *msg, void(*f)(void))
{
    uint64_t times[RUNS + 2];
    uint64_t total = 0, tmin = (uint64_t)-1, tmax = 0;
    int i, j;

    fprintf(stderr, "%s... ", msg);

    for(i = 0; i < RUNS + 2; i++)
    {
        uint64_t start = gettime();

        for(j = 0; j < LOOPS; j++)
        {
            (*f)();
        }

        times[i] = gettime() - start;
    }

    for(i = 0; i < RUNS + 2; i++)
    {
        if(times[i] > tmax)
        {
            tmax = times[i];
        }
        if(times[i] < tmin)
        {
            tmin = times[i];
        }
        total += times[i];
    }

    fprintf(stderr, "%lf\n", (double)(total - tmin - tmax) / 1.0e6);
}

#ifdef HAVE_OPENCV
/*
 * Show the raw buffer `img` (w x h pixels, d 8-bit channels, row stride
 * w * d bytes) in a window named `msg`.
 *
 * The IplImage created here is not released by this function.
 */
static void display(const char *msg, void * img, int w, int h, int d)
{
    IplImage *ipl = cvCreateImage(cvSize(w,h),8,d);

    cvSetData(ipl, img, w*d);
    cvNamedWindow(msg, 0);
    cvShowImage(msg, ipl);
}
#endif

/*
 * The tests
 */

/* Store K, L, M (and N with _4BPP) into each pixel, one byte at a time. */
static void set8(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint8_t *data = (uint8_t *)(img + y * width);

        for(x = 0; x < width; x++)
        {
            *data++ = K;
            *data++ = L;
            *data++ = M;
#ifdef _4BPP
            *data++ = N;
#else
            data++; /* leave the fourth byte untouched */
#endif
        }
    }
}

/* Store the same constants as one 32-bit word per pixel. */
static void set32(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
#ifdef _4BPP
            *data++ = K | (L << 8) | (M << 16) | (N << 24);
#else
            *data++ = K | (L << 8) | (M << 16);
#endif
        }
    }
}

/* Multiply each byte of each pixel by its constant, one byte at a time. */
static void mul8(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint8_t *data = (uint8_t *)(img + y * width);

        for(x = 0; x < width; x++)
        {
            *data++ *= K;
            *data++ *= L;
            *data++ *= M;
#ifdef _4BPP
            *data++ *= N;
#else
            data++; /* leave the fourth byte untouched */
#endif
        }
    }
}

/*
 * Multiply the channels of each pixel, extracting them from the 32-bit
 * word with shifts and reassembling the word afterwards.
 */
static void mul32shift(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
            uint32_t v = *data;
#ifdef _4BPP
            uint8_t a = v >> 24;
#endif
            uint8_t b = (uint8_t)(v >> 16);
            uint8_t c = (uint8_t)(v >> 8);
            uint8_t d = (uint8_t)v;

#ifdef _4BPP
            a *= K;
#endif
            b *= L;
            c *= M;
            d *= N;

#ifdef _4BPP
            *data++ = ((uint32_t)a << 24) | ((uint32_t)b << 16)
                       | ((uint32_t)c << 8) | d;
#else
            *data++ = ((uint32_t)b << 16) | ((uint32_t)c << 8) | d;
#endif
        }
    }
}

/* Same as mul32shift(), but the channels live in a local byte array. */
static void mul32tab(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
            uint32_t v = *data;
            uint8_t t[4];

#ifdef _4BPP
            t[0] = v >> 24;
#endif
            t[1] = (uint8_t)(v >> 16);
            t[2] = (uint8_t)(v >> 8);
            t[3] = (uint8_t)v;

#ifdef _4BPP
            t[0] *= K;
#endif
            t[1] *= L;
            t[2] *= M;
            t[3] *= N;

#ifdef _4BPP
            *data++ = ((uint32_t)t[0] << 24) | ((uint32_t)t[1] << 16)
                       | ((uint32_t)t[2] << 8) | t[3];
#else
            *data++ = ((uint32_t)t[1] << 16) | ((uint32_t)t[2] << 8) | t[3];
#endif
        }
    }
}

/*
 * Multiply the bytes of each pixel through a union of a 32-bit word and a
 * 4-byte array.  Which channel each t[i] maps to depends on the host byte
 * order.
 */
static void mul32union(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
            union { uint32_t x; uint8_t t[4]; } u;

            u.x = *data;
#ifdef _4BPP
            u.t[0] *= K;
#endif
            u.t[1] *= L;
            u.t[2] *= M;
            u.t[3] *= N;
            *data++ = u.x;
        }
    }
}

/* Multiply the bytes of each pixel, copying them in and out with memcpy. */
static void mul32cpy(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
            uint8_t t[4];

            memcpy(t, data, 4);
#ifdef _4BPP
            t[0] *= K;
#endif
            t[1] *= L;
            t[2] *= M;
            t[3] *= N;
            memcpy(data, t, 4);
            data++;
        }
    }
}

/*
 * Like mul32cpy(), but the pixel is first loaded into a local 32-bit
 * variable and memcpy operates on that local instead of the image.
 */
static void mul32cpy2(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
            uint8_t t[4];
            uint32_t v;

            /* Read without advancing: the pointer must move exactly once
             * per pixel, otherwise the result lands on the next pixel and
             * the last row runs past the end of the buffer. */
            v = *data;
            memcpy(t, &v, 4);
#ifdef _4BPP
            t[0] *= K;
#endif
            t[1] *= L;
            t[2] *= M;
            t[3] *= N;
            memcpy(&v, t, 4);
            *data++ = v;
        }
    }
}

/*
 * Apply a multiply / add / and / xor expression to each byte of each
 * pixel, one byte at a time.
 */
static void comp8(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint8_t *data = (uint8_t *)(img + y * width);

        for(x = 0; x < width; x++)
        {
            uint8_t a;

            a = *data;
            *data++ = ((a * K) & (a + L)) ^ (M * N);
            a = *data;
            *data++ = ((a * L) & (a + M)) ^ (N * K);
            a = *data;
            *data++ = ((a * M) & (a + N)) ^ (K * L);
#ifdef _4BPP
            a = *data;
            *data++ = ((a * N) & (a + K)) ^ (L * M);
#else
            data++; /* leave the fourth byte untouched */
#endif
        }
    }
}

/*
 * Apply the same kind of expression as comp8(), extracting the channels
 * from the 32-bit word with shifts and reassembling the word afterwards.
 */
static void comp32shift(void)
{
    int x, y;

    for(y = 0; y < height; y++)
    {
        uint32_t *data = img + y * width;

        for(x = 0; x < width; x++)
        {
            uint32_t v = *data;
#ifdef _4BPP
            uint8_t a = v >> 24;
#endif
            uint8_t b = (uint8_t)(v >> 16);
            uint8_t c = (uint8_t)(v >> 8);
            uint8_t d = (uint8_t)v;

#ifdef _4BPP
            a = ((a * K) & (a + L)) ^ (M * N);
#endif
            b = ((b * L) & (b + M)) ^ (N * K);
            c = ((c * M) & (c + N)) ^ (K * L);
            d = ((d * N) & (d + K)) ^ (L * M);

#ifdef _4BPP
            *data++ = ((uint32_t)a << 24) | ((uint32_t)b << 16)
                       | ((uint32_t)c << 8) | d;
#else
            *data++ = ((uint32_t)b << 16) | ((uint32_t)c << 8) | d;
#endif
        }
    }
}