// We show an implementation of the vector dot product written in two ways: with a single MapReduce skeleton, and with a Map followed by a Reduce, so that the optimizations performed by the MapReduce skeleton can be appreciated.
// following define to enable/disable OpenMP implmentation to be used // #define SKEPU_OPENMP #define SKEPU_CUDA // following define to enable/disable OpenCL implmentation to be used //#define SKEPU_OPENCL // With OpenCL, following define to specify number of GPUs to be used. Specifying 0 means all available GPUs. Default is 1 GPU. /* #define SKEPU_NUMGPU 0 */ #include <iostream> #include <time.h> #include "skepu/vector.h" #include "skepu/mapreduce.h" #include "skepu/map.h" #include "skepu/reduce.h" // User-function used for mapping BINARY_FUNC(mult_f, float, a, b, return a*b; ) // User-function used for reduction BINARY_FUNC(plus_f, float, a, b, return a+b; ) #define N (1024*1024) //#define N 16 int main() { skepu::MapReduce<mult_f, plus_f> dotProduct(new mult_f, new plus_f); skepu::Map<mult_f> mapzip(new mult_f); skepu::Reduce<plus_f> redsum(new plus_f); skepu::Vector<float> v0(N, (float)2); skepu::Vector<float> v1(N, (float)5); skepu::Vector<float> vtemp(N, (float)0); // std::cout<<"v0: " <<v0 <<"\n"; // std::cout<<"v1: " <<v1 <<"\n"; std::cout << "Computing mapreduce version " << std::endl; #ifdef SKEPU_OPENMP clock_t t0, t1; t0 = clock(); #endif #ifdef SKEPU_CUDA cudaEvent_t t0, t1; cudaEventCreate(&t0); cudaEventCreate(&t1); cudaEventRecord(t0,0); #endif float r = dotProduct(v0, v1); #ifdef SKEPU_OPENMP t1 = clock(); float elapsed; elapsed = ((float) (t1-t0))* 1000.0; elapsed = elapsed / CLOCKS_PER_SEC; std::cout << "Elapsed time (OpenMP) " << elapsed << " msecs" << std::endl; #endif #ifdef SKEPU_CUDA float elapsed; cudaEventRecord(t1,0); cudaEventSynchronize(t1); cudaEventElapsedTime(&elapsed,t0,t1); std::cout << "Elapsed time (CUDA) " << elapsed << " msecs" << std::endl; #endif std::cout << "Computed " << r << std::endl; // now computing map followed by a reduce std::cout << "Computing map + reduce version " << std::endl; #ifdef SKEPU_OPENMP t0 = clock(); #endif #ifdef SKEPU_CUDA cudaEventCreate(&t0); cudaEventCreate(&t1); cudaEventRecord(t0,0); #endif 
mapzip(v0,v1,vtemp); r = redsum(vtemp); #ifdef SKEPU_OPENMP t1 = clock(); elapsed = ((float) (t1-t0))* 1000.0; elapsed = elapsed / CLOCKS_PER_SEC; std::cout << "Elapsed time (OpenMP) " << elapsed << " msecs" << std::endl; #endif #ifdef SKEPU_CUDA cudaEventRecord(t1,0); cudaEventSynchronize(t1); cudaEventElapsedTime(&elapsed,t0,t1); std::cout << "Elapsed time (CUDA) " << elapsed << " msecs" << std::endl; #endif std::cout << "Computed " << r << std::endl;; // std::cout<<"r: " <<r <<"\n"; return 0; }