SKEPU mapreduce code sample

We show an implementation of the vector dot product both with a single mapreduce skeleton and with a map followed by a reduce, so that the optimizations in the mapreduce skeleton can be appreciated.

dotproduct.cpp
// The following defines enable/disable the OpenMP and CUDA implementations
// #define SKEPU_OPENMP 
#define SKEPU_CUDA
 
// The following define enables/disables the OpenCL implementation
//#define SKEPU_OPENCL
// With OpenCL, following define to specify number of GPUs to be used. Specifying 0 means all available GPUs. Default is 1 GPU.
/* #define SKEPU_NUMGPU 0 */
 
#include <iostream>
#include <time.h>

#ifdef SKEPU_OPENMP
#include <omp.h>   // omp_get_wtime(): wall-clock timing for the OpenMP backend
#endif
 
#include "skepu/vector.h"
#include "skepu/mapreduce.h"
#include "skepu/map.h"
#include "skepu/reduce.h"
 
// Map step user function: product of two float operands.
// Used by both the MapReduce and the Map skeletons in main().
BINARY_FUNC(mult_f, float, x, y,
  return x * y;
)
 
// Reduce step user function: sum of two float operands.
// Used by both the MapReduce and the Reduce skeletons in main().
BINARY_FUNC(plus_f, float, x, y,
  return x + y;
)
 
// Number of elements in each of the vectors built in main() (1024*1024 = 1048576).
#define N (1024*1024)
// Alternative, much smaller problem size (disabled).
//#define N 16
 
namespace {

// BackendStopwatch: times one region of main() and prints the elapsed time in
// milliseconds. Exactly one variant is compiled:
//   - SKEPU_CUDA   : a pair of CUDA events recorded on stream 0
//   - SKEPU_OPENMP : wall-clock time from omp_get_wtime()
//   - neither      : a no-op, so nothing is timed or printed
// CUDA takes precedence when both macros are defined, so the two backends'
// timing code can no longer collide.
#if defined(SKEPU_CUDA)

class BackendStopwatch
{
public:
  // The events are created once and reused for every measurement.
  BackendStopwatch()
  {
    cudaEventCreate(&begin_);
    cudaEventCreate(&end_);
  }

  // Releases both events; without this they would leak.
  ~BackendStopwatch()
  {
    cudaEventDestroy(begin_);
    cudaEventDestroy(end_);
  }

  // Marks the beginning of the timed region.
  void start()
  {
    cudaEventRecord(begin_, 0);
  }

  // Marks the end of the timed region, waits for the end event to complete
  // and prints the time between the two events.
  void stopAndReport()
  {
    float elapsed = 0.0f;
    cudaEventRecord(end_, 0);
    cudaEventSynchronize(end_);
    cudaEventElapsedTime(&elapsed, begin_, end_);
    std::cout << "Elapsed time (CUDA) " << elapsed << " msecs" << std::endl;
  }

private:
  // Non-copyable: a copy would destroy the same events twice.
  BackendStopwatch(const BackendStopwatch&);
  BackendStopwatch& operator=(const BackendStopwatch&);

  cudaEvent_t begin_;
  cudaEvent_t end_;
};

#elif defined(SKEPU_OPENMP)

class BackendStopwatch
{
public:
  BackendStopwatch() : begin_(0.0) {}

  // Marks the beginning of the timed region.
  void start()
  {
    begin_ = omp_get_wtime();
  }

  // Prints the wall-clock time since start(). clock() is deliberately not
  // used: it reports processor time, which can add up across threads and so
  // overstate the duration of a parallel region.
  void stopAndReport()
  {
    const float elapsed = static_cast<float>((omp_get_wtime() - begin_) * 1000.0);
    std::cout << "Elapsed time (OpenMP) " << elapsed << " msecs"  << std::endl;
  }

private:
  double begin_;  // start time in seconds, as returned by omp_get_wtime()
};

#else

class BackendStopwatch
{
public:
  void start() {}
  void stopAndReport() {}
};

#endif

} // namespace

// Computes the dot product of two constant vectors of N floats twice: once
// with the fused MapReduce skeleton and once with a Map into a temporary
// vector followed by a Reduce. Prints the result of each variant and, when a
// timed backend is selected, the elapsed time of each.
// Returns 0.
int main()
{
  // NOTE(review): the skeletons receive raw pointers to the user functions;
  // presumably they take ownership of them - confirm against the SkePU docs.
  skepu::MapReduce<mult_f, plus_f> dotProduct(new mult_f, new plus_f);
  skepu::Map<mult_f> mapzip(new mult_f);
  skepu::Reduce<plus_f> redsum(new plus_f);

  // Inputs are all 2 and all 5, so each element-wise product is 10.
  skepu::Vector<float> v0(N, 2.0f);
  skepu::Vector<float> v1(N, 5.0f);
  // Receives the element-wise products in the map + reduce variant.
  skepu::Vector<float> vtemp(N, 0.0f);

  // std::cout<<"v0: " <<v0 <<"\n";
  // std::cout<<"v1: " <<v1 <<"\n";

  // Declared after the skeletons and vectors, matching the point at which
  // the timing resources were originally set up.
  BackendStopwatch timer;

  // Variant 1: fused mapreduce.
  std::cout << "Computing mapreduce version " << std::endl;
  timer.start();
  float r = dotProduct(v0, v1);
  timer.stopAndReport();
  std::cout << "Computed " << r << std::endl;

  // Variant 2: map into vtemp, then reduce vtemp.
  std::cout << "Computing map + reduce version " << std::endl;
  timer.start();
  mapzip(v0, v1, vtemp);
  r = redsum(vtemp);
  timer.stopAndReport();
  std::cout << "Computed " << r << std::endl;

  // std::cout<<"r: " <<r <<"\n";

  return 0;
}