ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
self_assign.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
19 
24 //
25 // *** System
26 //
27 #include <iostream>
28 
29 
30 //
31 // *** ViennaCL
32 //
33 
34 //#define VIENNACL_DEBUG_ALL
35 
36 #include "viennacl/scalar.hpp"
37 #include "viennacl/vector.hpp"
39 #include "viennacl/matrix.hpp"
44 #include "viennacl/ell_matrix.hpp"
46 #include "viennacl/hyb_matrix.hpp"
47 #include "viennacl/linalg/prod.hpp"
49 #include "viennacl/linalg/ilu.hpp"
54 
55 //
56 // -------------------------------------------------------------
57 //
58 template<typename NumericT>
59 NumericT diff(NumericT const & s1, viennacl::scalar<NumericT> const & s2)
60 {
61  if (std::fabs(s1 - s2) > 0)
62  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
63  return 0;
64 }
65 
66 template<typename NumericT>
67 NumericT diff(std::vector<NumericT> const & v1, viennacl::vector<NumericT> const & v2)
68 {
69  std::vector<NumericT> v2_cpu(v2.size());
71  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
72 
73  for (std::size_t i=0;i<v1.size(); ++i)
74  {
75  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
76  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
77  else
78  v2_cpu[i] = 0.0;
79 
80  if (v2_cpu[i] > 0.0001)
81  {
82  //std::cout << "Neighbor: " << i-1 << ": " << v1[i-1] << " vs. " << v2_cpu[i-1] << std::endl;
83  std::cout << "Error at entry " << i << ": " << v1[i] << " vs. " << v2[i] << std::endl;
84  //std::cout << "Neighbor: " << i+1 << ": " << v1[i+1] << " vs. " << v2_cpu[i+1] << std::endl;
85  exit(EXIT_FAILURE);
86  }
87  }
88 
89  NumericT inf_norm = 0;
90  for (std::size_t i=0;i<v2_cpu.size(); ++i)
91  inf_norm = std::max<NumericT>(inf_norm, std::fabs(v2_cpu[i]));
92 
93  return inf_norm;
94 }
95 
96 template<typename NumericT>
97 NumericT diff(std::vector<std::vector<NumericT> > const & A1, viennacl::matrix<NumericT> const & A2)
98 {
99  std::vector<NumericT> host_values(A2.internal_size());
100  for (std::size_t i=0; i<A2.size1(); ++i)
101  for (std::size_t j=0; j<A2.size2(); ++j)
102  host_values[i*A2.internal_size2() + j] = A1[i][j];
103 
104  std::vector<NumericT> device_values(A2.internal_size());
105  viennacl::fast_copy(A2, &device_values[0]);
106  viennacl::vector<NumericT> vcl_device_values(A2.internal_size()); // workaround to avoid code duplication
107  viennacl::copy(device_values, vcl_device_values);
108 
109  return diff(host_values, vcl_device_values);
110 }
111 
112 
113 template<typename HostContainerT, typename DeviceContainerT, typename NumericT>
114 void check(HostContainerT const & host_container, DeviceContainerT const & device_container,
115  std::string current_stage, NumericT epsilon)
116 {
117  current_stage.resize(25, ' ');
118  std::cout << "Testing operation: " << current_stage;
119  NumericT rel_error = std::fabs(diff(host_container, device_container));
120 
121  if (rel_error > epsilon)
122  {
123  std::cout << std::endl;
124  std::cout << "# Error at operation: " << current_stage << std::endl;
125  std::cout << " diff: " << rel_error << std::endl;
126  exit(EXIT_FAILURE);
127  }
128  std::cout << "PASS" << std::endl;
129 }
130 
131 
132 struct op_assign
133 {
134  template<typename LHS, typename RHS>
135  static void apply(LHS & lhs, RHS const & rhs) { lhs = rhs; }
136 
137  static std::string str() { return "="; }
138 };
139 
141 {
142  template<typename LHS, typename RHS>
143  static void apply(LHS & lhs, RHS const & rhs) { lhs += rhs; }
144 
145  static std::string str() { return "+="; }
146 };
147 
149 {
150  template<typename LHS, typename RHS>
151  static void apply(LHS & lhs, RHS const & rhs) { lhs -= rhs; }
152 
153  static std::string str() { return "-="; }
154 };
155 
156 
157 // compute C = A * B on host and device and compare results.
158 // Note that the reference uses three distinct matrices A, B, C,
159 // whereas C on the device is the same as either A, B, or both.
160 template<typename OpT, typename NumericT, typename HostMatrixT, typename DeviceMatrixT>
161 void test_gemm(NumericT epsilon,
162  HostMatrixT & host_A, HostMatrixT & host_B, HostMatrixT & host_C,
163  DeviceMatrixT & device_A, std::string name_A,
164  DeviceMatrixT & device_B, std::string name_B,
165  DeviceMatrixT & device_C, bool copy_from_A,
166  bool trans_first, bool trans_second)
167 {
168  for (std::size_t i = 0; i<host_A.size(); ++i)
169  for (std::size_t j = 0; j<host_A[i].size(); ++j)
170  {
171  host_A[i][j] = random<NumericT>();
172  host_B[i][j] = random<NumericT>();
173  }
174 
175  viennacl::copy(host_A, device_A);
176  viennacl::copy(host_B, device_B);
177  if (copy_from_A)
178  host_C = host_A;
179  else
180  host_C = host_B;
181 
182  for (std::size_t i = 0; i<host_A.size(); ++i)
183  for (std::size_t j = 0; j<host_A[i].size(); ++j)
184  {
185  NumericT tmp = 0;
186  for (std::size_t k = 0; k<host_A[i].size(); ++k)
187  tmp += (trans_first ? host_A[k][i] : host_A[i][k])
188  * (trans_second ? host_B[j][k] : host_B[k][j]);
189  OpT::apply(host_C[i][j], tmp);
190  }
191 
192  if (trans_first && trans_second)
193  {
194  OpT::apply(device_C, viennacl::linalg::prod(trans(device_A), trans(device_B)));
195  check(host_C, device_C, std::string("A ") + OpT::str() + std::string(" ") + name_A + std::string("^T*") + name_B + std::string("^T"), epsilon);
196  }
197  else if (trans_first && !trans_second)
198  {
199  OpT::apply(device_C, viennacl::linalg::prod(trans(device_A), device_B));
200  check(host_C, device_C, std::string("A ") + OpT::str() + std::string(" ") + name_A + std::string("^T*") + name_B + std::string(""), epsilon);
201  }
202  else if (!trans_first && trans_second)
203  {
204  OpT::apply(device_C, viennacl::linalg::prod(device_A, trans(device_B)));
205  check(host_C, device_C, std::string("A ") + OpT::str() + std::string(" ") + name_A + std::string("*") + name_B + std::string("^T"), epsilon);
206  }
207  else
208  {
209  OpT::apply(device_C, viennacl::linalg::prod(device_A, device_B));
210  check(host_C, device_C, std::string("A ") + OpT::str() + std::string(" ") + name_A + std::string("*") + name_B + std::string(""), epsilon);
211  }
212 }
213 
214 // dispatch routine for all combinations of transpositions:
215 // C = A * B, C = A * B^T, C = A^T * B, C = A^T * B^T
216 template<typename OpT, typename NumericT, typename HostMatrixT, typename DeviceMatrixT>
217 void test_gemm(NumericT epsilon,
218  HostMatrixT & host_A, HostMatrixT & host_B, HostMatrixT & host_C,
219  DeviceMatrixT & device_A, std::string name_A,
220  DeviceMatrixT & device_B, std::string name_B,
221  DeviceMatrixT & device_C, bool copy_from_A)
222 {
223  test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, false, false);
224  test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, false, true);
225  test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, true, false);
226  test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A, true, true);
227 }
228 
229 // The actual testing routine.
230 // Sets of vectors and matrices using STL types and uses these for reference calculations.
231 // ViennaCL operations are carried out as usual and then compared against the reference.
232 template<typename NumericT>
233 int test(NumericT epsilon)
234 {
235  std::size_t N = 142; // should be larger than 128 in order to avoid false negatives due to blocking
236 
237  //
238  // Vector setup and test:
239  //
240 
241  std::vector<NumericT> std_x(N);
242  std::vector<NumericT> std_y(N);
243  std::vector<NumericT> std_z(N);
244 
245  for (std::size_t i=0; i<std_x.size(); ++i)
246  std_x[i] = NumericT(i + 1);
247  for (std::size_t i=0; i<std_y.size(); ++i)
248  std_y[i] = NumericT(i*i + 1);
249  for (std::size_t i=0; i<std_z.size(); ++i)
250  std_z[i] = NumericT(2 * i + 1);
251 
255 
256  viennacl::copy(std_x, vcl_x);
257  viennacl::copy(std_y, vcl_y);
258  viennacl::copy(std_z, vcl_z);
259 
260  // This shouldn't do anything bad:
261  vcl_x = vcl_x;
262  check(std_x, vcl_x, "x = x", epsilon);
263 
264  // This should work, even though we are dealing with the same buffer:
265  std_x[0] = std_x[2]; std_x[1] = std_x[3];
267  check(std_x, vcl_x, "x = x (range)", epsilon);
268 
269  //
270  // Matrix-Vector
271  //
272 
273  std::vector<std::vector<NumericT> > std_A(N, std::vector<NumericT>(N, NumericT(1)));
274  std::vector<std::vector<NumericT> > std_B(N, std::vector<NumericT>(N, NumericT(2)));
275  std::vector<std::vector<NumericT> > std_C(N, std::vector<NumericT>(N, NumericT(3)));
276 
280 
281  viennacl::copy(std_A, vcl_A);
282  viennacl::copy(std_B, vcl_B);
283  viennacl::copy(std_C, vcl_C);
284 
285  // This shouldn't do anything bad:
286  vcl_A = vcl_A;
287  check(std_A, vcl_A, "A = A", epsilon);
288 
289  // This should work, even though we are dealing with the same buffer:
290  std_A[0][0] = std_A[0][2]; std_A[0][1] = std_A[0][3];
292  check(std_A, vcl_A, "A = A (range)", epsilon);
293 
294  // check x <- A * x;
295  for (std::size_t i = 0; i<std_y.size(); ++i)
296  {
297  NumericT val = 0;
298  for (std::size_t j = 0; j<std_x.size(); ++j)
299  val += std_A[i][j] * std_x[j];
300  std_y[i] = val;
301  }
302  vcl_x = viennacl::linalg::prod(vcl_A, vcl_x);
303  check(std_y, vcl_x, "x = A*x", epsilon);
304 
305 
306  std::vector< std::map<unsigned int, NumericT> > std_Asparse(N);
307 
308  for (std::size_t i=0; i<std_Asparse.size(); ++i)
309  {
310  if (i > 0)
311  std_Asparse[i][i-1] = random<NumericT>();
312  std_Asparse[i][i] = NumericT(1) + random<NumericT>();
313  if (i < std_Asparse.size() - 1)
314  std_Asparse[i][i+1] = random<NumericT>();
315  }
316 
317  // Sparse
323 
324  viennacl::copy(std_Asparse, vcl_A_csr);
325  viennacl::copy(std_Asparse, vcl_A_coo);
326  viennacl::copy(std_Asparse, vcl_A_ell);
327  viennacl::copy(std_Asparse, vcl_A_sell);
328  viennacl::copy(std_Asparse, vcl_A_hyb);
329 
330  for (std::size_t i=0; i<std_Asparse.size(); ++i)
331  {
332  NumericT val = 0;
333  for (typename std::map<unsigned int, NumericT>::const_iterator it = std_Asparse[i].begin(); it != std_Asparse[i].end(); ++it)
334  val += it->second * std_x[it->first];
335  std_y[i] = val;
336  }
337 
338  viennacl::copy(std_x, vcl_x);
339  vcl_x = viennacl::linalg::prod(vcl_A_csr, vcl_x);
340  check(std_y, vcl_x, "x = A*x (sparse, csr)", epsilon);
341 
342  viennacl::copy(std_x, vcl_x);
343  vcl_x = viennacl::linalg::prod(vcl_A_coo, vcl_x);
344  check(std_y, vcl_x, "x = A*x (sparse, coo)", epsilon);
345 
346  viennacl::copy(std_x, vcl_x);
347  vcl_x = viennacl::linalg::prod(vcl_A_ell, vcl_x);
348  check(std_y, vcl_x, "x = A*x (sparse, ell)", epsilon);
349 
350  viennacl::copy(std_x, vcl_x);
351  vcl_x = viennacl::linalg::prod(vcl_A_sell, vcl_x);
352  check(std_y, vcl_x, "x = A*x (sparse, sell)", epsilon);
353 
354  viennacl::copy(std_x, vcl_x);
355  vcl_x = viennacl::linalg::prod(vcl_A_hyb, vcl_x);
356  check(std_y, vcl_x, "x = A*x (sparse, hyb)", epsilon);
357  std::cout << std::endl;
358 
359 
360  //
361  // Matrix-Matrix (dense times dense):
362  //
363  test_gemm<op_assign>(epsilon, std_A, std_B, std_C, vcl_A, "A", vcl_B, "B", vcl_A, true);
364  test_gemm<op_assign>(epsilon, std_B, std_A, std_C, vcl_B, "B", vcl_A, "A", vcl_A, false);
365  test_gemm<op_assign>(epsilon, std_A, std_A, std_C, vcl_A, "A", vcl_A, "A", vcl_A, true);
366  std::cout << std::endl;
367 
368  test_gemm<op_plus_assign>(epsilon, std_A, std_B, std_C, vcl_A, "A", vcl_B, "B", vcl_A, true);
369  test_gemm<op_plus_assign>(epsilon, std_B, std_A, std_C, vcl_B, "B", vcl_A, "A", vcl_A, false);
370  test_gemm<op_plus_assign>(epsilon, std_A, std_A, std_C, vcl_A, "A", vcl_A, "A", vcl_A, true);
371  std::cout << std::endl;
372 
373  test_gemm<op_minus_assign>(epsilon, std_A, std_B, std_C, vcl_A, "A", vcl_B, "B", vcl_A, true);
374  test_gemm<op_minus_assign>(epsilon, std_B, std_A, std_C, vcl_B, "B", vcl_A, "A", vcl_A, false);
375  test_gemm<op_minus_assign>(epsilon, std_A, std_A, std_C, vcl_A, "A", vcl_A, "A", vcl_A, true);
376  std::cout << std::endl;
377 
378 
379 
380  //
381  // Matrix-Matrix (sparse times dense)
382  //
383  // A = sparse * A
384  viennacl::copy(std_A, vcl_A);
385  for (std::size_t i = 0; i<std_A.size(); ++i)
386  for (std::size_t j = 0; j<std_A[i].size(); ++j)
387  {
388  NumericT tmp = 0;
389  for (std::size_t k = 0; k<std_A[i].size(); ++k)
390  tmp += std_Asparse[i][k] * std_A[k][j];
391  std_C[i][j] = tmp;
392  }
393 
394  viennacl::copy(std_A, vcl_A);
395  vcl_A = viennacl::linalg::prod(vcl_A_csr, vcl_A);
396  check(std_C, vcl_A, "A = csr*A", epsilon);
397 
398  viennacl::copy(std_A, vcl_A);
399  vcl_A = viennacl::linalg::prod(vcl_A_coo, vcl_A);
400  check(std_C, vcl_A, "A = coo*A", epsilon);
401 
402  viennacl::copy(std_A, vcl_A);
403  vcl_A = viennacl::linalg::prod(vcl_A_ell, vcl_A);
404  check(std_C, vcl_A, "A = ell*A", epsilon);
405 
406  viennacl::copy(std_A, vcl_A);
407  //vcl_A = viennacl::linalg::prod(vcl_A_sell, vcl_A);
408  //check(std_C, vcl_A, "A = sell*A", epsilon);
409 
410  viennacl::copy(std_A, vcl_A);
411  vcl_A = viennacl::linalg::prod(vcl_A_hyb, vcl_A);
412  check(std_C, vcl_A, "A = hyb*A", epsilon);
413 
414  // A = sparse * A^T
415  viennacl::copy(std_A, vcl_A);
416  for (std::size_t i = 0; i<std_A.size(); ++i)
417  for (std::size_t j = 0; j<std_A[i].size(); ++j)
418  {
419  NumericT tmp = 0;
420  for (std::size_t k = 0; k<std_A[i].size(); ++k)
421  tmp += std_Asparse[i][k] * std_A[j][k];
422  std_C[i][j] = tmp;
423  }
424 
425  viennacl::copy(std_A, vcl_A);
426  vcl_A = viennacl::linalg::prod(vcl_A_csr, trans(vcl_A));
427  check(std_C, vcl_A, "A = csr*A^T", epsilon);
428 
429  viennacl::copy(std_A, vcl_A);
430  vcl_A = viennacl::linalg::prod(vcl_A_coo, trans(vcl_A));
431  check(std_C, vcl_A, "A = coo*A^T", epsilon);
432 
433  viennacl::copy(std_A, vcl_A);
434  vcl_A = viennacl::linalg::prod(vcl_A_ell, trans(vcl_A));
435  check(std_C, vcl_A, "A = ell*A^T", epsilon);
436 
437  viennacl::copy(std_A, vcl_A);
438  //vcl_A = viennacl::linalg::prod(vcl_A_sell, trans(vcl_A));
439  //check(std_C, vcl_A, "A = sell*A^T", epsilon);
440 
441  viennacl::copy(std_A, vcl_A);
442  vcl_A = viennacl::linalg::prod(vcl_A_hyb, trans(vcl_A));
443  check(std_C, vcl_A, "A = hyb*A^T", epsilon);
444 
445  return EXIT_SUCCESS;
446 }
447 
448 
449 //
450 // -------------------------------------------------------------
451 //
452 int main()
453 {
454  std::cout << std::endl;
455  std::cout << "----------------------------------------------" << std::endl;
456  std::cout << "----------------------------------------------" << std::endl;
457  std::cout << "## Test :: Self-Assignment" << std::endl;
458  std::cout << "----------------------------------------------" << std::endl;
459  std::cout << "----------------------------------------------" << std::endl;
460  std::cout << std::endl;
461 
462  int retval = EXIT_SUCCESS;
463 
464  std::cout << std::endl;
465  std::cout << "----------------------------------------------" << std::endl;
466  std::cout << std::endl;
467  {
468  typedef float NumericT;
469  NumericT epsilon = static_cast<NumericT>(1E-4);
470  std::cout << "# Testing setup:" << std::endl;
471  std::cout << " eps: " << epsilon << std::endl;
472  std::cout << " numeric: float" << std::endl;
473  retval = test<NumericT>(epsilon);
474  if ( retval == EXIT_SUCCESS )
475  std::cout << "# Test passed" << std::endl;
476  else
477  return retval;
478  }
479  std::cout << std::endl;
480  std::cout << "----------------------------------------------" << std::endl;
481  std::cout << std::endl;
482 
483  // Note: No need for double precision check, self-assignments are handled in a numeric-type agnostic manner.
484 
485  std::cout << std::endl;
486  std::cout << "------- Test completed --------" << std::endl;
487  std::cout << std::endl;
488 
489  return retval;
490 }
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:405
void test_gemm(NumericT epsilon, HostMatrixT &host_A, HostMatrixT &host_B, HostMatrixT &host_C, DeviceMatrixT &device_A, std::string name_A, DeviceMatrixT &device_B, std::string name_B, DeviceMatrixT &device_C, bool copy_from_A, bool trans_first, bool trans_second)
A reader and writer for the matrix market format is implemented here.
NumericT diff(NumericT const &s1, viennacl::scalar< NumericT > const &s2)
Definition: self_assign.cpp:59
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
int main()
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
Definition: matrix_def.hpp:233
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
A dense matrix class.
Definition: forwards.h:374
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
Implementation of the coordinate_matrix class.
static void apply(LHS &lhs, RHS const &rhs)
viennacl::vector< float > v1
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
Implementations of incomplete factorization preconditioners. Convenience header file.
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:402
Implementation of the compressed_matrix class.
Implementation of the sliced_ell_matrix class.
static std::string str()
int test(NumericT epsilon)
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
size_type size2() const
Returns the number of columns.
Definition: matrix_def.hpp:217
Implementation of the ell_matrix class.
size_type size1() const
Returns the number of rows.
Definition: matrix_def.hpp:215
Proxy classes for vectors.
static std::string str()
Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number o...
Proxy classes for matrices.
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void apply(LHS &lhs, RHS const &rhs)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
static std::string str()
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:423
static void apply(LHS &lhs, RHS const &rhs)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:231
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
void check(HostContainerT const &host_container, DeviceContainerT const &device_container, std::string current_stage, NumericT epsilon)
Common routines used within ILU-type preconditioners.
Implementation of the ViennaCL scalar class.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)