Ошибка компиляции ядра OpenCL на Mac OS X
Я написал код на OpenCL, который прекрасно работает на Linux, но на Mac OS X его сборка завершается с ошибками. Помогите, пожалуйста, разобраться, почему так происходит. Код ядра приведён после текста ошибки. Моё ядро использует тип double, поэтому вверху стоит соответствующая прагма. Но я не понимаю, почему в сообщении об ошибке фигурирует тип float:
inline float8 __OVERLOAD__ _name(float8 x) { return _default_name(x); } \
^
/System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4606:30: note: candidate function
__CLFN_FD_1FD_FAST_RELAX(__fast_relax_log, native_log, __cl_log);
^
/System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:421:29:
note: expanded from macro '__CLFN_FD_1FD_FAST_RELAX'
inline float16 __OVERLOAD__ _name(float16 x){ return _default_name(x); }
^
<program source>:206:19: error: call to '__fast_relax_log' is ambiguous
det_zkinin + log((2.0) * 3.14));
^~~~~~~~~~~~~~~~~
/System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4608:22:
note: expanded from macro 'log'
#define log(__x) __fast_relax_log(__x)
^~~~~~~~~~~~~~~~
/System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4606:30:
note: candidate function
__CLFN_FD_1FD_FAST_RELAX(__fast_relax_log, native_log, __cl_log);
^
/System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:416:27:
note: expanded from macro '__CLFN_FD_1FD_FAST_RELAX'
inline float __OVERLOAD__ _name(float x) { return _default_name(x); } \
^
/System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4606:30
note: candidate function
__CLFN_FD_1FD_FAST_RELAX(__fast_relax_log, native_log, __cl_log);
^
^
Это код ядра:
#pragma OPENCL EXTENSION cl_khr_fp64: enable
/*
 * ckf_kernel2: one work-item per filter instance, selected by
 * ckf_id = get_global_id(0). Work-items with ckf_id >= numOfCKF do nothing.
 *
 * Layout: entry k of a per-instance array lives at [k * aligned_ckf + ckf_id];
 * z_k_f, innCovMatrixSum, zk_diff, llk, cummulative and rlists hold a single
 * value per instance at [ckf_id].
 *
 * Steps performed for each instance:
 *   1. sing_j[i][idx] = init_state[i] +/- epsilon * temp_var[i][idx % dimx]
 *      ('+' for idx < dimx, '-' otherwise), idx in [0, 2*dimx).
 *   2. z_k_j[idx] = sum_i sing_j[i][idx] * zrlists[iter * aligned_dimx + i];
 *      z_k_f is the mean of z_k_j, innCovMatrixSum is
 *      mean(z_k_j^2) - z_k_f^2 + rlists.
 *   3. crossCovMatrixSum[idx] = mean_i(sing_j[idx][i] * z_k_j[i])
 *      - x_k_f[idx] * z_k_f.
 *   4. zk_diff = yrlists[iter] - z_k_f, or 0 when yrlists[iter] is the
 *      "-1" marker (see note in the body).
 *   5. reduce_gain_matrix = crossCovMatrixSum / innCovMatrixSum;
 *      init_state and init_var are overwritten with the updated values.
 *   6. llk = 0.5 * (log(innCov) + zk_diff^2 / innCov + log(2*pi)), or 0 when
 *      innCovMatrixSum <= 0; llk is then added to cummulative.
 *
 * Parameters:
 *   dimx               loop bound for the state dimension
 *   aligned_dimx       row stride of zrlists
 *   numOfCKF           number of instances that do work
 *   aligned_ckf        stride between consecutive entries of per-instance arrays
 *   iter               index into yrlists and row selector for zrlists
 *   epsilon            scale applied to temp_var when building sing_j
 *   yrlists, zrlists, rlists, covMatrixSum, temp_var, x_k_f   read only here
 *   init_state         read in step 1, overwritten in step 5
 *   cummulative        accumulated into (+=)
 *   sing_j, z_k_j      scratch: written, then read back
 *   init_var, crossCovMatrixSum, z_k_f, innCovMatrixSum, zk_diff,
 *   reduce_gain_matrix, llk                                   written
 *
 * NOTE(review): the compiler log reported for this kernel shows `log`
 * expanding to `__fast_relax_log`, with only float overloads listed as
 * candidates, so the double-precision log() calls below are rejected as
 * ambiguous. Presumably this comes from a fast/relaxed-math build option
 * on the host side -- confirm there; it cannot be resolved inside the kernel
 * without dropping to float precision.
 */
__kernel void ckf_kernel2(int dimx, int aligned_dimx,
                          int numOfCKF, int aligned_ckf,
                          int iter,
                          double epsilon,
                          __global double * yrlists,
                          __global double * zrlists,
                          __global double * rlists,
                          __global double * init_state,
                          __global double * init_var,
                          __global double * sing_j,
                          __global double * covMatrixSum,
                          __global double * cummulative,
                          __global double * temp_var,
                          __global double * x_k_f,
                          __global double * z_k_j,
                          __global double * crossCovMatrixSum,
                          __global double * z_k_f,
                          __global double * innCovMatrixSum,
                          __global double * zk_diff,
                          __global double * reduce_gain_matrix,
                          __global double * llk
                          )
{
    int ckf_id = get_global_id(0);

    /* Padding work-items past the last instance have nothing to do. */
    if (ckf_id >= numOfCKF) {
        return;
    }

    /* Step 1: build the 2*dimx points per state row. */
    for (int i = 0; i < dimx; i++) {
        for (int idx = 0; idx < dimx * 2; idx++) {
            int column = idx % dimx;
            /* First dimx points add the offset, the remaining dimx subtract it. */
            int mode = (idx >= dimx) ? -1 : 1;
            sing_j[(i * dimx * 2 + idx) * aligned_ckf + ckf_id] =
                temp_var[(i * dimx + column) * aligned_ckf + ckf_id] * epsilon * mode
                + init_state[i * aligned_ckf + ckf_id];
        }
    }

    /* Step 2: project every point through row `iter` of zrlists and
     * accumulate the sum and the sum of squares of the projections. */
    z_k_f[ckf_id] = 0;
    innCovMatrixSum[ckf_id] = 0;
    for (int idx = 0; idx < dimx * 2; idx++) {
        int point = idx * aligned_ckf + ckf_id;

        z_k_j[point] = 0;
        for (int i = 0; i < dimx; i++) {
            z_k_j[point] += sing_j[(i * dimx * 2 + idx) * aligned_ckf + ckf_id]
                            * zrlists[iter * aligned_dimx + i];
        }
        z_k_f[ckf_id] += z_k_j[point];
        innCovMatrixSum[ckf_id] += z_k_j[point] * z_k_j[point];
    }
    z_k_f[ckf_id] = z_k_f[ckf_id] / (dimx * 2);
    innCovMatrixSum[ckf_id] = innCovMatrixSum[ckf_id] / (dimx * 2);
    innCovMatrixSum[ckf_id] = (innCovMatrixSum[ckf_id] - z_k_f[ckf_id] * z_k_f[ckf_id])
                              + rlists[ckf_id];

    /* Step 3: calculate crossCovMatrixSum. */
    for (int idx = 0; idx < dimx; idx++) {
        int row = idx * aligned_ckf + ckf_id;

        crossCovMatrixSum[row] = 0;
        for (int i = 0; i < 2 * dimx; i++) {
            crossCovMatrixSum[row] += sing_j[(idx * dimx * 2 + i) * aligned_ckf + ckf_id]
                                      * z_k_j[i * aligned_ckf + ckf_id];
        }
        crossCovMatrixSum[row] = crossCovMatrixSum[row] / (dimx * 2);
        crossCovMatrixSum[row] = crossCovMatrixSum[row] - x_k_f[row] * z_k_f[ckf_id];
    }

    /* Step 4: calculate zk_diff.
     * NOTE(review): the marker test truncates toward zero, so every
     * yrlists[iter] in (-2, -1] is treated as the "-1" marker, not only an
     * exact -1. Kept as in the original; confirm whether an exact comparison
     * is what the host code intends. */
    int z_check = (int)yrlists[iter];
    if (z_check == -1) {
        zk_diff[ckf_id] = 0;
    } else {
        zk_diff[ckf_id] = yrlists[iter] - z_k_f[ckf_id];
    }

    /* Step 5: calculate reduce_gain_matrix and the updated state
     * (written back into init_state). */
    for (int idx = 0; idx < dimx; idx++) {
        int row = idx * aligned_ckf + ckf_id;

        reduce_gain_matrix[row] = (crossCovMatrixSum[row] / innCovMatrixSum[ckf_id]);
        init_state[row] = reduce_gain_matrix[row] * zk_diff[ckf_id] + x_k_f[row];
    }

    /* Updated variance: diagonal of covMatrixSum minus gain * innCov * gain. */
    for (int idx = 0; idx < dimx; idx++) {
        int row = idx * aligned_ckf + ckf_id;

        init_var[row] = covMatrixSum[(idx * dimx + idx) * aligned_ckf + ckf_id] -
                        reduce_gain_matrix[row] * innCovMatrixSum[ckf_id] *
                        reduce_gain_matrix[row];
    }

    /* Step 6: likelihood term. A non-positive innCovMatrixSum has no
     * logarithm, so the term is defined as 0 in that case. */
    if (innCovMatrixSum[ckf_id] <= 0) {
        llk[ckf_id] = 0;
    } else {
        double det_zkinin = zk_diff[ckf_id] * zk_diff[ckf_id]
                            * (1.0 / innCovMatrixSum[ckf_id]);
        /* M_PI replaces the truncated literal 3.14 in the log(2*pi) term. */
        llk[ckf_id] = 0.5 * ((log(innCovMatrixSum[ckf_id])) +
                             det_zkinin + log((2.0) * M_PI));
    }
    cummulative[ckf_id] += llk[ckf_id];
}