blob: c8e0a6dfc7b50203350df9e4d784ac904e297622 [file] [log] [blame]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
// Functions used to compare the TensorMap implementation on the device with
// the equivalent on the host
namespace SYCL {
template <typename T>
T abs(T x) {
return cl::sycl::abs(x);
}
template <>
Eigen::half abs(Eigen::half x) {
return Eigen::half(cl::sycl::fabs(static_cast<cl::sycl::half>(x)));
}
template <>
float abs(float x) {
return cl::sycl::fabs(x);
}
template <>
double abs(double x) {
return cl::sycl::fabs(x);
}
template <typename T>
T square(T x) {
return x * x;
}
template <typename T>
T cube(T x) {
return x * x * x;
}
template <typename T>
T inverse(T x) {
return T(1) / x;
}
template <typename T>
T cwiseMax(T x, T y) {
return cl::sycl::max(x, y);
}
template <>
Eigen::half cwiseMax(Eigen::half x, Eigen::half y) {
return Eigen::half(cl::sycl::max(static_cast<cl::sycl::half>(x), static_cast<cl::sycl::half>(y)));
}
template <typename T>
T cwiseMin(T x, T y) {
return cl::sycl::min(x, y);
}
template <>
Eigen::half cwiseMin(Eigen::half x, Eigen::half y) {
return Eigen::half(cl::sycl::min(static_cast<cl::sycl::half>(x), static_cast<cl::sycl::half>(y)));
}
template <typename T>
T sqrt(T x) {
return cl::sycl::sqrt(x);
}
template <>
Eigen::half sqrt(Eigen::half x) {
return Eigen::half(cl::sycl::sqrt(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T rsqrt(T x) {
return cl::sycl::rsqrt(x);
}
template <>
Eigen::half rsqrt(Eigen::half x) {
return Eigen::half(cl::sycl::rsqrt(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T tanh(T x) {
return cl::sycl::tanh(x);
}
template <>
Eigen::half tanh(Eigen::half x) {
return Eigen::half(cl::sycl::tanh(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T exp(T x) {
return cl::sycl::exp(x);
}
template <>
Eigen::half exp(Eigen::half x) {
return Eigen::half(cl::sycl::exp(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T expm1(T x) {
return cl::sycl::expm1(x);
}
template <>
Eigen::half expm1(Eigen::half x) {
return Eigen::half(cl::sycl::expm1(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T log(T x) {
return cl::sycl::log(x);
}
template <>
Eigen::half log(Eigen::half x) {
return Eigen::half(cl::sycl::log(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T ceil(T x) {
return cl::sycl::ceil(x);
}
template <>
Eigen::half ceil(Eigen::half x) {
return Eigen::half(cl::sycl::ceil(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T floor(T x) {
return cl::sycl::floor(x);
}
template <>
Eigen::half floor(Eigen::half x) {
return Eigen::half(cl::sycl::floor(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T round(T x) {
return cl::sycl::round(x);
}
template <>
Eigen::half round(Eigen::half x) {
return Eigen::half(cl::sycl::round(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T log1p(T x) {
return cl::sycl::log1p(x);
}
template <>
Eigen::half log1p(Eigen::half x) {
return Eigen::half(cl::sycl::log1p(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T sign(T x) {
return cl::sycl::sign(x);
}
template <>
Eigen::half sign(Eigen::half x) {
return Eigen::half(cl::sycl::sign(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T isnan(T x) {
return cl::sycl::isnan(x);
}
template <>
Eigen::half isnan(Eigen::half x) {
return Eigen::half(cl::sycl::isnan(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T isfinite(T x) {
return cl::sycl::isfinite(x);
}
template <>
Eigen::half isfinite(Eigen::half x) {
return Eigen::half(cl::sycl::isfinite(static_cast<cl::sycl::half>(x)));
}
template <typename T>
T isinf(T x) {
return cl::sycl::isinf(x);
}
template <>
Eigen::half isinf(Eigen::half x) {
return Eigen::half(cl::sycl::isinf(static_cast<cl::sycl::half>(x)));
}
} // namespace SYCL
#define DECLARE_UNARY_STRUCT_NON_SYCL(FUNC) \
struct op_##FUNC { \
template <typename T> \
auto operator()(const T& x) { \
return SYCL::FUNC(x); \
} \
template <typename T> \
auto operator()(const TensorMap<T>& x) { \
return x.FUNC(); \
} \
};
DECLARE_UNARY_STRUCT_NON_SYCL(abs)
DECLARE_UNARY_STRUCT_NON_SYCL(square)
DECLARE_UNARY_STRUCT_NON_SYCL(cube)
DECLARE_UNARY_STRUCT_NON_SYCL(inverse)
#define DECLARE_BINARY_STRUCT_NON_SYCL(FUNC) \
struct op_##FUNC { \
template <typename T1, typename T2> \
auto operator()(const T1& x, const T2& y) { \
return SYCL::FUNC(x, y); \
} \
template <typename T1, typename T2> \
auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) { \
return x.FUNC(y); \
} \
};
DECLARE_BINARY_STRUCT_NON_SYCL(cwiseMax)
DECLARE_BINARY_STRUCT_NON_SYCL(cwiseMin)
struct EqualAssignment {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) {
lhs = rhs;
}
};
struct PlusEqualAssignment {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) {
lhs += rhs;
}
};
template <typename DataType, int DataLayout, typename Assignment, typename Operator>
void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
Operator op;
Assignment asgn;
{
/* Assignment(out, Operator(in)) */
Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in = in.random() + DataType(0.01);
out = out.random() + DataType(0.01);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(in.size() * sizeof(DataType)));
DataType* gpu_data_out = static_cast<DataType*>(sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data, in.data(), (in.size()) * sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), (out.size()) * sizeof(DataType));
auto device_expr = gpu_out.device(sycl_device);
asgn(device_expr, op(gpu));
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, (out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
DataType ver = reference(i);
asgn(ver, op(in(i)));
VERIFY_IS_APPROX(out(i), ver);
}
sycl_device.deallocate(gpu_data);
sycl_device.deallocate(gpu_data_out);
}
{
/* Assignment(out, Operator(out)) */
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
// Offset with 1 to avoid tiny output (< 1e-6) as they can easily fail.
out = out.random() + DataType(1);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType* gpu_data_out = static_cast<DataType*>(sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), (out.size()) * sizeof(DataType));
auto device_expr = gpu_out.device(sycl_device);
asgn(device_expr, op(gpu_out));
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, (out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
DataType ver = reference(i);
asgn(ver, op(reference(i)));
VERIFY_IS_APPROX(out(i), ver);
}
sycl_device.deallocate(gpu_data_out);
}
}
#define DECLARE_UNARY_STRUCT(FUNC) \
struct op_##FUNC { \
template <typename T> \
auto operator()(const T& x) -> decltype(SYCL::FUNC(x)) { \
return SYCL::FUNC(x); \
} \
template <typename T> \
auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \
return x.FUNC(); \
} \
};
DECLARE_UNARY_STRUCT(sqrt)
DECLARE_UNARY_STRUCT(rsqrt)
DECLARE_UNARY_STRUCT(tanh)
DECLARE_UNARY_STRUCT(exp)
DECLARE_UNARY_STRUCT(expm1)
DECLARE_UNARY_STRUCT(log)
DECLARE_UNARY_STRUCT(ceil)
DECLARE_UNARY_STRUCT(floor)
DECLARE_UNARY_STRUCT(round)
DECLARE_UNARY_STRUCT(log1p)
DECLARE_UNARY_STRUCT(sign)
DECLARE_UNARY_STRUCT(isnan)
DECLARE_UNARY_STRUCT(isfinite)
DECLARE_UNARY_STRUCT(isinf)
template <typename DataType, int DataLayout, typename Assignment>
void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
#define RUN_UNARY_TEST(FUNC) \
test_unary_builtins_for_scalar<DataType, DataLayout, Assignment, op_##FUNC>(sycl_device, tensor_range)
RUN_UNARY_TEST(abs);
RUN_UNARY_TEST(sqrt);
RUN_UNARY_TEST(rsqrt);
RUN_UNARY_TEST(square);
RUN_UNARY_TEST(cube);
RUN_UNARY_TEST(inverse);
RUN_UNARY_TEST(tanh);
RUN_UNARY_TEST(exp);
RUN_UNARY_TEST(expm1);
RUN_UNARY_TEST(log);
RUN_UNARY_TEST(ceil);
RUN_UNARY_TEST(floor);
RUN_UNARY_TEST(round);
RUN_UNARY_TEST(log1p);
RUN_UNARY_TEST(sign);
}
template <typename DataType, int DataLayout, typename Operator>
void test_unary_builtins_return_bool(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
/* out = op(in) */
Operator op;
Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
Tensor<bool, 3, DataLayout, int64_t> out(tensor_range);
in = in.random() + DataType(0.01);
DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(in.size() * sizeof(DataType)));
bool* gpu_data_out = static_cast<bool*>(sycl_device.allocate(out.size() * sizeof(bool)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
TensorMap<Tensor<bool, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data, in.data(), (in.size()) * sizeof(DataType));
gpu_out.device(sycl_device) = op(gpu);
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, (out.size()) * sizeof(bool));
for (int64_t i = 0; i < out.size(); ++i) {
VERIFY_IS_EQUAL(out(i), op(in(i)));
}
sycl_device.deallocate(gpu_data);
sycl_device.deallocate(gpu_data_out);
}
template <typename DataType, int DataLayout>
void test_unary_builtins(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
test_unary_builtins_for_assignement<DataType, DataLayout, PlusEqualAssignment>(sycl_device, tensor_range);
test_unary_builtins_for_assignement<DataType, DataLayout, EqualAssignment>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout, op_isnan>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout, op_isfinite>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout, op_isinf>(sycl_device, tensor_range);
}
template <typename DataType>
static void test_builtin_unary_sycl(const Eigen::SyclDevice& sycl_device) {
int64_t sizeDim1 = 10;
int64_t sizeDim2 = 10;
int64_t sizeDim3 = 10;
array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
}
template <typename DataType, int DataLayout, typename Operator>
void test_binary_builtins_func(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
/* out = op(in_1, in_2) */
Operator op;
Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> in_2(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in_1 = in_1.random() + DataType(0.01);
in_2 = in_2.random() + DataType(0.01);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType* gpu_data_1 = static_cast<DataType*>(sycl_device.allocate(in_1.size() * sizeof(DataType)));
DataType* gpu_data_2 = static_cast<DataType*>(sycl_device.allocate(in_2.size() * sizeof(DataType)));
DataType* gpu_data_out = static_cast<DataType*>(sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_2(gpu_data_2, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), (in_1.size()) * sizeof(DataType));
sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), (in_2.size()) * sizeof(DataType));
gpu_out.device(sycl_device) = op(gpu_1, gpu_2);
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, (out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
VERIFY_IS_APPROX(out(i), op(in_1(i), in_2(i)));
}
sycl_device.deallocate(gpu_data_1);
sycl_device.deallocate(gpu_data_2);
sycl_device.deallocate(gpu_data_out);
}
template <typename DataType, int DataLayout, typename Operator>
void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
/* out = op(in_1, 2) */
Operator op;
const DataType arg2(2);
Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in_1 = in_1.random();
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType* gpu_data_1 = static_cast<DataType*>(sycl_device.allocate(in_1.size() * sizeof(DataType)));
DataType* gpu_data_out = static_cast<DataType*>(sycl_device.allocate(out.size() * sizeof(DataType)));
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), (in_1.size()) * sizeof(DataType));
gpu_out.device(sycl_device) = op(gpu_1, arg2);
sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, (out.size()) * sizeof(DataType));
for (int64_t i = 0; i < out.size(); ++i) {
VERIFY_IS_APPROX(out(i), op(in_1(i), arg2));
}
sycl_device.deallocate(gpu_data_1);
sycl_device.deallocate(gpu_data_out);
}
#define DECLARE_BINARY_STRUCT(FUNC) \
struct op_##FUNC { \
template <typename T1, typename T2> \
auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) { \
return cl::sycl::FUNC(x, y); \
} \
template <typename T1, typename T2> \
auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \
return x.FUNC(y); \
} \
};
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR) \
struct op_##NAME { \
template <typename T1, typename T2> \
auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \
return x OPERATOR y; \
} \
};
DECLARE_BINARY_STRUCT_OP(plus, +)
DECLARE_BINARY_STRUCT_OP(minus, -)
DECLARE_BINARY_STRUCT_OP(times, *)
DECLARE_BINARY_STRUCT_OP(divide, /)
DECLARE_BINARY_STRUCT_OP(modulo, %)
template <typename DataType, int DataLayout>
void test_binary_builtins(const Eigen::SyclDevice& sycl_device, const array<int64_t, 3>& tensor_range) {
test_binary_builtins_func<DataType, DataLayout, op_cwiseMax>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout, op_cwiseMin>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout, op_plus>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout, op_minus>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout, op_times>(sycl_device, tensor_range);
test_binary_builtins_func<DataType, DataLayout, op_divide>(sycl_device, tensor_range);
}
template <typename DataType>
static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice& sycl_device) {
int64_t sizeDim1 = 10;
int64_t sizeDim2 = 10;
int64_t sizeDim3 = 10;
array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
}
template <typename DataType>
static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice& sycl_device) {
int64_t sizeDim1 = 10;
int64_t sizeDim2 = 10;
int64_t sizeDim3 = 10;
array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
test_binary_builtins_fixed_arg2<DataType, RowMajor, op_modulo>(sycl_device, tensor_range);
test_binary_builtins_fixed_arg2<DataType, ColMajor, op_modulo>(sycl_device, tensor_range);
}
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {
for (const auto& device : Eigen::get_sycl_supported_devices()) {
QueueInterface queueInterface(device);
Eigen::SyclDevice sycl_device(&queueInterface);
CALL_SUBTEST_1(test_builtin_unary_sycl<half>(sycl_device));
CALL_SUBTEST_2(test_floating_builtin_binary_sycl<half>(sycl_device));
CALL_SUBTEST_3(test_builtin_unary_sycl<float>(sycl_device));
CALL_SUBTEST_4(test_floating_builtin_binary_sycl<float>(sycl_device));
CALL_SUBTEST_5(test_integer_builtin_binary_sycl<int>(sycl_device));
}
}