Update Eigen to commit: 42aa3d17cd27ecdfa34b81100314749f72c00256
CHANGELOG
=========
42aa3d17c - Slightly adjust error bound for nonlinear tests.
1c8c734c8 - Fix sin/cos on PPC.
34967b0b5 - Revert "fix transposed matrix product bug"
9cec679ef - Don't let the PPC runner try to cross-compile.
574bc8820 - fix transposed matrix product bug
112ad8b84 - Revert part of !1583, which may cause underflow on ARM.
8cafbc473 - Fix unused variable warnings in TensorIO
4de870b6e - fix autodiff enum comparison warnings
2265242aa - Update CI scripts.
ee9d57347 - Fix `tridiagonalization_inplace_selector::run()` when called from CUDA
1550c9954 - Eigen select
PiperOrigin-RevId: 628474184
Change-Id: I9f5a69fd4ad6d7c65a5deccfc669047656d84536
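
For reference, the headline change here ("Eigen select", 1550c9954) adds an evaluator specialization so that fused comparison/select expressions vectorize fully. A minimal, illustrative sketch of the targeted expression form (not part of the patch; variable names are hypothetical), assuming Eigen's Array API:

    #include <Eigen/Core>

    int main() {
      Eigen::ArrayXf a = Eigen::ArrayXf::Random(8);
      Eigen::ArrayXf b = Eigen::ArrayXf::Random(8);
      Eigen::ArrayXf c = Eigen::ArrayXf::Constant(8, 1.0f);
      Eigen::ArrayXf d = Eigen::ArrayXf::Zero(8);
      // Fused comparison + select: the new specialization in CoreEvaluators.h maps
      // the bool-valued comparison onto its Scalar-valued (vectorizable) form, so
      // the whole ternary expression can be evaluated with packet (SIMD) code.
      Eigen::ArrayXf out = (a < b).select(c, d);
      return 0;
    }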
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index c620600..5e1cbf6 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -840,6 +840,27 @@
Data m_d;
};
+// specialization for expressions like (a < b).select(c, d) to enable full vectorization
+template <typename Arg1, typename Arg2, typename Scalar, typename CmpLhsType, typename CmpRhsType, ComparisonName cmp>
+struct evaluator<CwiseTernaryOp<scalar_boolean_select_op<Scalar, Scalar, bool>, Arg1, Arg2,
+ CwiseBinaryOp<scalar_cmp_op<Scalar, Scalar, cmp, false>, CmpLhsType, CmpRhsType>>>
+ : public ternary_evaluator<
+ CwiseTernaryOp<scalar_boolean_select_op<Scalar, Scalar, Scalar>, Arg1, Arg2,
+ CwiseBinaryOp<scalar_cmp_op<Scalar, Scalar, cmp, true>, CmpLhsType, CmpRhsType>>> {
+ using DummyTernaryOp = scalar_boolean_select_op<Scalar, Scalar, bool>;
+ using DummyArg3 = CwiseBinaryOp<scalar_cmp_op<Scalar, Scalar, cmp, false>, CmpLhsType, CmpRhsType>;
+ using DummyXprType = CwiseTernaryOp<DummyTernaryOp, Arg1, Arg2, DummyArg3>;
+
+ using TernaryOp = scalar_boolean_select_op<Scalar, Scalar, Scalar>;
+ using Arg3 = CwiseBinaryOp<scalar_cmp_op<Scalar, Scalar, cmp, true>, CmpLhsType, CmpRhsType>;
+ using XprType = CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>;
+
+ using Base = ternary_evaluator<XprType>;
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const DummyXprType& xpr)
+ : Base(XprType(xpr.arg1(), xpr.arg2(), Arg3(xpr.arg3().lhs(), xpr.arg3().rhs()))) {}
+};
+
// -------------------- CwiseBinaryOp --------------------
// this is a binary expression
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index eed545c..b0f7262 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -3398,6 +3398,10 @@
return reinterpret_cast<Packet2d>(vec_cmpeq(a, b));
}
template <>
+EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
+ return reinterpret_cast<Packet2l>(vec_cmpeq(a, b));
+}
+template <>
EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) {
Packet2d c = reinterpret_cast<Packet2d>(vec_cmpge(a, b));
return vec_nor(c, c);
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 4ee035d..16ca807 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -129,7 +129,7 @@
const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
PacketI b = parithmetic_shift_right<2>(e); // floor(e/4);
Packet c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^b
- Packet out = pmul(pmul(a, c), pmul(c, c)); // a * 2^(3b)
+ Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
b = pnmadd(pset1<PacketI>(3), b, e); // e - 3b
c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^(e-3*b)
out = pmul(out, c);
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index 9bb791d..e49e9db 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -445,8 +445,8 @@
typedef typename MatrixType::RealScalar RealScalar;
template <typename DiagonalType, typename SubDiagonalType, typename CoeffVectorType, typename WorkSpaceType>
- static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType&, WorkSpaceType&,
- bool extractQ) {
+ static EIGEN_DEVICE_FUNC void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, CoeffVectorType&,
+ WorkSpaceType&, bool extractQ) {
using std::sqrt;
const RealScalar tol = (std::numeric_limits<RealScalar>::min)();
diag[0] = mat(0, 0);
diff --git a/scripts/ci_cmake_msvc.ps1 b/scripts/ci_cmake_msvc.ps1
new file mode 100644
index 0000000..5dae3c8
--- /dev/null
+++ b/scripts/ci_cmake_msvc.ps1
@@ -0,0 +1,65 @@
+# PowerShell script to set up MSVC CMake builds that mirror the CI. Useful for reproducing issues.
+
+param ($EIGEN_CI_ROOTDIR,
+ $EIGEN_CI_BUILDDIR,
+ $EIGEN_CI_BUILD_TARGET,
+ $EIGEN_CI_ADDITIONAL_ARGS,
+ $EIGEN_CI_BEFORE_SCRIPT,
+ $EIGEN_CI_CMAKE_GENERATOR,
+ $EIGEN_CI_MSVC_ARCH,
+ $EIGEN_CI_MSVC_VER,
+ $EIGEN_CI_TEST_CUSTOM_CXX_FLAGS
+ )
+
+function Get-ScriptDirectory { Split-Path $MyInvocation.ScriptName }
+
+# Set defaults if not already set.
+IF (!$EIGEN_CI_ROOTDIR) { $EIGEN_CI_ROOTDIR = Join-Path (Get-ScriptDirectory) '..' }
+IF (!$EIGEN_CI_BUILDDIR) { $EIGEN_CI_BUILDDIR = ".build" }
+IF (!$EIGEN_CI_BUILD_TARGET) { $EIGEN_CI_BUILD_TARGET = "buildtests" }
+IF (!$EIGEN_CI_ADDITIONAL_ARGS) { $EIGEN_CI_ADDITIONAL_ARGS = "" }
+IF (!$EIGEN_CI_BEFORE_SCRIPT) { $EIGEN_CI_BEFORE_SCRIPT = "" }
+IF (!$EIGEN_CI_CMAKE_GENERATOR) { $EIGEN_CI_CMAKE_GENERATOR = "Ninja" }
+IF (!$EIGEN_CI_MSVC_ARCH) { $EIGEN_CI_MSVC_ARCH = "x64" }
+IF (!$EIGEN_CI_MSVC_VER) { $EIGEN_CI_MSVC_VER = "14.29" }
+IF (!$EIGEN_CI_TEST_CUSTOM_CXX_FLAGS) { $EIGEN_CI_TEST_CUSTOM_CXX_FLAGS = "/d2ReducedOptimizeHugeFunctions /DEIGEN_STRONG_INLINE=inline /Os" }
+
+# Export variables into the global scope
+$global:EIGEN_CI_ROOTDIR = $EIGEN_CI_ROOTDIR
+$global:EIGEN_CI_BUILDDIR = $EIGEN_CI_BUILDDIR
+$global:EIGEN_CI_BUILD_TARGET = $EIGEN_CI_BUILD_TARGET
+$global:EIGEN_CI_ADDITIONAL_ARGS = $EIGEN_CI_ADDITIONAL_ARGS
+$global:EIGEN_CI_BEFORE_SCRIPT = $EIGEN_CI_BEFORE_SCRIPT
+$global:EIGEN_CI_CMAKE_GENERATOR = $EIGEN_CI_CMAKE_GENERATOR
+$global:EIGEN_CI_MSVC_ARCH = $EIGEN_CI_MSVC_ARCH
+$global:EIGEN_CI_MSVC_VER = $EIGEN_CI_MSVC_VER
+$global:EIGEN_CI_TEST_CUSTOM_CXX_FLAGS = $EIGEN_CI_TEST_CUSTOM_CXX_FLAGS
+
+# Print configuration variables.
+Get-Variable | findstr EIGEN
+
+
+# Run any setup scripts.
+if ("${EIGEN_CI_BEFORE_SCRIPT}") { Invoke-Expression -Command "${EIGEN_CI_BEFORE_SCRIPT}" }
+
+# Find Visual Studio installation directory.
+$global:VS_INSTALL_DIR = &"${Env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath
+# Run the VCVarsAll.bat initialization script and extract environment variables.
+# http://allen-mack.blogspot.com/2008/03/replace-visual-studio-command-prompt.html
+cmd.exe /c "`"${VS_INSTALL_DIR}\VC\Auxiliary\Build\vcvarsall.bat`" $EIGEN_CI_MSVC_ARCH -vcvars_ver=$EIGEN_CI_MSVC_VER & set" | foreach { if ($_ -match "=") { $v = $_.split("="); set-item -force -path "ENV:\$($v[0])" -value "$($v[1])" } }
+
+# Create and change to the build directory.
+IF (-Not (Test-Path -Path $EIGEN_CI_BUILDDIR) ) { mkdir $EIGEN_CI_BUILDDIR }
+cd $EIGEN_CI_BUILDDIR
+
+# We need to split EIGEN_CI_ADDITIONAL_ARGS, otherwise they are interpreted
+# as a single argument. Split by space, unless double-quoted.
+$split_args = [regex]::Split(${EIGEN_CI_ADDITIONAL_ARGS}, ' (?=(?:[^"]|"[^"]*")*$)' )
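+# For example (illustrative): '-DA=ON -DB="x y"' splits into '-DA=ON' and '-DB="x y"',
+# since the lookahead only matches spaces that sit outside double quotes.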
+cmake -G "${EIGEN_CI_CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=MinSizeRel -DEIGEN_TEST_CUSTOM_CXX_FLAGS="${EIGEN_CI_TEST_CUSTOM_CXX_FLAGS}" ${split_args} "${EIGEN_CI_ROOTDIR}"
+
+IF ($EIGEN_CI_BUILD_TARGET) {
+  # Windows builds sometimes fail due to heap errors. In that case, try
+ # building the rest, then try to build again with a single thread.
+ cmake --build . --target $EIGEN_CI_BUILD_TARGET -- -k0 || cmake --build . --target $EIGEN_CI_BUILD_TARGET -- -k0 -j1
+}
+
diff --git a/scripts/ci_cmake_msvc_cuda.ps1 b/scripts/ci_cmake_msvc_cuda.ps1
new file mode 100644
index 0000000..dd615ec
--- /dev/null
+++ b/scripts/ci_cmake_msvc_cuda.ps1
@@ -0,0 +1,31 @@
+# PowerShell script to set up MSVC CUDA CMake builds that mirror the CI. Useful for reproducing issues.
+
+param ($EIGEN_CI_ROOTDIR,
+ $EIGEN_CI_BUILDDIR,
+ $EIGEN_CI_BUILD_TARGET,
+ $EIGEN_CI_ADDITIONAL_ARGS,
+ $EIGEN_CI_BEFORE_SCRIPT,
+ $EIGEN_CI_CMAKE_GENERATOR,
+ $EIGEN_CI_MSVC_ARCH,
+ $EIGEN_CI_MSVC_VER,
+ $EIGEN_CI_TEST_CUSTOM_CXX_FLAGS,
+
+ $EIGEN_CI_CUDA_CXX_FLAGS,
+ $EIGEN_CI_CUDA_COMPUTE_ARCH
+ )
+
+# Set defaults if not already set.
+IF (!$EIGEN_CI_CUDA_CXX_FLAGS) { $EIGEN_CI_CUDA_CXX_FLAGS = "" }
+IF (!$EIGEN_CI_CUDA_COMPUTE_ARCH) { $EIGEN_CI_CUDA_COMPUTE_ARCH = "50;70" }
+IF (!$EIGEN_CI_BUILD_TARGET) { $EIGEN_CI_BUILD_TARGET = "buildtests_gpu" }
+IF (!$EIGEN_CI_ADDITIONAL_ARGS) { $EIGEN_CI_ADDITIONAL_ARGS = '-DCMAKE_CUDA_COMPILER=nvcc.exe -DCMAKE_CUDA_SEPARABLE_COMPILATION=OFF -DEIGEN_TEST_CUDA=on -DEIGEN_CUDA_CXX_FLAGS='+${EIGEN_CI_CUDA_CXX_FLAGS}+' -DEIGEN_CUDA_COMPUTE_ARCH='+${EIGEN_CI_CUDA_COMPUTE_ARCH} }
+
+
+# Export variables into the global scope
+$global:EIGEN_CI_CUDA_CXX_FLAGS = $EIGEN_CI_CUDA_CXX_FLAGS
+$global:EIGEN_CI_CUDA_COMPUTE_ARCH = $EIGEN_CI_CUDA_COMPUTE_ARCH
+
+# Call the generic msvc setup scripts.
+function Get-ScriptDirectory { Split-Path $MyInvocation.ScriptName }
+$script = Join-Path (Get-ScriptDirectory) 'ci_cmake_msvc.ps1'
+& $script $EIGEN_CI_ROOTDIR $EIGEN_CI_BUILDDIR $EIGEN_CI_BUILD_TARGET $EIGEN_CI_ADDITIONAL_ARGS $EIGEN_CI_BEFORE_SCRIPT $EIGEN_CI_CMAKE_GENERATOR $EIGEN_CI_MSVC_ARCH $EIGEN_CI_MSVC_VER $EIGEN_CI_TEST_CUSTOM_CXX_FLAGS
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index b1928c4..2605219 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -195,19 +195,19 @@
// Default scalar printer.
template <typename Scalar, typename Format, typename EnableIf = void>
struct ScalarPrinter {
- static void run(std::ostream& stream, const Scalar& scalar, const Format& fmt) { stream << scalar; }
+ static void run(std::ostream& stream, const Scalar& scalar, const Format&) { stream << scalar; }
};
template <typename Scalar>
struct ScalarPrinter<Scalar, TensorIOFormatNumpy, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
- static void run(std::ostream& stream, const Scalar& scalar, const TensorIOFormatNumpy& fmt) {
+ static void run(std::ostream& stream, const Scalar& scalar, const TensorIOFormatNumpy&) {
stream << numext::real(scalar) << "+" << numext::imag(scalar) << "j";
}
};
template <typename Scalar>
struct ScalarPrinter<Scalar, TensorIOFormatNative, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
- static void run(std::ostream& stream, const Scalar& scalar, const TensorIOFormatNative& fmt) {
+ static void run(std::ostream& stream, const Scalar& scalar, const TensorIOFormatNative&) {
stream << "{" << numext::real(scalar) << ", " << numext::imag(scalar) << "}";
}
};
@@ -339,7 +339,7 @@
IndexType scalar_width = scalar_str.length();
if (width && scalar_width < width) {
std::string filler;
- for (IndexType i = scalar_width; i < width; ++i) {
+ for (IndexType j = scalar_width; j < width; ++j) {
filler.push_back(fmt.fill);
}
s << filler;
@@ -360,7 +360,7 @@
using Format = TensorIOFormatLegacy;
using Scalar = std::remove_const_t<typename Tensor::Scalar>;
- static void run(std::ostream& s, const Tensor& tensor, const Format& fmt) {
+ static void run(std::ostream& s, const Tensor& tensor, const Format&) {
typedef typename Tensor::Index IndexType;
// backwards compatibility case: print tensor after reshaping to matrix of size dim(0) x
// (dim(1)*dim(2)*...*dim(rank-1)).
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index c6ffa0d..74f2e6f 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -22,13 +22,8 @@
template <typename DerivativeType, typename OtherDerivativeType, typename EnableIf = void>
struct maybe_coherent_pad_helper {
- static constexpr int SizeAtCompileTime = DerivativeType::SizeAtCompileTime == Dynamic ||
- OtherDerivativeType::SizeAtCompileTime == Dynamic
- ? Dynamic
- : int(DerivativeType::SizeAtCompileTime) >
- int(OtherDerivativeType::SizeAtCompileTime)
- ? DerivativeType::SizeAtCompileTime
- : OtherDerivativeType::SizeAtCompileTime;
+ static constexpr int SizeAtCompileTime =
+ max_size_prefer_dynamic(DerivativeType::SizeAtCompileTime, OtherDerivativeType::SizeAtCompileTime);
using type = CoherentPadOp<DerivativeType, SizeAtCompileTime>;
static type pad(const DerivativeType& x, const OtherDerivativeType& y) {
// CoherentPadOp uses variable_if_dynamic<SizeAtCompileTime>. In this case, `SizeAtCompileTime` might
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index c847420..0a1cf36 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -1029,7 +1029,7 @@
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 9, 8);
// check norm^2
- VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
+ VERIFY(lm.fvec.squaredNorm() <= 1.44E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index 355fc7c..c4f060b 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -692,7 +692,7 @@
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
- VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
+ VERIFY(lm.fvec().squaredNorm() <= 1.44E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);