Update Eigen to commit:3bb6a48d8c171cf20b5f8e48bfb4e424fbd4f79e

CHANGELOG
=========
3bb6a48d8 - Fix bug atan2
14c847dc0 - Refactor special values test for pow, and add a similar test for atan2
462758e8a - Don'\''t use generic sign function for sign(complex) unless it is vectorizable
c0d6a7261 - Use pnegate(pzero(x)) as a generic way to generate -0.0. Some compiler do not handle the literal -0.0 properly in fastmath mode.
7846c7387 - Eigen/Sparse: fix warnings -Wunused-but-set-variable
316754487 - Handle NaN inputs to atan2.
72db3f0fa - Remove references to M_PI_2 and M_PI_4.
d6bc06259 - Remove reference to EIGEN_HAS_CXX11_MATH.
5ceed0d57 - Guard GCC-specific pragmas with "#ifdef EIGEN_COMP_GNUC"
528b68674 - [clang-format] Add a few macros to AttributeMacros
e95c4a837 - Simpler range reduction strategy for atan<float>().
80efbfded - Unconditionally enable CXX11 math.
e5794873c - Replace assert with eigen_assert.
7d6a9925c - Fix 4x4 inverse when compiling with -Ofast.
1414a76fa - Only vectorize atan<double> for Altivec if VSX is available.
c475228b2 - Vectorize atan() for double.
1e1848fdb - Add a vectorized implementation of atan2 to Eigen.

* Switch TensorFlow to using the new fast atan2 in Eigen.
* Get rid of local implementations since Eigen is now guaranteed to support C++11 math.

PiperOrigin-RevId: 481044760
Change-Id: Ibb2e835abae11d2dac14b811af7b7b3b766a1672
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index c2d943c..dcb0d13 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -273,7 +273,7 @@
         m_rows(rows), m_supers(supers), m_subs(subs)
     {
       EIGEN_UNUSED_VARIABLE(cols);
-      //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
+      // eigen_assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
     }
 
     /** \returns the number of columns */
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index f8d00b1..18792cb 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -68,11 +68,9 @@
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
-#if EIGEN_HAS_CXX11_MATH
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asinh,scalar_asinh_op,inverse hyperbolic sine,\sa ArrayBase::asinh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acosh,scalar_acosh_op,inverse hyperbolic cosine,\sa ArrayBase::acosh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atanh,scalar_atanh_op,inverse hyperbolic tangent,\sa ArrayBase::atanh)
-#endif
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\sa ArrayBase::logistic)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
@@ -181,6 +179,25 @@
   }
 #endif
 
+  /** \returns an expression of the coefficient-wise atan2(\a x, \a y). \a x and \a y must be of the same type.
+    *
+    * This function computes the coefficient-wise atan2().
+    *
+    * \sa ArrayBase::atan2()
+    *
+    * \relates ArrayBase
+    */
+  template <typename LhsDerived, typename RhsDerived>
+  inline const std::enable_if_t<
+      std::is_same<typename LhsDerived::Scalar, typename RhsDerived::Scalar>::value,
+      Eigen::CwiseBinaryOp<Eigen::internal::scalar_atan2_op<typename LhsDerived::Scalar, typename RhsDerived::Scalar>, const LhsDerived, const RhsDerived>
+      >
+  atan2(const Eigen::ArrayBase<LhsDerived>& x, const Eigen::ArrayBase<RhsDerived>& exponents) {
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_atan2_op<typename LhsDerived::Scalar, typename RhsDerived::Scalar>, const LhsDerived, const RhsDerived>(
+      x.derived(),
+      exponents.derived()
+    );
+  }
 
   namespace internal
   {
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index af8b029..0eee333 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -524,54 +524,11 @@
   EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
-#if EIGEN_HAS_CXX11_MATH
     EIGEN_USING_STD(round);
-#endif
     return Scalar(round(x));
   }
 };
 
-#if !EIGEN_HAS_CXX11_MATH
-#if EIGEN_HAS_C99_MATH
-// Use ::roundf for float.
-template<>
-struct round_impl<float> {
-  EIGEN_DEVICE_FUNC
-  static inline float run(const float& x)
-  {
-    return ::roundf(x);
-  }
-};
-#else
-template<typename Scalar>
-struct round_using_floor_ceil_impl
-{
-  EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
-
-  EIGEN_DEVICE_FUNC
-  static inline Scalar run(const Scalar& x)
-  {
-    // Without C99 round/roundf, resort to floor/ceil.
-    EIGEN_USING_STD(floor);
-    EIGEN_USING_STD(ceil);
-    // If not enough precision to resolve a decimal at all, return the input.
-    // Otherwise, adding 0.5 can trigger an increment by 1.
-    const Scalar limit = Scalar(1ull << (NumTraits<Scalar>::digits() - 1));
-    if (x >= limit || x <= -limit) {
-      return x;
-    }
-    return (x > Scalar(0)) ? Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5)));
-  }
-};
-
-template<>
-struct round_impl<float> : round_using_floor_ceil_impl<float> {};
-
-template<>
-struct round_impl<double> : round_using_floor_ceil_impl<double> {};
-#endif // EIGEN_HAS_C99_MATH
-#endif // !EIGEN_HAS_CXX11_MATH
-
 template<typename Scalar>
 struct round_retval
 {
@@ -589,32 +546,11 @@
   EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
-#if EIGEN_HAS_CXX11_MATH
-      EIGEN_USING_STD(rint);
-#endif
+    EIGEN_USING_STD(rint);
     return rint(x);
   }
 };
 
-#if !EIGEN_HAS_CXX11_MATH
-template<>
-struct rint_impl<double> {
-  EIGEN_DEVICE_FUNC
-  static inline double run(const double& x)
-  {
-    return ::rint(x);
-  }
-};
-template<>
-struct rint_impl<float> {
-  EIGEN_DEVICE_FUNC
-  static inline float run(const float& x)
-  {
-    return ::rintf(x);
-  }
-};
-#endif
-
 template<typename Scalar>
 struct rint_retval
 {
@@ -627,7 +563,7 @@
 
 // Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
 // This seems to be fixed in VS 2019.
-#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
+#if (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
 // std::arg is only defined for types of std::complex, or integer types or float/double/long double
 template<typename Scalar,
           bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value
@@ -728,11 +664,7 @@
   EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
   {
     EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
-    #if EIGEN_HAS_CXX11_MATH
     using std::expm1;
-    #else
-    using std_fallback::expm1;
-    #endif
     return expm1(x);
   }
 };
@@ -793,11 +725,7 @@
 
   EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
   {
-    #if EIGEN_HAS_CXX11_MATH
     using std::log1p;
-    #else
-    using std_fallback::log1p;
-    #endif
     return log1p(x);
   }
 };
@@ -1011,7 +939,7 @@
 // Implementation of is* functions
 
 // std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang.
-#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC) || (EIGEN_COMP_CLANG)
+#if (!(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC) || (EIGEN_COMP_CLANG)
 #define EIGEN_USE_STD_FPCLASSIFY 1
 #else
 #define EIGEN_USE_STD_FPCLASSIFY 0
@@ -1721,14 +1649,12 @@
   return acos(x);
 }
 
-#if EIGEN_HAS_CXX11_MATH
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T acosh(const T &x) {
   EIGEN_USING_STD(acosh);
   return static_cast<T>(acosh(x));
 }
-#endif
 
 #if defined(SYCL_DEVICE_ONLY)
 SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)
@@ -1750,14 +1676,12 @@
   return asin(x);
 }
 
-#if EIGEN_HAS_CXX11_MATH
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T asinh(const T &x) {
   EIGEN_USING_STD(asinh);
   return static_cast<T>(asinh(x));
 }
-#endif
 
 #if defined(SYCL_DEVICE_ONLY)
 SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)
@@ -1779,14 +1703,12 @@
   return static_cast<T>(atan(x));
 }
 
-#if EIGEN_HAS_CXX11_MATH
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T atanh(const T &x) {
   EIGEN_USING_STD(atanh);
   return static_cast<T>(atanh(x));
 }
-#endif
 
 #if defined(SYCL_DEVICE_ONLY)
 SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 85132cc..d87c6b3 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -482,11 +482,9 @@
     const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
-#if EIGEN_HAS_CXX11_MATH
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, atanh, inverse hyperbolic cosine)
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, acosh, inverse hyperbolic cosine)
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, asinh, inverse hyperbolic sine)
-#endif
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
     EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
     EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 43aea54..cb7d7b8 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -51,6 +51,12 @@
 }
 
 template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
+patan<Packet4d>(const Packet4d& _x) {
+  return patan_double(_x);
+}
+
+template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
 plog<Packet8f>(const Packet8f& _x) {
   return plog_float(_x);
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 227e88a..ecbb73c 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -109,6 +109,7 @@
     HasExp  = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
+    HasATan = 1,
     HasBlend = 1,
     HasRound = 1,
     HasFloor = 1,
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 79a9043..af47a85 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -276,6 +276,12 @@
 }
 
 template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
+patan<Packet8d>(const Packet8d& _x) {
+  return patan_double(_x);
+}
+
+template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
 ptanh<Packet16f>(const Packet16f& _x) {
   return internal::generic_fast_tanh_float(_x);
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 5e9670c..5f37740 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -161,6 +161,7 @@
     HasSqrt = EIGEN_FAST_MATH,
     HasRsqrt = EIGEN_FAST_MATH,
 #endif
+    HasATan = 1,
     HasCmp  = 1,
     HasDiv = 1,
     HasRound = 1,
@@ -1835,7 +1836,7 @@
 EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& /*ifPacket*/,
                                      const Packet16f& /*thenPacket*/,
                                      const Packet16f& /*elsePacket*/) {
-  assert(false && "To be implemented");
+  eigen_assert(false && "To be implemented");
   return Packet16f();
 }
 template <>
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
index fffd2e5..6f48d98 100644
--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
+++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -73,6 +73,12 @@
 {
   return  vec_rsqrt(x);
 }
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet2d patan<Packet2d>(const Packet2d& _x)
+{
+  return patan_double(_x);
+}
 #endif
 
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index f030189..37398de 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -2708,6 +2708,7 @@
     HasAbs  = 1,
     HasSin  = 0,
     HasCos  = 0,
+    HasATan = 1,
     HasLog  = 0,
     HasExp  = 1,
     HasSqrt = 1,
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index c444b8a..d2137d4 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -626,6 +626,9 @@
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
   return bfloat16(::powf(float(a), float(b)));
 }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan2(const bfloat16& a, const bfloat16& b) {
+  return bfloat16(::atan2f(float(a), float(b)));
+}
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
   return bfloat16(::sinf(float(a)));
 }
@@ -653,7 +656,6 @@
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
   return bfloat16(::tanhf(float(a)));
 }
-#if EIGEN_HAS_CXX11_MATH
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
   return bfloat16(::asinhf(float(a)));
 }
@@ -663,7 +665,6 @@
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
   return bfloat16(::atanhf(float(a)));
 }
-#endif
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
   return bfloat16(::floorf(float(a)));
 }
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index a8837b2..3060214 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -815,6 +815,37 @@
   return pselect(invalid_mask, pset1<Packet>(std::numeric_limits<float>::quiet_NaN()), p);
 }
 
+// Computes elementwise atan(x) for x in [-1:1] with 2 ulp accuracy.
+template<typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan_reduced_float(const Packet& x) {
+  const Packet q0 = pset1<Packet>(-0.3333314359188079833984375f);
+  const Packet q2 = pset1<Packet>(0.19993579387664794921875f);
+  const Packet q4 = pset1<Packet>(-0.14209578931331634521484375f);
+  const Packet q6 = pset1<Packet>(0.1066047251224517822265625f);
+  const Packet q8 = pset1<Packet>(-7.5408883392810821533203125e-2f);
+  const Packet q10 = pset1<Packet>(4.3082617223262786865234375e-2f);
+  const Packet q12 = pset1<Packet>(-1.62907354533672332763671875e-2f);
+  const Packet q14 = pset1<Packet>(2.90188402868807315826416015625e-3f);
+
+  // Approximate atan(x) by a polynomial of the form
+  //   P(x) = x + x^3 * Q(x^2),
+  // where Q(x^2) is a 7th order polynomial in x^2.
+  // We evaluate even and odd terms in x^2 in parallel
+  // to take advantage of instruction level parallelism
+  // and hardware with multiple FMA units.
+  const Packet x2 = pmul(x, x);
+  const Packet x4 = pmul(x2, x2);
+  Packet q_odd = pmadd(q14, x4, q10);
+  Packet q_even = pmadd(q12, x4, q8);
+  q_odd = pmadd(q_odd, x4, q6);
+  q_even = pmadd(q_even, x4, q4);
+  q_odd = pmadd(q_odd, x4, q2);
+  q_even = pmadd(q_even, x4, q0);
+  const Packet q = pmadd(q_odd, x2, q_even);
+  return pmadd(q, pmul(x, x2), x);
+}
+
 template<typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet patan_float(const Packet& x_in) {
@@ -823,15 +854,79 @@
 
   const Packet cst_one = pset1<Packet>(1.0f);
   constexpr float kPiOverTwo = static_cast<float>(EIGEN_PI/2);
+
+  //   "Large": For |x| > 1, use atan(1/x) = sign(x)*pi/2 - atan(x).
+  //   "Small": For |x| <= 1, approximate atan(x) directly by a polynomial
+  //            calculated using Sollya.
+  const Packet neg_mask = pcmp_lt(x_in, pzero(x_in));
+  const Packet large_mask = pcmp_lt(cst_one, pabs(x_in));
+  const Packet large_shift = pselect(neg_mask, pset1<Packet>(-kPiOverTwo), pset1<Packet>(kPiOverTwo));
+  const Packet x = pselect(large_mask, preciprocal(x_in), x_in);
+  const Packet p = patan_reduced_float(x);
+  
+  // Apply transformations according to the range reduction masks.
+  return pselect(large_mask, psub(large_shift, p), p);
+}
+
+// Computes elementwise atan(x) for x in [-tan(pi/8):tan(pi/8)]
+// with 2 ulp accuracy.
+template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet
+patan_reduced_double(const Packet& x) {
+  const Packet q0 =
+      pset1<Packet>(-0.33333333333330028569463365784031338989734649658203);
+  const Packet q2 =
+      pset1<Packet>(0.199999999990664090177006073645316064357757568359375);
+  const Packet q4 =
+      pset1<Packet>(-0.142857141937123677255527809393242932856082916259766);
+  const Packet q6 =
+      pset1<Packet>(0.111111065991039953404495577160560060292482376098633);
+  const Packet q8 =
+      pset1<Packet>(-9.0907812986129224452902519715280504897236824035645e-2);
+  const Packet q10 =
+      pset1<Packet>(7.6900542950704739442180368769186316058039665222168e-2);
+  const Packet q12 =
+      pset1<Packet>(-6.6410112986494976294871150912513257935643196105957e-2);
+  const Packet q14 =
+      pset1<Packet>(5.6920144995467943094258345126945641823112964630127e-2);
+  const Packet q16 =
+      pset1<Packet>(-4.3577020814990513608577771265117917209863662719727e-2);
+  const Packet q18 =
+      pset1<Packet>(2.1244050233624342527427586446719942614436149597168e-2);
+
+  // Approximate atan(x) on [0:tan(pi/8)] by a polynomial of the form
+  //   P(x) = x + x^3 * Q(x^2),
+  // where Q(x^2) is a 9th order polynomial in x^2.
+  // We evaluate even and odd terms in x^2 in parallel
+  // to take advantage of instruction level parallelism
+  // and hardware with multiple FMA units.
+  const Packet x2 = pmul(x, x);
+  const Packet x4 = pmul(x2, x2);
+  Packet q_odd = pmadd(q18, x4, q14);
+  Packet q_even = pmadd(q16, x4, q12);
+  q_odd = pmadd(q_odd, x4, q10);
+  q_even = pmadd(q_even, x4, q8);
+  q_odd = pmadd(q_odd, x4, q6);
+  q_even = pmadd(q_even, x4, q4);
+  q_odd = pmadd(q_odd, x4, q2);
+  q_even = pmadd(q_even, x4, q0);
+  const Packet p = pmadd(q_odd, x2, q_even);
+  return pmadd(p, pmul(x, x2), x);
+}
+
+template<typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan_double(const Packet& x_in) {
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  static_assert(std::is_same<Scalar, double>::value, "Scalar type must be double");
+
+  const Packet cst_one = pset1<Packet>(1.0);
+  constexpr double kPiOverTwo = static_cast<double>(EIGEN_PI / 2);
   const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
-  constexpr float kPiOverFour = static_cast<float>(EIGEN_PI/4);
+  constexpr double kPiOverFour = static_cast<double>(EIGEN_PI / 4);
   const Packet cst_pi_over_four = pset1<Packet>(kPiOverFour);
-  const Packet cst_large = pset1<Packet>(2.4142135623730950488016887f);  // tan(3*pi/8);
-  const Packet cst_medium = pset1<Packet>(0.4142135623730950488016887f);  // tan(pi/8);
-  const Packet q0 = pset1<Packet>(-0.333329379558563232421875f);
-  const Packet q2 = pset1<Packet>(0.19977366924285888671875f);
-  const Packet q4 = pset1<Packet>(-0.13874518871307373046875f);
-  const Packet q6 = pset1<Packet>(8.044691383838653564453125e-2f);
+  const Packet cst_large = pset1<Packet>(2.4142135623730950488016887);  // tan(3*pi/8);
+  const Packet cst_medium = pset1<Packet>(0.4142135623730950488016887);  // tan(pi/8);
 
   const Packet neg_mask = pcmp_lt(x_in, pzero(x_in));
   Packet x = pabs(x_in);
@@ -841,22 +936,16 @@
   //   "Large": For x >= tan(3*pi/8), use atan(1/x) = pi/2 - atan(x).
   //   "Medium": For x in [tan(pi/8) : tan(3*pi/8)),
   //             use atan(x) = pi/4 + atan((x-1)/(x+1)).
-  //   "Small": For x < pi/8, approximate atan(x) directly by a polynomial
+  //   "Small": For x < tan(pi/8), approximate atan(x) directly by a polynomial
   //            calculated using Sollya.
   const Packet large_mask = pcmp_lt(cst_large, x);
   x = pselect(large_mask, preciprocal(x), x);
   const Packet medium_mask = pandnot(pcmp_lt(cst_medium, x), large_mask);
   x = pselect(medium_mask, pdiv(psub(x, cst_one), padd(x, cst_one)), x);
 
-  // Approximate atan(x) on [0:tan(pi/8)] by a polynomial of the form
-  //   P(x) = x + x^3 * Q(x^2),
-  // where Q(x^2) is a cubic polynomial in x^2.
-  const Packet x2 = pmul(x, x);
-  const Packet x4 = pmul(x2, x2);
-  Packet q_odd = pmadd(q6, x4, q2);
-  Packet q_even = pmadd(q4, x4, q0);
-  const Packet q = pmadd(q_odd, x2, q_even);
-  Packet p = pmadd(q, pmul(x, x2), x);
+  // Compute approximation of p ~= atan(x') where x' is the argument reduced to
+  // [0:tan(pi/8)].
+  Packet p = patan_reduced_double(x);
 
   // Apply transformations according to the range reduction masks.
   p = pselect(large_mask, psub(cst_pi_over_two, p), p);
@@ -958,8 +1047,8 @@
 
   // Step 4. Compute solution for inputs with negative real part:
   //         [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1]
-  const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
-  const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
+  const RealPacket cst_imag_sign_mask =
+      pset1<Packet>(Scalar(RealScalar(0.0), RealScalar(-0.0))).v;
   RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);
   Packet negative_real_result;
   // Notice that rho is positive, so taking it's absolute value is a noop.
@@ -1057,7 +1146,8 @@
 
 // \internal \returns the the sign of a complex number z, defined as z / abs(z).
 template <typename Packet>
-struct psign_impl<Packet, std::enable_if_t<NumTraits<typename unpacket_traits<Packet>::type>::IsComplex>> {
+struct psign_impl<Packet, std::enable_if_t<NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
+                                           unpacket_traits<Packet>::vectorizable>> {
   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
     typedef typename unpacket_traits<Packet>::type Scalar;
     typedef typename Scalar::value_type RealScalar;
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index de6fd95..179c55c 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -104,6 +104,11 @@
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet patan_float(const Packet& x);
 
+/** \internal \returns atan(x) for double precision float */
+template<typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan_double(const Packet& x);
+
 /** \internal \returns sqrt(x) for complex types */
 template<typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index d58b6a3..75d6228 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -758,6 +758,9 @@
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
   return half(::powf(float(a), float(b)));
 }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan2(const half& a, const half& b) {
+  return half(::atan2f(float(a), float(b)));
+}
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
   return half(::sinf(float(a)));
 }
diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h
index 445572f..aea5149 100644
--- a/Eigen/src/Core/arch/NEON/MathFunctions.h
+++ b/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -102,6 +102,9 @@
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog<Packet2d>(const Packet2d& x)
 { return plog_double(x); }
 
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d patan<Packet2d>(const Packet2d& x)
+{ return patan_double(x); }
+
 #endif
 
 } // end namespace internal
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 9ef83d4..5cbf4ac 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3762,10 +3762,13 @@
     HasCeil = 1,
     HasRint = 1,
 
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+    HasExp  = 1,
+    HasLog  = 1,
+    HasATan = 1,
+#endif
     HasSin  = 0,
     HasCos  = 0,
-    HasLog  = 1,
-    HasExp  = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
     HasTanh = 0,
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 8e8a0a4..f98fb7a 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -81,6 +81,11 @@
   return pacos_float(_x);
 }
 
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet2d patan<Packet2d>(const Packet2d& _x) {
+  return patan_double(_x);
+}
+
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet4f pasin<Packet4f>(const Packet4f& _x)
 {
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f942668..847ff07 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -178,6 +178,7 @@
     HasExp  = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
+    HasATan = 1,
     HasBlend = 1,
     HasFloor = 1,
     HasCeil = 1,
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index 9b560e9..eaa4352 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -509,6 +509,73 @@
 };
 
 
+template <typename LhsScalar, typename RhsScalar>
+struct scalar_atan2_op {
+  using Scalar = LhsScalar;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<is_same<LhsScalar,RhsScalar>::value, Scalar>
+  operator()(const Scalar& y, const Scalar& x) const {
+    EIGEN_USING_STD(atan2);
+    return static_cast<Scalar>(atan2(y, x));
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+      std::enable_if_t<is_same<LhsScalar, RhsScalar>::value, Packet>
+      packetOp(const Packet& y, const Packet& x) const {
+    // See https://en.cppreference.com/w/cpp/numeric/math/atan2
+    // for how corner cases are supposed to be handled according to the
+    // IEEE floating-point standard (IEC 60559).
+    const Packet kSignMask = pset1<Packet>(-Scalar(0));
+    const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
+    const Packet kPiO2 = pset1<Packet>(Scalar(EIGEN_PI / 2));
+    const Packet kPiO4 = pset1<Packet>(Scalar(EIGEN_PI / 4));
+    const Packet k3PiO4 = pset1<Packet>(Scalar(3.0 * (EIGEN_PI / 4)));
+
+    // Various predicates about the inputs.
+    Packet x_signbit = pand(x, kSignMask);
+    Packet x_has_signbit = pcmp_lt(por(x_signbit, kPi), pzero(x));
+    Packet x_is_zero = pcmp_eq(x, pzero(x));
+    Packet x_neg = pandnot(x_has_signbit, x_is_zero);
+
+    Packet y_signbit = pand(y, kSignMask);
+    Packet y_is_zero = pcmp_eq(y, pzero(y));
+    Packet x_is_not_nan = pcmp_eq(x, x);
+    Packet y_is_not_nan = pcmp_eq(y, y);
+
+    // Compute the normal case. Notice that we expect that
+    // finite/infinite = +/-0 here.
+    Packet result = patan(pdiv(y, x));
+
+    // Compute shift for when x != 0 and y != 0.
+    Packet shift = pselect(x_neg, por(kPi, y_signbit), pzero(x));
+
+    // Special cases:
+    //   Handle  x = +/-inf && y = +/-inf.
+    Packet is_not_nan = pcmp_eq(result, result);
+    result =
+        pselect(is_not_nan, padd(shift, result),
+                pselect(x_neg, por(k3PiO4, y_signbit), por(kPiO4, y_signbit)));
+    //   Handle x == +/-0.
+    result = pselect(
+        x_is_zero, pselect(y_is_zero, pzero(y), por(y_signbit, kPiO2)), result);
+    //   Handle y == +/-0.
+    result = pselect(
+        y_is_zero,
+        pselect(x_has_signbit, por(y_signbit, kPi), por(y_signbit, pzero(y))),
+        result);
+    // Handle NaN inputs.
+    Packet kQNaN = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());
+    return pselect(pand(x_is_not_nan, y_is_not_nan), result, kQNaN);
+  }
+};
+
+template<typename LhsScalar,typename RhsScalar>
+    struct functor_traits<scalar_atan2_op<LhsScalar, RhsScalar>> {
+  enum {
+    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasATan && packet_traits<LhsScalar>::HasDiv && !NumTraits<LhsScalar>::IsInteger && !NumTraits<LhsScalar>::IsComplex,
+    Cost =
+        scalar_div_cost<LhsScalar, PacketAccess>::value + 5 * NumTraits<LhsScalar>::MulCost + 5 * NumTraits<LhsScalar>::AddCost
+  };
+};
 
 //---------- binary functors bound to a constant, thus appearing as a unary functor ----------
 
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 8a84415..3485369 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -593,7 +593,6 @@
   };
 };
 
-#if EIGEN_HAS_CXX11_MATH
 /** \internal
   * \brief Template functor to compute the atanh of a scalar
   * \sa class CwiseUnaryOp, ArrayBase::atanh()
@@ -607,7 +606,6 @@
 struct functor_traits<scalar_atanh_op<Scalar> > {
   enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
 };
-#endif
 
 /** \internal
   * \brief Template functor to compute the sinh of a scalar
@@ -627,7 +625,6 @@
   };
 };
 
-#if EIGEN_HAS_CXX11_MATH
 /** \internal
   * \brief Template functor to compute the asinh of a scalar
   * \sa class CwiseUnaryOp, ArrayBase::asinh()
@@ -641,7 +638,6 @@
 struct functor_traits<scalar_asinh_op<Scalar> > {
   enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
 };
-#endif
 
 /** \internal
   * \brief Template functor to compute the cosh of a scalar
@@ -661,7 +657,6 @@
   };
 };
 
-#if EIGEN_HAS_CXX11_MATH
 /** \internal
   * \brief Template functor to compute the acosh of a scalar
   * \sa class CwiseUnaryOp, ArrayBase::acosh()
@@ -675,7 +670,6 @@
 struct functor_traits<scalar_acosh_op<Scalar> > {
   enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
 };
-#endif
 
 /** \internal
   * \brief Template functor to compute the inverse of a scalar
@@ -936,7 +930,7 @@
         NumTraits<Scalar>::IsComplex
         ? ( 8*NumTraits<Scalar>::MulCost  ) // roughly
         : ( 3*NumTraits<Scalar>::AddCost),
-    PacketAccess = packet_traits<Scalar>::HasSign
+    PacketAccess = packet_traits<Scalar>::HasSign && packet_traits<Scalar>::Vectorizable
   };
 };
 
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 570d033..d8a67a4 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -713,16 +713,6 @@
 
 #define EIGEN_CONSTEXPR constexpr
 
-// Does the compiler support C++11 math?
-// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
-#ifndef EIGEN_HAS_CXX11_MATH
-  #if (EIGEN_ARCH_i386_OR_x86_64 && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
-    #define EIGEN_HAS_CXX11_MATH 1
-  #else
-    #define EIGEN_HAS_CXX11_MATH 0
-  #endif
-#endif
-
 // NOTE: the required Apple's clang version is very conservative
 //       and it could be that XCode 9 works just fine.
 // NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support
diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h
index 225a2fa..2733fbe 100644
--- a/Eigen/src/LU/arch/InverseSize4.h
+++ b/Eigen/src/LU/arch/InverseSize4.h
@@ -37,6 +37,13 @@
 
 #include "../InternalHeaderCheck.h"
 
+#if !EIGEN_COMP_LLVM
+// These routines requires bit manipulation of the sign, which is not compatible
+// with fastmath.
+#pragma GCC push_options
+#pragma GCC optimize ("no-fast-math")
+#endif
+
 namespace Eigen
 {
 namespace internal
@@ -145,8 +152,8 @@
     iC = psub(iC, pmul(vec4f_swizzle2(A, A, 1, 0, 3, 2), vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));
     iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC);
 
-    const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f};
-    const Packet4f p4f_sign_PNNP = ploadu<Packet4f>(sign_mask);
+    EIGEN_ALIGN_MAX const float sign_mask[4] = {0.0f, -0.0f, -0.0f, 0.0f};
+    const Packet4f p4f_sign_PNNP = pload<Packet4f>(sign_mask);
     rd = pxor(rd, p4f_sign_PNNP);
     iA = pmul(iA, rd);
     iB = pmul(iB, rd);
@@ -328,10 +335,10 @@
     iC1 = psub(pmul(B1, dC), iC1);
     iC2 = psub(pmul(B2, dC), iC2);
 
-    const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)};
-    const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0};
-    const Packet2d sign_PN = ploadu<Packet2d>(sign_mask1);
-    const Packet2d sign_NP = ploadu<Packet2d>(sign_mask2);
+    EIGEN_ALIGN_MAX const double sign_mask1[2] = {0.0, -0.0};
+    EIGEN_ALIGN_MAX const double sign_mask2[2] = {-0.0, 0.0};
+    const Packet2d sign_PN = pload<Packet2d>(sign_mask1);
+    const Packet2d sign_NP = pload<Packet2d>(sign_mask2);
     d1 = pxor(rd, sign_PN);
     d2 = pxor(rd, sign_NP);
 
@@ -350,4 +357,9 @@
 #endif
 } // namespace internal
 } // namespace Eigen
+
+#if !EIGEN_COMP_LLVM
+#pragma GCC pop_options
+#endif
+
 #endif
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index 0949442..1b10c0e 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -594,9 +594,9 @@
   }
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
 #endif
 
   if (m_compU)
@@ -633,9 +633,9 @@
   }
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
 #endif
 
   m_computed(firstCol + shift, firstCol + shift) = r0;
@@ -654,9 +654,9 @@
   std::cout << "err:      " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n";
   static int count = 0;
   std::cout << "# " << ++count << "\n\n";
-  assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
-//   assert(count<681);
-//   assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
+  eigen_internal_assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
+//   eigen_internal_assert(count<681);
+//   eigen_internal_assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
 #endif
 
   // Third part: compute SVD of combined matrix
@@ -665,8 +665,8 @@
   computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(UofSVD.allFinite());
-  assert(VofSVD.allFinite());
+  eigen_internal_assert(UofSVD.allFinite());
+  eigen_internal_assert(VofSVD.allFinite());
 #endif
 
   if (m_compU)
@@ -681,9 +681,9 @@
   if (m_compV)  structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
 #endif
 
   m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
@@ -754,16 +754,16 @@
   {
     std::cout << "\n\n    mus:    " << mus.head(actual_n).transpose() << "\n\n";
     std::cout << "    check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
-    assert((((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n) >= 0).all());
+    eigen_internal_assert((((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n) >= 0).all());
     std::cout << "    check2 (>0)      : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
-    assert((((singVals.array()-diag) / singVals.array()).head(actual_n) >= 0).all());
+    eigen_internal_assert((((singVals.array()-diag) / singVals.array()).head(actual_n) >= 0).all());
   }
 #endif
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(singVals.allFinite());
-  assert(mus.allFinite());
-  assert(shifts.allFinite());
+  eigen_internal_assert(singVals.allFinite());
+  eigen_internal_assert(mus.allFinite());
+  eigen_internal_assert(shifts.allFinite());
 #endif
 
   // Compute zhat
@@ -773,7 +773,7 @@
 #endif
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(zhat.allFinite());
+  eigen_internal_assert(zhat.allFinite());
 #endif
 
   computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);
@@ -784,13 +784,13 @@
 #endif
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
-  assert(U.allFinite());
-  assert(V.allFinite());
-//   assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);
-//   assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
+  eigen_internal_assert(U.allFinite());
+  eigen_internal_assert(V.allFinite());
+//   eigen_internal_assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);
+//   eigen_internal_assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 100*NumTraits<RealScalar>::epsilon() * n);
 #endif
 
   // Because of deflation, the singular values might not be completely sorted.
@@ -811,7 +811,7 @@
     bool singular_values_sorted = (((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).array() >= 0).all();
     if(!singular_values_sorted)
       std::cout << "Singular values are not sorted: " << singVals.segment(1,actual_n).transpose() << "\n";
-    assert(singular_values_sorted);
+    eigen_internal_assert(singular_values_sorted);
   }
 #endif
 
@@ -966,7 +966,7 @@
       RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-      assert((numext::isfinite)(fZero));
+      eigen_internal_assert((numext::isfinite)(fZero));
 #endif
 
       muPrev = muCur;
@@ -1019,11 +1019,11 @@
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
       if(!(numext::isfinite)(fLeft))
         std::cout << "f(" << leftShifted << ") =" << fLeft << " ; " << left << " " << shift << " " << right << "\n";
-      assert((numext::isfinite)(fLeft));
+      eigen_internal_assert((numext::isfinite)(fLeft));
 
       if(!(numext::isfinite)(fRight))
         std::cout << "f(" << rightShifted << ") =" << fRight << " ; " << left << " " << shift << " " << right << "\n";
-      // assert((numext::isfinite)(fRight));
+      // eigen_internal_assert((numext::isfinite)(fRight));
 #endif
 
 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
@@ -1081,8 +1081,8 @@
       std::cout << "found " << singVals[k] << " == " << shift << " + " << muCur << " from " << diag(k) << " .. "  << diag(k+1) << "\n";
 #endif
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-    assert(k==0 || singVals[k]>=singVals[k-1]);
-    assert(singVals[k]>=diag(k));
+    eigen_internal_assert(k==0 || singVals[k]>=singVals[k-1]);
+    eigen_internal_assert(singVals[k]>=diag(k));
 #endif
 
     // perturb singular value slightly if it equals diagonal entry to avoid division by zero later
@@ -1123,7 +1123,7 @@
         std::cout << "prod = " << "(" << singVals(lastIdx) << " + " << dk << ") * (" << mus(lastIdx) << " + (" << shifts(lastIdx) << " - " << dk << "))" << "\n";
         std::cout << "     = " << singVals(lastIdx) + dk << " * " << mus(lastIdx) + (shifts(lastIdx) - dk) <<  "\n";
       }
-      assert(prod>=0);
+      eigen_internal_assert(prod>=0);
 #endif
 
       for(Index l = 0; l<m; ++l)
@@ -1154,11 +1154,11 @@
           {
             std::cout << "k=" << k << ", i=" << i << ", l=" << l << ", perm.size()=" << perm.size() << "\n";
           }
-          assert(dk!=Literal(0) || diag(i)!=Literal(0));
+          eigen_internal_assert(dk!=Literal(0) || diag(i)!=Literal(0));
 #endif
           prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-          assert(prod>=0);
+          eigen_internal_assert(prod>=0);
 #endif
 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
           if(i!=k && numext::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
@@ -1172,7 +1172,7 @@
 #endif
       RealScalar tmp = sqrt(prod);
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-      assert((numext::isfinite)(tmp));
+      eigen_internal_assert((numext::isfinite)(tmp));
 #endif
       zhat(k) = col0(k) > Literal(0) ? RealScalar(tmp) : RealScalar(-tmp);
     }
@@ -1309,9 +1309,9 @@
   RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
 #endif
 
 #ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
@@ -1348,9 +1348,9 @@
     }
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
 #endif
 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
   std::cout << "to be sorted: " << diag.transpose() << "\n\n";
@@ -1454,13 +1454,13 @@
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
   for(Index j=2;j<length;++j)
-    assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
+    eigen_internal_assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
 #endif
 
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
-  assert(m_naiveU.allFinite());
-  assert(m_naiveV.allFinite());
-  assert(m_computed.allFinite());
+  eigen_internal_assert(m_naiveU.allFinite());
+  eigen_internal_assert(m_naiveV.allFinite());
+  eigen_internal_assert(m_computed.allFinite());
 #endif
 }  // end deflation
 
diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h
index 79c867c..a9fbeeb 100644
--- a/Eigen/src/SparseCore/TriangularSolver.h
+++ b/Eigen/src/SparseCore/TriangularSolver.h
@@ -272,11 +272,11 @@
       }
 
 
-      Index count = 0;
+//       Index count = 0;
       // FIXME compute a reference value to filter zeros
       for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector/*,1e-12*/); it; ++it)
       {
-        ++ count;
+//         ++ count;
 //         std::cerr << "fill " << it.index() << ", " << col << "\n";
 //         std::cout << it.value() << "  ";
         // FIXME use insertBack
diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
index 7bed85d..2a8d80b 100644
--- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
@@ -77,8 +77,6 @@
   // Identify the relaxed supernodes by postorder traversal of the etree
   Index snode_start; // beginning of a snode 
   StorageIndex k;
-  Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree 
-  Index nsuper_et = 0; // Number of relaxed snodes in the original etree 
   StorageIndex l; 
   for (j = 0; j < n; )
   {
@@ -90,7 +88,6 @@
       parent = et(j);
     }
     // Found a supernode in postordered etree, j is the last column 
-    ++nsuper_et_post;
     k = StorageIndex(n);
     for (Index i = snode_start; i <= j; ++i)
       k = (std::min)(k, inv_post(i));
@@ -99,7 +96,6 @@
     {
       // This is also a supernode in the original etree
       relax_end(k) = l; // Record last column 
-      ++nsuper_et; 
     }
     else 
     {
@@ -109,7 +105,6 @@
         if (descendants(i) == 0) 
         {
           relax_end(l) = l;
-          ++nsuper_et;
         }
       }
     }
diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
index 5f1e844..30e3ee1 100644
--- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
@@ -134,6 +134,14 @@
   */
 EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
 
+/** \returns an expression of the coefficient-wise atan2(\c *this, \a y), where \a y is the given array argument.
+  *
+  * This function computes the coefficient-wise atan2.
+  *
+  */
+EIGEN_MAKE_CWISE_BINARY_OP(atan2,atan2)
+
+
 // TODO code generating macros could be moved to Macros.h and could include generation of documentation
 #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
 template<typename OtherDerived> \
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index b2d9331..d8c1a84 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -24,11 +24,9 @@
 typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
 typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
 typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
-#if EIGEN_HAS_CXX11_MATH
 typedef CwiseUnaryOp<internal::scalar_atanh_op<Scalar>, const Derived> AtanhReturnType;
 typedef CwiseUnaryOp<internal::scalar_asinh_op<Scalar>, const Derived> AsinhReturnType;
 typedef CwiseUnaryOp<internal::scalar_acosh_op<Scalar>, const Derived> AcoshReturnType;
-#endif
 typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
 typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
 typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
@@ -355,7 +353,6 @@
   return CoshReturnType(derived());
 }
 
-#if EIGEN_HAS_CXX11_MATH
 /** \returns an expression of the coefficient-wise inverse hyperbolic tan of *this.
   *
   * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_atanh">Math functions</a>, atanh(), asinh(), acosh()
@@ -388,7 +385,6 @@
 {
   return AcoshReturnType(derived());
 }
-#endif
 
 /** \returns an expression of the coefficient-wise logistic of *this.
   */
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 7b1d770..d0e96fa 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -1600,7 +1600,6 @@
                          EIGEN_QT_SUPPORT \
                          EIGEN_STRONG_INLINE=inline \
                          EIGEN_DEVICE_FUNC= \
-                         EIGEN_HAS_CXX11_MATH=1 \
                          "EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR)=template<typename OtherDerived> const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const;" \
                          "EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS)=CwiseBinaryOp<internal::scalar_product_op<LHS::Scalar,RHS::Scalar>, const LHS, const RHS>"\
                          "EIGEN_CAT2(a,b)= a ## b"\
diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox
index a0e77b5..d6024dc 100644
--- a/doc/PreprocessorDirectives.dox
+++ b/doc/PreprocessorDirectives.dox
@@ -63,7 +63,6 @@
 functions by defining EIGEN_HAS_C99_MATH=1.
 
  - \b EIGEN_HAS_C99_MATH - controls the usage of C99 math functions such as erf, erfc, lgamma, etc.
- - \b EIGEN_HAS_CXX11_MATH - controls the implementation of some functions such as round, logp1, isinf, isnan, etc.
  - \b EIGEN_HAS_STD_RESULT_OF - defines whether std::result_of is supported
  - \b EIGEN_NO_IO - Disables any usage and support for `<iostreams>`.
 
diff --git a/doc/snippets/Cwise_array_atan2_array.cpp b/doc/snippets/Cwise_array_atan2_array.cpp
new file mode 100644
index 0000000..ace075a
--- /dev/null
+++ b/doc/snippets/Cwise_array_atan2_array.cpp
@@ -0,0 +1,4 @@
+Array<double,1,3> x(8,-25,3),
+                  y(1./3.,0.5,-2.);
+cout << "atan2([" << x << "], [" << y << "]) = " << x.atan2(y) << endl; // using ArrayBase::pow
+cout << "atan2([" << x << "], [" << y << "] = " << atan2(x,y) << endl; // using Eigen::pow
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 319eba3..a7e0ff4 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -7,12 +7,11 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+#include <vector>
 #include "main.h"
 
-
-// Test the corner cases of pow(x, y) for real types.
-template<typename Scalar>
-void pow_test() {
+template <typename Scalar>
+std::vector<Scalar> special_values() {
   const Scalar zero = Scalar(0);
   const Scalar eps = Eigen::NumTraits<Scalar>::epsilon();
   const Scalar one = Scalar(1);
@@ -27,25 +26,19 @@
   const Scalar max = (std::numeric_limits<Scalar>::max)();
   const Scalar max_exp = (static_cast<Scalar>(int(Eigen::NumTraits<Scalar>::max_exponent())) * Scalar(EIGEN_LN2)) / eps;
 
-  const static Scalar abs_vals[] = {zero,
-                                    denorm_min,
-                                    min,
-                                    eps,
-                                    sqrt_half,
-                                    one,
-                                    sqrt2,
-                                    two,
-                                    three,
-                                    max_exp,
-                                    max,
-                                    inf,
-                                    nan};
-  const int abs_cases = 13;
+  return {zero, denorm_min, min, eps, sqrt_half, one, sqrt2, two, three, max_exp, max, inf, nan};
+}
+
+template<typename Scalar>
+void special_value_pairs(Array<Scalar, Dynamic, Dynamic>& x,
+                         Array<Scalar, Dynamic, Dynamic>& y) {
+  std::vector<Scalar> abs_vals = special_values<Scalar>();
+  const int abs_cases = abs_vals.size();
   const int num_cases = 2*abs_cases * 2*abs_cases;
-  // Repeat the same value to make sure we hit the vectorized path.
-  const int num_repeats = 32;
-  Array<Scalar, Dynamic, Dynamic> x(num_repeats, num_cases);
-  Array<Scalar, Dynamic, Dynamic> y(num_repeats, num_cases);
+  // ensure both vectorized and non-vectorized paths taken
+  const int num_repeats = 2 * internal::packet_traits<Scalar>::size + 1;
+  x.resize(num_repeats, num_cases);
+  y.resize(num_repeats, num_cases);
   int count = 0;
   for (int i = 0; i < abs_cases; ++i) {
     const Scalar abs_x = abs_vals[i];
@@ -64,65 +57,85 @@
       }
     }
   }
+}
 
-  Array<Scalar, Dynamic, Dynamic> actual = x.pow(y);
+template <typename Scalar, typename Fn, typename RefFn>
+void binary_op_test(std::string name, Fn fun, RefFn ref) {
   const Scalar tol = test_precision<Scalar>();
+  Array<Scalar, Dynamic, Dynamic> x;
+  Array<Scalar, Dynamic, Dynamic> y;
+  special_value_pairs(x, y);
+
+  Array<Scalar, Dynamic, Dynamic> actual = fun(x, y);
   bool all_pass = true;
-  for (int i = 0; i < 1; ++i) {
-    for (int j = 0; j < num_cases; ++j) {
-      Scalar e = static_cast<Scalar>(std::pow(x(i,j), y(i,j)));
+  for (int i = 0; i < x.rows(); ++i) {
+    for (int j = 0; j < x.cols(); ++j) {
+      Scalar e = static_cast<Scalar>(ref(x(i,j), y(i,j)));
       Scalar a = actual(i, j);
       bool success = (a==e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e));
       all_pass &= success;
       if (!success) {
-        std::cout << "pow(" << x(i,j) << "," << y(i,j) << ")   =   " << a << " !=  " << e << std::endl;
+        std::cout << name << "(" << x(i,j) << "," << y(i,j) << ") = " << a << " !=  " << e << std::endl;
       }
     }
   }
+  VERIFY(all_pass);
+}
 
-  typedef typename internal::make_integer<Scalar>::type Int_t;
+template <typename Scalar>
+void binary_ops_test() {
+  binary_op_test<Scalar>("pow",
+                         [](auto x, auto y) { return Eigen::pow(x, y); },
+                         [](auto x, auto y) { return std::pow(x, y); });
+  binary_op_test<Scalar>("atan2",
+                         [](auto x, auto y) { return Eigen::atan2(x, y); },
+                         [](auto x, auto y) { return std::atan2(x, y); });
+}
 
-  // ensure both vectorized and non-vectorized paths taken
-  Index test_size = 2 * internal::packet_traits<Scalar>::size + 1;
-  
-  Array<Scalar, Dynamic, 1> eigenPow(test_size);
-  for (int i = 0; i < num_cases; ++i) {
-    Array<Scalar, Dynamic, 1> bases = x.col(i);
-    for (Scalar abs_exponent : abs_vals){
-      for (Scalar exponent : {-abs_exponent, abs_exponent}){
-        // test floating point exponent code path
-        eigenPow.setZero();
-        eigenPow = bases.pow(exponent);
-        for (int j = 0; j < num_repeats; j++){
+template <typename Scalar>
+void pow_scalar_exponent_test() {
+  using Int_t = typename internal::make_integer<Scalar>::type;
+  const Scalar tol = test_precision<Scalar>();
+
+  std::vector<Scalar> abs_vals = special_values<Scalar>();
+  const int num_vals = abs_vals.size();
+  Map<Array<Scalar, Dynamic, 1>> bases(abs_vals.data(), num_vals);
+
+  bool all_pass = true;
+  for (Scalar abs_exponent : abs_vals) {
+    for (Scalar exponent : {-abs_exponent, abs_exponent}) {
+      // test integer exponent code path
+      bool exponent_is_integer = (numext::isfinite)(exponent) && (numext::round(exponent) == exponent) &&
+                                 (numext::abs(exponent) < static_cast<Scalar>(NumTraits<Int_t>::highest()));
+      if (exponent_is_integer) {
+        Int_t exponent_as_int = static_cast<Int_t>(exponent);
+        Array<Scalar, Dynamic, 1> eigenPow = bases.pow(exponent_as_int);
+        for (int j = 0; j < num_vals; j++) {
           Scalar e = static_cast<Scalar>(std::pow(bases(j), exponent));
           Scalar a = eigenPow(j);
-          bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e));
+          bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
+                         ((numext::isnan)(a) && (numext::isnan)(e));
           all_pass &= success;
           if (!success) {
-            std::cout << "pow(" << x(i, j) << "," << y(i, j) << ")   =   " << a << " !=  " << e << std::endl;
+            std::cout << "pow(" << bases(j) << "," << exponent << ") = " << a << " !=  " << e << std::endl;
           }
         }
-        // test integer exponent code path
-        bool exponent_is_integer = (numext::isfinite)(exponent) && (numext::round(exponent) == exponent) && (numext::abs(exponent) < static_cast<Scalar>(NumTraits<Int_t>::highest()));
-        if (exponent_is_integer)
-        {
-          Int_t exponent_as_int = static_cast<Int_t>(exponent);
-          eigenPow.setZero();
-          eigenPow = bases.pow(exponent_as_int);
-          for (int j = 0; j < num_repeats; j++){
-            Scalar e = static_cast<Scalar>(std::pow(bases(j), exponent));
-            Scalar a = eigenPow(j);
-            bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) || ((numext::isnan)(a) && (numext::isnan)(e));
-            all_pass &= success;
-            if (!success) {
-              std::cout << "pow(" << x(i, j) << "," << y(i, j) << ")   =   " << a << " !=  " << e << std::endl;
-            }
+      } else {
+        // test floating point exponent code path
+        Array<Scalar, Dynamic, 1> eigenPow = bases.pow(exponent);
+        for (int j = 0; j < num_vals; j++) {
+          Scalar e = static_cast<Scalar>(std::pow(bases(j), exponent));
+          Scalar a = eigenPow(j);
+          bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
+                         ((numext::isnan)(a) && (numext::isnan)(e));
+          all_pass &= success;
+          if (!success) {
+            std::cout << "pow(" << bases(j) << "," << exponent << ")   =   " << a << " !=  " << e << std::endl;
           }
         }
       }
     }
   }
-
   VERIFY(all_pass);
 }
 
@@ -531,11 +544,11 @@
   VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
   VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
-#if EIGEN_HAS_CXX11_MATH
+  VERIFY_IS_APPROX(m1.atan2(m2), atan2(m1,m2));
+
   VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
   VERIFY_IS_APPROX(m1.sinh().asinh(), asinh(sinh(m1)));
   VERIFY_IS_APPROX(m1.cosh().acosh(), acosh(cosh(m1)));
-#endif
   VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
 
   VERIFY_IS_APPROX(m1.arg(), arg(m1));
@@ -592,6 +605,13 @@
   VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() );
   VERIFY_IS_APPROX( m1*m1.sign(),m1.abs());
   VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1);
+  
+  ArrayType tmp = m1.atan2(m2);
+  for (Index i = 0; i < tmp.size(); ++i) {
+    Scalar actual = tmp.array()(i);
+    Scalar expected = atan2(m1.array()(i), m2.array()(i));
+    VERIFY_IS_APPROX(actual, expected);
+  }
 
   VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1));
   VERIFY_IS_APPROX(numext::abs2(Eigen::real(m1)) + numext::abs2(Eigen::imag(m1)), numext::abs2(m1));
@@ -619,7 +639,10 @@
   // Avoid inf and NaN.
   m3 = (m1.square()<NumTraits<Scalar>::epsilon()).select(Scalar(1),m3);
   VERIFY_IS_APPROX(m3.pow(RealScalar(-2)), m3.square().inverse());
-  pow_test<Scalar>();
+
+  // Test pow and atan2 on special IEEE values.
+  binary_ops_test<Scalar>();
+  pow_scalar_exponent_test<Scalar>();
 
   VERIFY_IS_APPROX(log10(m3), log(m3)/numext::log(Scalar(10)));
   VERIFY_IS_APPROX(log2(m3), log(m3)/numext::log(Scalar(2)));
@@ -684,7 +707,6 @@
   VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval()));
   VERIFY_IS_APPROX(m1.sign(), sign(m1));
 
-
   VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2));
   VERIFY_IS_APPROX(m1.exp(), exp(m1));
   VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index c3b28ec..ef01e30 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -483,7 +483,7 @@
   template <typename TensorBlock>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
       const TensorBlockDesc& desc, const TensorBlock& block) {
-    assert(this->m_impl.data() != NULL);
+    eigen_assert(this->m_impl.data() != NULL);
 
     const Index chip_dim = this->m_dim.actualDim();
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index 1f818ec..19f664c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -874,7 +874,7 @@
                         lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1));
 
       if (!parallel_pack_ && shard_by_col_) {
-        assert(!use_thread_local);
+        eigen_assert(!use_thread_local);
         signal_packing(k);
       } else {
         signal_switch(k + 1);
@@ -927,7 +927,7 @@
           signal_kernel(m, n, k, sync, use_thread_local);
         }
       } else {
-        assert(!use_thread_local);
+        eigen_assert(!use_thread_local);
         signal_packing(k);
       }
     }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index e54c558..92ad0f5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -159,14 +159,14 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
           bool /*root_of_expr_ast*/ = false) const {
-    assert(m_data != NULL);
+    eigen_assert(m_data != NULL);
     return TensorBlock::materialize(m_data, m_dims, desc, scratch);
   }
 
   template<typename TensorBlock>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
       const TensorBlockDesc& desc, const TensorBlock& block) {
-    assert(m_data != NULL);
+    eigen_assert(m_data != NULL);
 
     typedef typename TensorBlock::XprType TensorBlockExpr;
     typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
@@ -331,7 +331,7 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
           bool /*root_of_expr_ast*/ = false) const {
-    assert(m_data != NULL);
+    eigen_assert(m_data != NULL);
     return TensorBlock::materialize(m_data, m_dims, desc, scratch);
   }
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index a176374..4550fca 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -208,7 +208,7 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
           bool /*root_of_expr_ast*/ = false) const {
-    assert(m_buffer != NULL);
+    eigen_assert(m_buffer != NULL);
     return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
   }
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
index 82ca999..ae79a1d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
@@ -93,7 +93,7 @@
 // HIPCC do not support the use of assert on the GPU side.
 #define gpu_assert(COND)
 #else
-#define gpu_assert(COND) assert(COND)
+#define gpu_assert(COND) eigen_assert(COND)
 #endif
 
 #endif // gpu_assert
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index 2511d5a..efda741 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -202,7 +202,7 @@
       }
     }
 
-    assert(layout == RowMajor);
+    eigen_assert(layout == RowMajor);
     typedef std::conditional_t<is_same<Scalar, char>::value || is_same<Scalar, unsigned char>::value ||
                              is_same<Scalar, numext::int8_t>::value || is_same<Scalar, numext::uint8_t>::value,
                           int,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index ea622a5..0205710 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -281,7 +281,7 @@
   template <typename TensorBlock>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
       const TensorBlockDesc& desc, const TensorBlock& block) {
-    assert(this->m_impl.data() != NULL);
+    eigen_assert(this->m_impl.data() != NULL);
 
     typedef typename TensorBlock::XprType TensorBlockExpr;
     typedef internal::TensorBlockAssignment<
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index e50a570..977263a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -256,7 +256,7 @@
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
           bool root_of_expr_ast = false) const {
-    assert(m_impl.data() != NULL);
+    eigen_assert(m_impl.data() != NULL);
 
     typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
         TensorBlockIO;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index bb77885..2036fe4 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -136,7 +136,7 @@
   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
   const int minPadeDegree = 3;
   const int maxPadeDegree = 11;
-  assert(degree >= minPadeDegree && degree <= maxPadeDegree);
+  eigen_assert(degree >= minPadeDegree && degree <= maxPadeDegree);
   // FIXME this creates float-conversion-warnings if these are enabled.
   // Either manually convert each value, or disable the warning locally
   const RealScalar nodes[][maxPadeDegree] = { 
diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
index 7acd01a..4c38426 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@@ -430,7 +430,7 @@
     using std::sqrt;
     using std::abs;
     
-    assert(x.size()==n); // check the caller is not cheating us
+    eigen_assert(x.size()==n); // check the caller is not cheating us
 
     Index j;
     std::vector<JacobiRotation<Scalar> > v_givens(n), w_givens(n);
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 0addd09..d85729d 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -685,11 +685,11 @@
 template <typename Scalar>
 struct cephes_helper {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; }
+  static EIGEN_STRONG_INLINE Scalar machep() { eigen_assert(false && "machep not supported for this type"); return 0.0; }
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; }
+  static EIGEN_STRONG_INLINE Scalar big() { eigen_assert(false && "big not supported for this type"); return 0.0; }
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar biginv() { assert(false && "biginv not supported for this type"); return 0.0; }
+  static EIGEN_STRONG_INLINE Scalar biginv() { eigen_assert(false && "biginv not supported for this type"); return 0.0; }
 };
 
 template <>