Update Eigen to commit:2a9055b50ed22101da7d77e999b90ed50956fe0b CHANGELOG ========= 2a9055b50 - Fix random for custom scalars that don't have constexpr digits(). 500a3602f - Use traits<Matrix>::Options instead of Matrix::Options. 0b9ca1159 - Fix deflation in BDCSVD. f40ad38fd - Fix failure on ARM with latest compilers. a24bf2e9a - Disable float16 packet casting if native AVX512 f16 is available. 5361dea83 - Remove return int types from BLAS/LAPACK functions. 7e655c9a5 - Fixes 2780 6ea33f95d - Eliminate warning about writing bytes directly to non-trivial type. 06b45905e - Remove r_cnjg due to conflicts with f2c. 9229cfa82 - Fix division by zero UB in packet size logic. 186f8205d - Apply clang-format to lapack/blas directories 4eac211e9 - Fix C++20 error, Arithmetic between different enumeration types d1d87973f - Fix segfault in CholmodBase::factorize() for zero matrix 7b87b2191 - Fix UB in bool packetmath test. 431e4a913 - Fix the fuzz 3ab8f4825 - fix tests when scalar is bfloat16, half 3ebaab8a6 - Fix PPC rand and other failures. ebd13c3b1 - fix skew symmetric test 128c8abf4 - Fix gcc-6 bug in the rand test. d626762e3 - improve random a9ddab3e0 - Fix a bunch of ODR violations. PiperOrigin-RevId: 609443252 Change-Id: Iddb4afa1b81a173975c2ef3771fc4b08683fdbdc

diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index 447a393..e5b46c4 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h

@@ -329,8 +329,10 @@
     cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
     internal::cm_factorize_p<StorageIndex>(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod);
 
-    // If the factorization failed, minor is the column at which it did. On success minor == n.
-    this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
+    // If the factorization failed, either the input matrix was zero (so m_cholmodFactor == nullptr), or minor is the
+    // column at which it failed. On success minor == n.
+    this->m_info =
+        (m_cholmodFactor != nullptr && m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
     m_factorizationIsOk = true;
   }
 

diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 3a302c0..4b56f0f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h

@@ -376,6 +376,12 @@
   }
 };
 
+// For booleans, we can only directly set a valid `bool` value to avoid UB.
+template <>
+struct ptrue_impl<bool, void> {
+  static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; }
+};
+
 // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
 // Although this is technically not a valid bitmask, the scalar path for pselect
 // uses a comparison to zero, so this should still work in most cases. We don't
@@ -458,6 +464,32 @@
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
 };
 
+template <>
+struct bit_and<bool> {
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const {
+    return a && b;
+  }
+};
+
+template <>
+struct bit_or<bool> {
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const {
+    return a || b;
+  }
+};
+
+template <>
+struct bit_xor<bool> {
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const {
+    return a != b;
+  }
+};
+
+template <>
+struct bit_not<bool> {
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
+};
+
 // Use operators &, |, ^, ~.
 template <typename T>
 struct operator_bitwise_helper {

diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 0be29bc..c92572f 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h

@@ -563,34 +563,6 @@
   }
 };
 
-/****************************************************************************
- * Implementation of random                                               *
- ****************************************************************************/
-
-template <typename Scalar, bool IsComplex, bool IsInteger>
-struct random_default_impl {};
-
-template <typename Scalar>
-struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
-
-template <typename Scalar>
-struct random_retval {
-  typedef Scalar type;
-};
-
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
-template <typename Scalar>
-inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
-
-template <typename Scalar>
-struct random_default_impl<Scalar, false, false> {
-  static inline Scalar run(const Scalar& x, const Scalar& y) {
-    return x + (y - x) * Scalar(std::rand()) / Scalar(RAND_MAX);
-  }
-  static inline Scalar run() { return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1)); }
-};
-
 enum { meta_floor_log2_terminate, meta_floor_log2_move_up, meta_floor_log2_move_down, meta_floor_log2_bogus };
 
 template <unsigned int n, int lower, int upper>
@@ -769,56 +741,174 @@
 
 #endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
 
+template <typename BitsType>
+int log2_ceil(BitsType x) {
+  int n = CHAR_BIT * sizeof(BitsType) - clz(x);
+  bool powerOfTwo = (x & (x - 1)) == 0;
+  return x == 0 ? 0 : powerOfTwo ? n - 1 : n;
+}
+
+template <typename BitsType>
+int log2_floor(BitsType x) {
+  int n = CHAR_BIT * sizeof(BitsType) - clz(x);
+  return x == 0 ? 0 : n - 1;
+}
+
+/****************************************************************************
+ * Implementation of random                                               *
+ ****************************************************************************/
+
+// return a Scalar filled with numRandomBits beginning from the least significant bit
+template <typename Scalar>
+Scalar getRandomBits(int numRandomBits) {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  enum : int {
+    StdRandBits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value,
+    ScalarBits = sizeof(Scalar) * CHAR_BIT
+  };
+  eigen_assert((numRandomBits >= 0) && (numRandomBits <= ScalarBits));
+  const BitsType mask = BitsType(-1) >> ((ScalarBits - numRandomBits) & (ScalarBits - 1));
+  BitsType randomBits = BitsType(0);
+  for (int shift = 0; shift < numRandomBits; shift += StdRandBits) {
+    int r = std::rand();
+    randomBits |= static_cast<BitsType>(r) << shift;
+  }
+  // clear the excess bits
+  randomBits &= mask;
+  return numext::bit_cast<Scalar, BitsType>(randomBits);
+}
+
+template <typename Scalar, bool IsComplex, bool IsInteger>
+struct random_default_impl {};
+
+template <typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template <typename Scalar>
+struct random_retval {
+  typedef Scalar type;
+};
+
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
+template <typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
+
+template <typename Scalar>
+struct random_default_impl<Scalar, false, false> {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) {
+    Scalar half_x = Scalar(0.5) * x;
+    Scalar half_y = Scalar(0.5) * y;
+    Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits);
+    // result is in the half-open interval [x, y) -- provided that x < y
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
+    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
+    return run(x, y, mantissa_bits);
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
+    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
+    eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissa_bits);
+    BitsType randomBits = getRandomBits<BitsType>(numRandomBits);
+    // if fewer than MantissaBits is requested, shift them to the left
+    randomBits <<= (mantissa_bits - numRandomBits);
+    // randomBits is in the half-open interval [2,4)
+    randomBits |= numext::bit_cast<BitsType>(Scalar(2));
+    // result is in the half-open interval [-1,1)
+    Scalar result = numext::bit_cast<Scalar>(randomBits) - Scalar(3);
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
+    const int mantissa_bits = NumTraits<Scalar>::digits() - 1;
+    return run(mantissa_bits);
+  }
+};
+
+// TODO: fix this for PPC
+template <bool Specialize = sizeof(long double) == 2 * sizeof(uint64_t) && !EIGEN_ARCH_PPC>
+struct random_longdouble_impl {
+  enum : int {
+    Size = sizeof(long double),
+    MantissaBits = NumTraits<long double>::digits() - 1,
+    LowBits = MantissaBits > 64 ? 64 : MantissaBits,
+    HighBits = MantissaBits > 64 ? MantissaBits - 64 : 0
+  };
+  static EIGEN_DEVICE_FUNC inline long double run() {
+    EIGEN_USING_STD(memcpy)
+    uint64_t randomBits[2];
+    long double result = 2.0L;
+    memcpy(&randomBits, &result, Size);
+    randomBits[0] |= getRandomBits<uint64_t>(LowBits);
+    randomBits[1] |= getRandomBits<uint64_t>(HighBits);
+    memcpy(&result, &randomBits, Size);
+    result -= 3.0L;
+    return result;
+  }
+};
+template <>
+struct random_longdouble_impl<false> {
+  using Impl = random_impl<double>;
+  static EIGEN_DEVICE_FUNC inline long double run() { return static_cast<long double>(Impl::run()); }
+};
+
+template <>
+struct random_impl<long double> {
+  static EIGEN_DEVICE_FUNC inline long double run(const long double& x, const long double& y) {
+    long double half_x = 0.5L * x;
+    long double half_y = 0.5L * y;
+    long double result = (half_x + half_y) + (half_y - half_x) * run();
+    return result;
+  }
+  static EIGEN_DEVICE_FUNC inline long double run() { return random_longdouble_impl<>::run(); }
+};
+
 template <typename Scalar>
 struct random_default_impl<Scalar, false, true> {
-  static inline Scalar run(const Scalar& x, const Scalar& y) {
+  using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
+  enum : int { ScalarBits = sizeof(Scalar) * CHAR_BIT };
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
     if (y <= x) return x;
-    // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
-    typedef typename make_unsigned<Scalar>::type ScalarU;
-    // ScalarX is the widest of ScalarU and unsigned int.
-    // We'll deal only with ScalarX and unsigned int below thus avoiding signed
-    // types and arithmetic and signed overflows (which are undefined behavior).
-    typedef std::conditional_t<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned> ScalarX;
-    // The following difference doesn't overflow, provided our integer types are two's
-    // complement and have the same number of padding bits in signed and unsigned variants.
-    // This is the case in most modern implementations of C++.
-    ScalarX range = ScalarX(y) - ScalarX(x);
-    ScalarX offset = 0;
-    ScalarX divisor = 1;
-    ScalarX multiplier = 1;
-    const unsigned rand_max = RAND_MAX;
-    if (range <= rand_max)
-      divisor = (rand_max + 1) / (range + 1);
-    else
-      multiplier = 1 + range / (rand_max + 1);
-    // Rejection sampling.
+    const BitsType range = static_cast<BitsType>(y) - static_cast<BitsType>(x) + 1;
+    // handle edge case where [x,y] spans the entire range of Scalar
+    if (range == 0) return getRandomBits<Scalar>(ScalarBits);
+    // calculate the number of random bits needed to fill range
+    const int numRandomBits = log2_ceil(range);
+    BitsType randomBits;
     do {
-      offset = (unsigned(std::rand()) * multiplier) / divisor;
-    } while (offset > range);
-    return Scalar(ScalarX(x) + offset);
+      randomBits = getRandomBits<BitsType>(numRandomBits);
+      // if the random draw is outside [0, range), try again (rejection sampling)
+      // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50%
+    } while (randomBits >= range);
+    Scalar result = x + static_cast<Scalar>(randomBits);
+    return result;
   }
 
-  static inline Scalar run() {
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
 #ifdef EIGEN_MAKING_DOCS
     return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
 #else
-    enum {
-      rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value,
-      scalar_bits = sizeof(Scalar) * CHAR_BIT,
-      shift = plain_enum_max(0, int(rand_bits) - int(scalar_bits)),
-      offset = NumTraits<Scalar>::IsSigned ? (1 << (plain_enum_min(rand_bits, scalar_bits) - 1)) : 0
-    };
-    return Scalar((std::rand() >> shift) - offset);
+    return getRandomBits<Scalar>(ScalarBits);
 #endif
   }
 };
 
+template <>
+struct random_impl<bool> {
+  static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) {
+    if (y <= x) return x;
+    return run();
+  }
+  static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits<int>(1) ? true : false; }
+};
+
 template <typename Scalar>
 struct random_default_impl<Scalar, true, false> {
-  static inline Scalar run(const Scalar& x, const Scalar& y) {
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
     return Scalar(random(x.real(), y.real()), random(x.imag(), y.imag()));
   }
-  static inline Scalar run() {
+  static EIGEN_DEVICE_FUNC inline Scalar run() {
     typedef typename NumTraits<Scalar>::Real RealScalar;
     return Scalar(random<RealScalar>(), random<RealScalar>());
   }
@@ -1864,13 +1954,6 @@
 ******************************************/
 
 template <>
-struct random_impl<bool> {
-  static inline bool run() { return random<int>(0, 1) == 0 ? false : true; }
-
-  static inline bool run(const bool& a, const bool& b) { return random<int>(a, b) == 0 ? false : true; }
-};
-
-template <>
 struct scalar_fuzzy_impl<bool> {
   typedef bool RealScalar;
 

diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 80f74e9..2848b78 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h

@@ -206,9 +206,7 @@
 
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T highest() { return (numext::numeric_limits<T>::max)(); }
 
-  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T lowest() {
-    return IsInteger ? (numext::numeric_limits<T>::min)() : static_cast<T>(-(numext::numeric_limits<T>::max)());
-  }
+  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T lowest() { return (numext::numeric_limits<T>::lowest)(); }
 
   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T infinity() { return numext::numeric_limits<T>::infinity(); }
 

diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
index fc11174..131e6f1 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h

@@ -1,870 +1,870 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-//
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
-#define EIGEN_PACKET_MATH_FP16_AVX512_H
-
-// IWYU pragma: private
-#include "../../InternalHeaderCheck.h"
-
-namespace Eigen {
-
-namespace internal {
-
-typedef __m512h Packet32h;
-typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
-typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
-
-template <>
-struct is_arithmetic<Packet8h> {
-  enum { value = true };
-};
-
-template <>
-struct packet_traits<half> : default_packet_traits {
-  typedef Packet32h type;
-  typedef Packet16h half;
-  enum {
-    Vectorizable = 1,
-    AlignedOnScalar = 1,
-    size = 32,
-
-    HasCmp = 1,
-    HasAdd = 1,
-    HasSub = 1,
-    HasMul = 1,
-    HasDiv = 1,
-    HasNegate = 1,
-    HasAbs = 1,
-    HasAbs2 = 0,
-    HasMin = 1,
-    HasMax = 1,
-    HasConj = 1,
-    HasSetLinear = 0,
-    HasLog = 1,
-    HasLog1p = 1,
-    HasExp = 1,
-    HasExpm1 = 1,
-    HasSqrt = 1,
-    HasRsqrt = 1,
-    // These ones should be implemented in future
-    HasBessel = 0,
-    HasNdtri = 0,
-    HasSin = EIGEN_FAST_MATH,
-    HasCos = EIGEN_FAST_MATH,
-    HasTanh = EIGEN_FAST_MATH,
-    HasErf = 0,  // EIGEN_FAST_MATH,
-    HasBlend = 0,
-    HasRound = 1,
-    HasFloor = 1,
-    HasCeil = 1,
-    HasRint = 1
-  };
-};
-
-template <>
-struct unpacket_traits<Packet32h> {
-  typedef Eigen::half type;
-  typedef Packet16h half;
-  enum {
-    size = 32,
-    alignment = Aligned64,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-template <>
-struct unpacket_traits<Packet16h> {
-  typedef Eigen::half type;
-  typedef Packet8h half;
-  enum {
-    size = 16,
-    alignment = Aligned32,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-template <>
-struct unpacket_traits<Packet8h> {
-  typedef Eigen::half type;
-  typedef Packet8h half;
-  enum {
-    size = 8,
-    alignment = Aligned16,
-    vectorizable = true,
-    masked_load_available = false,
-    masked_store_available = false
-  };
-};
-
-// Memory functions
-
-// pset1
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {
-  return _mm512_set1_ph(static_cast<_Float16>(from));
-}
-
-// pset1frombits
-template <>
-EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {
-  return _mm512_castsi512_ph(_mm512_set1_epi16(from));
-}
-
-// pfirst
-
-template <>
-EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-  return half_impl::raw_uint16_to_half(
-      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
-#else
-  Eigen::half dest[32];
-  _mm512_storeu_ph(dest, from);
-  return dest[0];
-#endif
-}
-
-// pload
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {
-  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);
-}
-
-// ploadu
-
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {
-  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);
-}
-
-// pstore
-
-template <>
-EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {
-  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);
-}
-
-// pstoreu
-
-template <>
-EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {
-  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);
-}
-
-// ploaddup
-template <>
-EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {
-  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
-  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
-                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
-                               a);
-}
-
-// ploadquad
-template <>
-EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {
-  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));
-  return _mm512_permutexvar_ph(
-      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
-      a);
-}
-
-// pabs
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {
-  return _mm512_abs_ph(a);
-}
-
-// psignbit
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {
-  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));
-}
-
-// pmin
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_min_ph(a, b);
-}
-
-// pmax
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_max_ph(a, b);
-}
-
-// plset
-template <>
-EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {
-  return _mm512_add_ph(_mm512_set1_ph(a),
-                       _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,
-                                     19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,
-                                     7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
-}
-
-// por
-
-template <>
-EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pxor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pand
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));
-}
-
-// pandnot
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {
-  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));
-}
-
-// pselect
-
-template <>
-EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
-  return _mm512_mask_blend_ph(mask32, a, b);
-}
-
-// pcmp_eq
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
-}
-
-// pcmp_le
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
-}
-
-// pcmp_lt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
-}
-
-// pcmp_lt_or_nan
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {
-  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);
-  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));
-}
-
-// padd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_add_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// psub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_sub_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pmul
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_mul_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pdiv
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {
-  return _mm512_div_ph(a, b);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
-  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
-  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));
-}
-
-// pround
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {
-  // Work-around for default std::round rounding mode.
-
-  // Mask for the sign bit
-  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));
-  // The largest half-preicision float less than 0.5
-  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));
-
-  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);
-}
-
-// print
-
-template <>
-EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);
-}
-
-// pceil
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);
-}
-
-// pfloor
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {
-  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);
-}
-
-// predux
-template <>
-EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {
-  return (half)_mm512_reduce_add_ph(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {
-  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));
-}
-
-template <>
-EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {
-  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));
-}
-
-// predux_half_dowto4
-template <>
-EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
-  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));
-  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));
-
-  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#else
-  Eigen::half data[32];
-  _mm512_storeu_ph(data, a);
-
-  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));
-  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));
-
-  return Packet16h(padd<Packet16h>(lowHalf, highHalf));
-#endif
-}
-
-// predux_max
-
-// predux_min
-
-// predux_mul
-
-#ifdef EIGEN_VECTORIZE_FMA
-
-// pmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmadd
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fnmadd_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-// pnmsub
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {
-  return _mm512_fnmsub_ph(a, b, c);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {
-  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
-  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));
-}
-
-#endif
-
-// pnegate
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {
-  return _mm512_sub_ph(_mm512_set1_ph(0.0), a);
-}
-
-// pconj
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {
-  return a;
-}
-
-// psqrt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {
-  return _mm512_sqrt_ph(a);
-}
-
-// prsqrt
-
-template <>
-EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {
-  return _mm512_rsqrt_ph(a);
-}
-
-// preciprocal
-
-template <>
-EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {
-  return _mm512_rcp_ph(a);
-}
-
-// ptranspose
-
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {
-  __m512i t[32];
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 16; i++) {
-    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
-    t[2 * i + 1] =
-        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));
-  }
-
-  __m512i p[32];
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 8; i++) {
-    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);
-    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);
-    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);
-    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);
-  }
-
-  __m512i q[32];
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 4; i++) {
-    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);
-    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);
-    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);
-    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);
-    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);
-    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);
-    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);
-    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);
-  }
-
-  __m512i f[32];
-
-#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \
-  do {                                                                                              \
-    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \
-    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
-    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
-    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
-    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
-    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
-    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
-    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
-  } while (false);
-
-  PACKET32H_TRANSPOSE_HELPER(0, 0);
-  PACKET32H_TRANSPOSE_HELPER(1, 1);
-  PACKET32H_TRANSPOSE_HELPER(2, 2);
-  PACKET32H_TRANSPOSE_HELPER(3, 3);
-
-  PACKET32H_TRANSPOSE_HELPER(1, 0);
-  PACKET32H_TRANSPOSE_HELPER(2, 0);
-  PACKET32H_TRANSPOSE_HELPER(3, 0);
-  PACKET32H_TRANSPOSE_HELPER(2, 1);
-  PACKET32H_TRANSPOSE_HELPER(3, 1);
-  PACKET32H_TRANSPOSE_HELPER(3, 2);
-
-  PACKET32H_TRANSPOSE_HELPER(0, 1);
-  PACKET32H_TRANSPOSE_HELPER(0, 2);
-  PACKET32H_TRANSPOSE_HELPER(0, 3);
-  PACKET32H_TRANSPOSE_HELPER(1, 2);
-  PACKET32H_TRANSPOSE_HELPER(1, 3);
-  PACKET32H_TRANSPOSE_HELPER(2, 3);
-
-#undef PACKET32H_TRANSPOSE_HELPER
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 32; i++) {
-    a.packet[i] = _mm512_castsi512_ph(f[i]);
-  }
-}
-
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {
-  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
-  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
-  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));
-  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
-  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));
-
-  p0 = _mm512_unpacklo_epi32(t0, t2);
-  p1 = _mm512_unpackhi_epi32(t0, t2);
-  p2 = _mm512_unpacklo_epi32(t1, t3);
-  p3 = _mm512_unpackhi_epi32(t1, t3);
-
-  a0 = p0;
-  a1 = p1;
-  a2 = p2;
-  a3 = p3;
-
-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);
-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);
-
-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);
-
-  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);
-
-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);
-
-  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
-
-  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
-  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);
-
-  a.packet[0] = _mm512_castsi512_ph(a0);
-  a.packet[1] = _mm512_castsi512_ph(a1);
-  a.packet[2] = _mm512_castsi512_ph(a2);
-  a.packet[3] = _mm512_castsi512_ph(a3);
-}
-
-// preverse
-
-template <>
-EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {
-  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
-                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
-                               a);
-}
-
-// pscatter
-
-template <>
-EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {
-  EIGEN_ALIGN64 half aux[32];
-  pstore(aux, from);
-
-  EIGEN_UNROLL_LOOP
-  for (int i = 0; i < 32; i++) {
-    to[stride * i] = aux[i];
-  }
-}
-
-// pgather
-
-template <>
-EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {
-  return _mm512_castsi512_ph(_mm512_set_epi16(
-      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,
-      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,
-      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,
-      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,
-      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,
-      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,
-      from[1 * stride].x, from[0 * stride].x));
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);
-template <>
-EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);
-
-EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {
-  __m512d result = _mm512_undefined_pd();
-  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);
-  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);
-  return _mm512_castpd_ph(result);
-}
-
-EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {
-  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));
-  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));
-}
-
-// psin
-template <>
-EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = psin(low);
-  Packet16h highOut = psin(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pcos
-template <>
-EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = pcos(low);
-  Packet16h highOut = pcos(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// plog
-template <>
-EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = plog(low);
-  Packet16h highOut = plog(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// plog2
-template <>
-EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = plog2(low);
-  Packet16h highOut = plog2(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// plog1p
-template <>
-EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = plog1p(low);
-  Packet16h highOut = plog1p(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = pexp(low);
-  Packet16h highOut = pexp(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pexpm1
-template <>
-EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = pexpm1(low);
-  Packet16h highOut = pexpm1(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// ptanh
-template <>
-EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h lowOut = ptanh(low);
-  Packet16h highOut = ptanh(high);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pfrexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h exp1 = _mm256_undefined_si256();
-  Packet16h exp2 = _mm256_undefined_si256();
-
-  Packet16h lowOut = pfrexp(low, exp1);
-  Packet16h highOut = pfrexp(high, exp2);
-
-  exponent = combine2Packet16h(exp1, exp2);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-// pldexp
-template <>
-EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {
-  Packet16h low;
-  Packet16h high;
-  extract2Packet16h(a, low, high);
-
-  Packet16h exp1;
-  Packet16h exp2;
-  extract2Packet16h(exponent, exp1, exp2);
-
-  Packet16h lowOut = pldexp(low, exp1);
-  Packet16h highOut = pldexp(high, exp2);
-
-  return combine2Packet16h(lowOut, highOut);
-}
-
-}  // end namespace internal
-}  // end namespace Eigen
-
-#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H
+// This file is part of Eigen, a lightweight C++ template library

+// for linear algebra.

+//

+//

+//

+// This Source Code Form is subject to the terms of the Mozilla

+// Public License v. 2.0. If a copy of the MPL was not distributed

+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

+

+#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H

+#define EIGEN_PACKET_MATH_FP16_AVX512_H

+

+// IWYU pragma: private

+#include "../../InternalHeaderCheck.h"

+

+namespace Eigen {

+

+namespace internal {

+

+typedef __m512h Packet32h;

+typedef eigen_packet_wrapper<__m256i, 1> Packet16h;

+typedef eigen_packet_wrapper<__m128i, 2> Packet8h;

+

+template <>

+struct is_arithmetic<Packet8h> {

+  enum { value = true };

+};

+

+template <>

+struct packet_traits<half> : default_packet_traits {

+  typedef Packet32h type;

+  typedef Packet16h half;

+  enum {

+    Vectorizable = 1,

+    AlignedOnScalar = 1,

+    size = 32,

+

+    HasCmp = 1,

+    HasAdd = 1,

+    HasSub = 1,

+    HasMul = 1,

+    HasDiv = 1,

+    HasNegate = 1,

+    HasAbs = 1,

+    HasAbs2 = 0,

+    HasMin = 1,

+    HasMax = 1,

+    HasConj = 1,

+    HasSetLinear = 0,

+    HasLog = 1,

+    HasLog1p = 1,

+    HasExp = 1,

+    HasExpm1 = 1,

+    HasSqrt = 1,

+    HasRsqrt = 1,

+    // These ones should be implemented in future

+    HasBessel = 0,

+    HasNdtri = 0,

+    HasSin = EIGEN_FAST_MATH,

+    HasCos = EIGEN_FAST_MATH,

+    HasTanh = EIGEN_FAST_MATH,

+    HasErf = 0,  // EIGEN_FAST_MATH,

+    HasBlend = 0,

+    HasRound = 1,

+    HasFloor = 1,

+    HasCeil = 1,

+    HasRint = 1

+  };

+};

+

+template <>

+struct unpacket_traits<Packet32h> {

+  typedef Eigen::half type;

+  typedef Packet16h half;

+  enum {

+    size = 32,

+    alignment = Aligned64,

+    vectorizable = true,

+    masked_load_available = false,

+    masked_store_available = false

+  };

+};

+

+template <>

+struct unpacket_traits<Packet16h> {

+  typedef Eigen::half type;

+  typedef Packet8h half;

+  enum {

+    size = 16,

+    alignment = Aligned32,

+    vectorizable = true,

+    masked_load_available = false,

+    masked_store_available = false

+  };

+};

+

+template <>

+struct unpacket_traits<Packet8h> {

+  typedef Eigen::half type;

+  typedef Packet8h half;

+  enum {

+    size = 8,

+    alignment = Aligned16,

+    vectorizable = true,

+    masked_load_available = false,

+    masked_store_available = false

+  };

+};

+

+// Memory functions

+

+// pset1

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) {

+  return _mm512_set1_ph(static_cast<_Float16>(from));

+}

+

+// pset1frombits

+template <>

+EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) {

+  return _mm512_castsi512_ph(_mm512_set1_epi16(from));

+}

+

+// pfirst

+

+template <>

+EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) {

+#ifdef EIGEN_VECTORIZE_AVX512DQ

+  return half_impl::raw_uint16_to_half(

+      static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));

+#else

+  Eigen::half dest[32];

+  _mm512_storeu_ph(dest, from);

+  return dest[0];

+#endif

+}

+

+// pload

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) {

+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from);

+}

+

+// ploadu

+

+template <>

+EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) {

+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from);

+}

+

+// pstore

+

+template <>

+EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) {

+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from);

+}

+

+// pstoreu

+

+template <>

+EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) {

+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from);

+}

+

+// ploaddup

+template <>

+EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) {

+  __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from));

+  return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,

+                                                5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),

+                               a);

+}

+

+// ploadquad

+template <>

+EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) {

+  __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from));

+  return _mm512_permutexvar_ph(

+      _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),

+      a);

+}

+

+// pabs

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) {

+  return _mm512_abs_ph(a);

+}

+

+// psignbit

+

+template <>

+EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) {

+  return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15));

+}

+

+// pmin

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_min_ph(a, b);

+}

+

+// pmax

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_max_ph(a, b);

+}

+

+// plset

+template <>

+EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) {

+  return _mm512_add_ph(_mm512_set1_ph(a),

+                       _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,

+                                     19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,

+                                     7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));

+}

+

+// por

+

+template <>

+EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));

+}

+

+// pxor

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));

+}

+

+// pand

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b)));

+}

+

+// pandnot

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) {

+  return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a)));

+}

+

+// pselect

+

+template <>

+EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);

+  return _mm512_mask_blend_ph(mask32, a, b);

+}

+

+// pcmp_eq

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));

+}

+

+// pcmp_le

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));

+}

+

+// pcmp_lt

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));

+}

+

+// pcmp_lt_or_nan

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) {

+  __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ);

+  return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));

+}

+

+// padd

+

+template <>

+EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_add_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// psub

+

+template <>

+EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_sub_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// pmul

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_mul_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// pdiv

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) {

+  return _mm512_div_ph(a, b);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {

+  return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {

+  return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b)));

+}

+

+// pround

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) {

+  // Work-around for default std::round rounding mode.

+

+  // Mask for the sign bit

+  const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u));

+  // The largest half-preicision float less than 0.5

+  const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu));

+

+  return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO);

+}

+

+// print

+

+template <>

+EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) {

+  return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION);

+}

+

+// pceil

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) {

+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF);

+}

+

+// pfloor

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) {

+  return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF);

+}

+

+// predux

+template <>

+EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) {

+  return (half)_mm512_reduce_add_ph(a);

+}

+

+template <>

+EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) {

+  return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a));

+}

+

+template <>

+EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) {

+  return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a));

+}

+

+// predux_half_dowto4

+template <>

+EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) {

+#ifdef EIGEN_VECTORIZE_AVX512DQ

+  __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0));

+  __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1));

+

+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));

+#else

+  Eigen::half data[32];

+  _mm512_storeu_ph(data, a);

+

+  __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data));

+  __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16));

+

+  return Packet16h(padd<Packet16h>(lowHalf, highHalf));

+#endif

+}

+

+// predux_max

+

+// predux_min

+

+// predux_mul

+

+#ifdef EIGEN_VECTORIZE_FMA

+

+// pmadd

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fmadd_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+// pmsub

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fmsub_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+// pnmadd

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fnmadd_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+// pnmsub

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) {

+  return _mm512_fnmsub_ph(a, b, c);

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) {

+  return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c)));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {

+  return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c)));

+}

+

+#endif

+

+// pnegate

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) {

+  return _mm512_sub_ph(_mm512_set1_ph(0.0), a);

+}

+

+// pconj

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) {

+  return a;

+}

+

+// psqrt

+

+template <>

+EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) {

+  return _mm512_sqrt_ph(a);

+}

+

+// prsqrt

+

+template <>

+EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) {

+  return _mm512_rsqrt_ph(a);

+}

+

+// preciprocal

+

+template <>

+EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) {

+  return _mm512_rcp_ph(a);

+}

+

+// ptranspose

+

+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) {

+  __m512i t[32];

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 16; i++) {

+    t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));

+    t[2 * i + 1] =

+        _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1]));

+  }

+

+  __m512i p[32];

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 8; i++) {

+    p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]);

+    p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]);

+    p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]);

+    p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]);

+  }

+

+  __m512i q[32];

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 4; i++) {

+    q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]);

+    q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]);

+    q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]);

+    q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]);

+    q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]);

+    q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]);

+    q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]);

+    q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]);

+  }

+

+  __m512i f[32];

+

+#define PACKET32H_TRANSPOSE_HELPER(X, Y)                                                            \

+  do {                                                                                              \

+    f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X);             \

+    f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \

+    f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \

+    f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \

+    f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \

+    f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \

+    f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \

+    f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \

+  } while (false);

+

+  PACKET32H_TRANSPOSE_HELPER(0, 0);

+  PACKET32H_TRANSPOSE_HELPER(1, 1);

+  PACKET32H_TRANSPOSE_HELPER(2, 2);

+  PACKET32H_TRANSPOSE_HELPER(3, 3);

+

+  PACKET32H_TRANSPOSE_HELPER(1, 0);

+  PACKET32H_TRANSPOSE_HELPER(2, 0);

+  PACKET32H_TRANSPOSE_HELPER(3, 0);

+  PACKET32H_TRANSPOSE_HELPER(2, 1);

+  PACKET32H_TRANSPOSE_HELPER(3, 1);

+  PACKET32H_TRANSPOSE_HELPER(3, 2);

+

+  PACKET32H_TRANSPOSE_HELPER(0, 1);

+  PACKET32H_TRANSPOSE_HELPER(0, 2);

+  PACKET32H_TRANSPOSE_HELPER(0, 3);

+  PACKET32H_TRANSPOSE_HELPER(1, 2);

+  PACKET32H_TRANSPOSE_HELPER(1, 3);

+  PACKET32H_TRANSPOSE_HELPER(2, 3);

+

+#undef PACKET32H_TRANSPOSE_HELPER

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 32; i++) {

+    a.packet[i] = _mm512_castsi512_ph(f[i]);

+  }

+}

+

+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) {

+  __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;

+  t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));

+  t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1]));

+  t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));

+  t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3]));

+

+  p0 = _mm512_unpacklo_epi32(t0, t2);

+  p1 = _mm512_unpackhi_epi32(t0, t2);

+  p2 = _mm512_unpacklo_epi32(t1, t3);

+  p3 = _mm512_unpackhi_epi32(t1, t3);

+

+  a0 = p0;

+  a1 = p1;

+  a2 = p2;

+  a3 = p3;

+

+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1);

+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0);

+

+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);

+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0);

+

+  a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);

+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0);

+

+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);

+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1);

+

+  a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);

+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);

+

+  a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);

+  a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1);

+

+  a.packet[0] = _mm512_castsi512_ph(a0);

+  a.packet[1] = _mm512_castsi512_ph(a1);

+  a.packet[2] = _mm512_castsi512_ph(a2);

+  a.packet[3] = _mm512_castsi512_ph(a3);

+}

+

+// preverse

+

+template <>

+EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) {

+  return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,

+                                                20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),

+                               a);

+}

+

+// pscatter

+

+template <>

+EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) {

+  EIGEN_ALIGN64 half aux[32];

+  pstore(aux, from);

+

+  EIGEN_UNROLL_LOOP

+  for (int i = 0; i < 32; i++) {

+    to[stride * i] = aux[i];

+  }

+}

+

+// pgather

+

+template <>

+EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) {

+  return _mm512_castsi512_ph(_mm512_set_epi16(

+      from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x,

+      from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x,

+      from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x,

+      from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x,

+      from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x,

+      from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x,

+      from[1 * stride].x, from[0 * stride].x));

+}

+

+template <>

+EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&);

+template <>

+EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&);

+

+EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) {

+  __m512d result = _mm512_undefined_pd();

+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0);

+  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1);

+  return _mm512_castpd_ph(result);

+}

+

+EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) {

+  a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0));

+  b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1));

+}

+

+// psin

+template <>

+EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = psin(low);

+  Packet16h highOut = psin(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pcos

+template <>

+EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = pcos(low);

+  Packet16h highOut = pcos(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// plog

+template <>

+EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = plog(low);

+  Packet16h highOut = plog(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// plog2

+template <>

+EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = plog2(low);

+  Packet16h highOut = plog2(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// plog1p

+template <>

+EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = plog1p(low);

+  Packet16h highOut = plog1p(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pexp

+template <>

+EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = pexp(low);

+  Packet16h highOut = pexp(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pexpm1

+template <>

+EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = pexpm1(low);

+  Packet16h highOut = pexpm1(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// ptanh

+template <>

+EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h lowOut = ptanh(low);

+  Packet16h highOut = ptanh(high);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pfrexp

+template <>

+EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h exp1 = _mm256_undefined_si256();

+  Packet16h exp2 = _mm256_undefined_si256();

+

+  Packet16h lowOut = pfrexp(low, exp1);

+  Packet16h highOut = pfrexp(high, exp2);

+

+  exponent = combine2Packet16h(exp1, exp2);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+// pldexp

+template <>

+EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) {

+  Packet16h low;

+  Packet16h high;

+  extract2Packet16h(a, low, high);

+

+  Packet16h exp1;

+  Packet16h exp2;

+  extract2Packet16h(exponent, exp1, exp2);

+

+  Packet16h lowOut = pldexp(low, exp1);

+  Packet16h highOut = pldexp(high, exp2);

+

+  return combine2Packet16h(lowOut, highOut);

+}

+

+}  // end namespace internal

+}  // end namespace Eigen

+

+#endif  // EIGEN_PACKET_MATH_FP16_AVX512_H


diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h
index 56a94f4..ccdb563 100644
--- a/Eigen/src/Core/arch/AVX512/TypeCasting.h
+++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h

@@ -37,10 +37,12 @@
 template <>
 struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
 
+#ifndef EIGEN_VECTORIZE_AVX512FP16
 template <>
 struct type_casting_traits<half, float> : vectorized_type_casting_traits<half, float> {};
 template <>
 struct type_casting_traits<float, half> : vectorized_type_casting_traits<float, half> {};
+#endif
 
 template <>
 struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfloat16, float> {};

diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 414f05c..a4b134c 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h

@@ -105,6 +105,7 @@
 static const Packet16uc p16uc_DUPLICATE16_ODD = {2, 3, 2, 3, 6, 7, 6, 7, 10, 11, 10, 11, 14, 15, 14, 15};
 
 static Packet16uc p16uc_QUADRUPLICATE16_HI = {0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3};
+static Packet16uc p16uc_QUADRUPLICATE16 = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3};
 
 static Packet16uc p16uc_MERGEE16 = {0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29};
 static Packet16uc p16uc_MERGEO16 = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
@@ -1716,6 +1717,26 @@
   return vec_mergeh(p, p);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet16c ploadquad<Packet16c>(const signed char* from) {
+  Packet16c p;
+  if ((std::ptrdiff_t(from) % 16) == 0)
+    p = pload<Packet16c>(from);
+  else
+    p = ploadu<Packet16c>(from);
+  return vec_perm(p, p, p16uc_QUADRUPLICATE16);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16uc ploadquad<Packet16uc>(const unsigned char* from) {
+  Packet16uc p;
+  if ((std::ptrdiff_t(from) % 16) == 0)
+    p = pload<Packet16uc>(from);
+  else
+    p = ploadu<Packet16uc>(from);
+  return vec_perm(p, p, p16uc_QUADRUPLICATE16);
+}
+
 template <typename Packet>
 EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) {
   EIGEN_DEBUG_UNALIGNED_STORE

diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index 68b48f9..be44909 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h

@@ -677,16 +677,22 @@
 namespace internal {
 
 template <>
-struct random_default_impl<bfloat16, false, false> {
-  static inline bfloat16 run(const bfloat16& x, const bfloat16& y) {
-    return x + (y - x) * bfloat16(float(std::rand()) / float(RAND_MAX));
-  }
-  static inline bfloat16 run() { return run(bfloat16(-1.f), bfloat16(1.f)); }
+struct is_arithmetic<bfloat16> {
+  enum { value = true };
 };
 
 template <>
-struct is_arithmetic<bfloat16> {
-  enum { value = true };
+struct random_impl<bfloat16> {
+  enum : int { MantissaBits = 7 };
+  using Impl = random_impl<float>;
+  static EIGEN_DEVICE_FUNC inline bfloat16 run(const bfloat16& x, const bfloat16& y) {
+    float result = Impl::run(x, y, MantissaBits);
+    return bfloat16(result);
+  }
+  static EIGEN_DEVICE_FUNC inline bfloat16 run() {
+    float result = Impl::run(MantissaBits);
+    return bfloat16(result);
+  }
 };
 
 }  // namespace internal

diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index 92516c7..17d534d 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h

@@ -762,16 +762,22 @@
 namespace internal {
 
 template <>
-struct random_default_impl<half, false, false> {
-  static inline half run(const half& x, const half& y) {
-    return x + (y - x) * half(float(std::rand()) / float(RAND_MAX));
-  }
-  static inline half run() { return run(half(-1.f), half(1.f)); }
+struct is_arithmetic<half> {
+  enum { value = true };
 };
 
 template <>
-struct is_arithmetic<half> {
-  enum { value = true };
+struct random_impl<half> {
+  enum : int { MantissaBits = 10 };
+  using Impl = random_impl<float>;
+  static EIGEN_DEVICE_FUNC inline half run(const half& x, const half& y) {
+    float result = Impl::run(x, y, MantissaBits);
+    return half(result);
+  }
+  static EIGEN_DEVICE_FUNC inline half run() {
+    float result = Impl::run(MantissaBits);
+    return half(result);
+  }
 };
 
 }  // end namespace internal

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index be8183c..bdbf759 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h

@@ -527,7 +527,7 @@
 
 template <>
 EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) {
-  return psub(pset1<Packet16b>(false), a);
+  return a;
 }
 
 template <>
@@ -677,18 +677,6 @@
 EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) {
   return _mm_blendv_pd(b, a, mask);
 }
-
-template <>
-EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) {
-  return _mm_blendv_epi8(b, a, mask);
-}
-#else
-template <>
-EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) {
-  Packet16b a_part = _mm_and_si128(mask, a);
-  Packet16b b_part = _mm_andnot_si128(mask, b);
-  return _mm_or_si128(a_part, b_part);
-}
 #endif
 
 template <>
@@ -696,8 +684,8 @@
   return _mm_cmpeq_epi32(a, a);
 }
 template <>
-EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& a) {
-  return _mm_cmpeq_epi8(a, a);
+EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& /*a*/) {
+  return pset1<Packet16b>(true);
 }
 template <>
 EIGEN_STRONG_INLINE Packet4f ptrue<Packet4f>(const Packet4f& a) {
@@ -838,7 +826,9 @@
 }
 template <>
 EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) {
-  return _mm_cmpeq_epi8(a, b);
+  // Mask out invalid bool bits to avoid UB.
+  const Packet16b kBoolMask = pset1<Packet16b>(true);
+  return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask);
 }
 template <>
 EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
@@ -1377,7 +1367,7 @@
 }
 template <>
 EIGEN_STRONG_INLINE void pstoreu<bool>(bool* to, const Packet16b& from) {
-  EIGEN_DEBUG_ALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);
+  EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from);
 }
 
 template <typename Scalar, typename Packet>

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 647a7dd..c4fa771 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h

@@ -140,7 +140,7 @@
     typedef typename Traits::ResScalar ResScalar;
     enum {
       kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
-      ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
+      ksub = Traits::mr * (Traits::nr * sizeof(ResScalar)),
       kr = 8,
       mr = Traits::mr,
       nr = Traits::nr
@@ -197,7 +197,7 @@
     enum {
       k_peeling = 8,
       k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
-      k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
+      k_sub = Traits::mr * (Traits::nr * sizeof(ResScalar))
     };
 
     // ---- 1st level of blocking on L1, yields kc ----
@@ -2399,7 +2399,7 @@
         // fails, drop down to the scalar path.
         constexpr bool kCanLoadSRhsQuad =
             (unpacket_traits<SLhsPacket>::size < 4) ||
-            (unpacket_traits<SRhsPacket>::size % (unpacket_traits<SLhsPacket>::size / 4)) == 0;
+            (unpacket_traits<SRhsPacket>::size % ((std::max<int>)(unpacket_traits<SLhsPacket>::size, 4) / 4)) == 0;
         if (kCanLoadSRhsQuad && (SwappedTraits::LhsProgress % 4) == 0 && (SwappedTraits::LhsProgress <= 16) &&
             (SwappedTraits::LhsProgress != 8 || SResPacketHalfSize == nr) &&
             (SwappedTraits::LhsProgress != 16 || SResPacketQuarterSize == nr)) {

diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index f9b2ad0..2122af9 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h

@@ -57,7 +57,7 @@
     Index rs = size - k - 1;  // remaining size
     Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs;
 
-    Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1) / conj(tri(i, i));
+    Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1)/conj(tri(i,i)));
     for (Index j = 0; j < otherSize; ++j) {
       if (TriStorageOrder == RowMajor) {
         Scalar b(0);

diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index a68996a..60a24a8 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h

@@ -54,7 +54,7 @@
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };

diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 1ec8fb8..a33e46e 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h

@@ -59,7 +59,7 @@
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };

diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index bb6583a..40830fb 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h

@@ -73,7 +73,7 @@
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };

diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index 95954e7..08f1e34 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h

@@ -67,7 +67,7 @@
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };

diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
index 8f3c1b3..f79ee33 100644
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h

@@ -66,7 +66,7 @@
   enum {
     Size = MatrixType::RowsAtCompileTime,
     SizeMinusOne = Size == Dynamic ? Dynamic : Size - 1,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxSize = MatrixType::MaxRowsAtCompileTime,
     MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : MaxSize - 1
   };

diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index 9fba7ad..3466f51 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h

@@ -64,7 +64,7 @@
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };

diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 1ac9af8..970500c 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h

@@ -61,7 +61,7 @@
   enum {
     RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };

diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index 9511e68..f84da91 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h

@@ -85,7 +85,7 @@
   enum {
     Size = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
   };
 

diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index 76158e9..9bb791d 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h

@@ -75,7 +75,7 @@
   enum {
     Size = MatrixType::RowsAtCompileTime,
     SizeMinusOne = Size == Dynamic ? Dynamic : (Size > 1 ? Size - 1 : 1),
-    Options = MatrixType::Options,
+    Options = internal::traits<MatrixType>::Options,
     MaxSize = MatrixType::MaxRowsAtCompileTime,
     MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? MaxSize - 1 : 1)
   };

diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 57d049c..466834a 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h

@@ -622,7 +622,7 @@
     // permuting the rows and cols to bring the nonnegligible pivots to the top of
     // the main diagonal. We need that to be able to apply our triangular solvers.
     // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
-    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options, MaxSmallDimAtCompileTime,
+    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, traits<MatrixType>::Options, MaxSmallDimAtCompileTime,
            MatrixType::MaxColsAtCompileTime>
         m(dec().matrixLU().block(0, 0, rank(), cols));
     for (Index i = 0; i < rank(); ++i) {

diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index 7948ca3..f80ddc0 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h

@@ -1126,13 +1126,6 @@
                       << "j=" << j << "\n";
           }
 #endif
-          // Avoid index out of bounds.
-          // Will end up setting zhat(k) = 0.
-          if (i >= k && l == 0) {
-            m_info = NumericalIssue;
-            prod = 0;
-            break;
-          }
           Index j = i < k ? i : l > 0 ? perm(l - 1) : i;
 #ifdef EIGEN_BDCSVD_SANITY_CHECKS
           if (!(dk != Literal(0) || diag(i) != Literal(0))) {
@@ -1205,7 +1198,7 @@
 
 // page 12_13
 // i >= 1, di almost null and zi non null.
-// We use a rotation to zero out zi applied to the left of M
+// We use a rotation to zero out zi applied to the left of M, and set di = 0.
 template <typename MatrixType, int Options>
 void BDCSVD<MatrixType, Options>::deflation43(Index firstCol, Index shift, Index i, Index size) {
   using std::abs;
@@ -1231,9 +1224,8 @@
 }  // end deflation 43
 
 // page 13
-// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
-// We apply two rotations to have zj = 0;
-// TODO deflation44 is still broken and not properly tested
+// i,j >= 1, i > j, and |di - dj| < epsilon * norm2(M)
+// We apply two rotations to have zi = 0, and dj = di.
 template <typename MatrixType, int Options>
 void BDCSVD<MatrixType, Options>::deflation44(Index firstColu, Index firstColm, Index firstRowW, Index firstColW,
                                               Index i, Index j, Index size) {
@@ -1241,9 +1233,10 @@
   using std::conj;
   using std::pow;
   using std::sqrt;
-  RealScalar c = m_computed(firstColm + i, firstColm);
-  RealScalar s = m_computed(firstColm + j, firstColm);
-  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
+  
+  RealScalar s = m_computed(firstColm + i, firstColm);
+  RealScalar c = m_computed(firstColm + j, firstColm);
+  RealScalar r = numext::hypot(c, s);
 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
   std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; "
             << m_computed(firstColm + i - 1, firstColm) << " " << m_computed(firstColm + i, firstColm) << " "
@@ -1253,21 +1246,21 @@
             << m_computed(firstColm + i + 2, firstColm + i + 2) << "\n";
 #endif
   if (numext::is_exactly_zero(r)) {
-    m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
+    m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
     return;
   }
   c /= r;
   s /= r;
-  m_computed(firstColm + i, firstColm) = r;
+  m_computed(firstColm + j, firstColm) = r;
   m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
-  m_computed(firstColm + j, firstColm) = Literal(0);
+  m_computed(firstColm + i, firstColm) = Literal(0);
 
   JacobiRotation<RealScalar> J(c, -s);
   if (m_compU)
-    m_naiveU.middleRows(firstColu, size + 1).applyOnTheRight(firstColu + i, firstColu + j, J);
+    m_naiveU.middleRows(firstColu, size + 1).applyOnTheRight(firstColu + j, firstColu + i, J);
   else
-    m_naiveU.applyOnTheRight(firstColu + i, firstColu + j, J);
-  if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);
+    m_naiveU.applyOnTheRight(firstColu + j, firstColu + i, J);
+  if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + j, firstColW + i, J);
 }  // end deflation 44
 
 // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
@@ -1350,7 +1343,7 @@
 
       // Move deflated diagonal entries at the end.
       for (Index i = 1; i < length; ++i)
-        if (abs(diag(i)) < considerZero) permutation[p++] = i;
+        if (diag(i) < considerZero) permutation[p++] = i;
 
       Index i = 1, j = k + 1;
       for (; p < length; ++p) {
@@ -1369,7 +1362,7 @@
     if (total_deflation) {
       for (Index i = 1; i < length; ++i) {
         Index pi = permutation[i];
-        if (abs(diag(pi)) < considerZero || diag(0) < diag(pi))
+        if (diag(pi) < considerZero || diag(0) < diag(pi))
           permutation[i - 1] = permutation[i];
         else {
           permutation[i - 1] = 0;
@@ -1424,17 +1417,19 @@
   // condition 4.4
   {
     Index i = length - 1;
-    while (i > 0 && (abs(diag(i)) < considerZero || abs(col0(i)) < considerZero)) --i;
+    // Find last non-deflated entry.
+    while (i > 0 && (diag(i) < considerZero || abs(col0(i)) < considerZero)) --i;
+
     for (; i > 1; --i)
-      if ((diag(i) - diag(i - 1)) < NumTraits<RealScalar>::epsilon() * maxDiag) {
+      if ((diag(i) - diag(i - 1)) < epsilon_strict) {
 #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
         std::cout << "deflation 4.4 with i = " << i << " because " << diag(i) << " - " << diag(i - 1)
                   << " == " << (diag(i) - diag(i - 1)) << " < "
-                  << NumTraits<RealScalar>::epsilon() * /*diag(i)*/ maxDiag << "\n";
+                  << epsilon_strict << "\n";
 #endif
         eigen_internal_assert(abs(diag(i) - diag(i - 1)) < epsilon_coarse &&
                               " diagonal entries are not properly sorted");
-        deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i - 1, i, length);
+        deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i, i - 1, length);
       }
   }
 

diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index aec1931..cb41123 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h

@@ -105,7 +105,7 @@
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    MatrixOptions = MatrixType::Options
+    MatrixOptions = traits<MatrixType>::Options
   };
 
   typedef typename internal::make_proper_matrix_type<Scalar, ColsAtCompileTime, RowsAtCompileTime, MatrixOptions,
@@ -202,7 +202,7 @@
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    MatrixOptions = MatrixType::Options,
+    MatrixOptions = internal::traits<MatrixType>::Options,
     WorkspaceSize = internal::traits<SVDType>::MatrixVColsAtCompileTime,
     MaxWorkspaceSize = internal::traits<SVDType>::MatrixVMaxColsAtCompileTime
   };
@@ -310,7 +310,7 @@
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    MatrixOptions = MatrixType::Options,
+    MatrixOptions = internal::traits<MatrixType>::Options,
     WorkspaceSize = internal::traits<SVDType>::MatrixVColsAtCompileTime,
     MaxWorkspaceSize = internal::traits<SVDType>::MatrixVMaxColsAtCompileTime
   };

diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index ae2843b..5f04647 100644
--- a/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h

@@ -139,7 +139,7 @@
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
     MaxDiagSizeAtCompileTime = internal::min_size_prefer_fixed(MaxRowsAtCompileTime, MaxColsAtCompileTime),
-    MatrixOptions = MatrixType::Options,
+    MatrixOptions = internal::traits<MatrixType>::Options,
     MatrixUColsAtCompileTime = internal::traits<Derived>::MatrixUColsAtCompileTime,
     MatrixVColsAtCompileTime = internal::traits<Derived>::MatrixVColsAtCompileTime,
     MatrixUMaxColsAtCompileTime = internal::traits<Derived>::MatrixUMaxColsAtCompileTime,

diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 19dd40c..81b0a11 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h

@@ -58,6 +58,7 @@
     ColsAtCompileTime = Dynamic,
     MaxRowsAtCompileTime = Dynamic,
     MaxColsAtCompileTime = Dynamic,
+    Options = Options_,
     Flags = Options_ | NestByRefBit | LvalueBit | CompressedAccessBit,
     SupportedAccessPatterns = InnerRandomAccessPattern
   };

diff --git a/Eigen/src/misc/Image.h b/Eigen/src/misc/Image.h
index fd1ac99..38d516e 100644
--- a/Eigen/src/misc/Image.h
+++ b/Eigen/src/misc/Image.h

@@ -27,7 +27,7 @@
                  MatrixType::RowsAtCompileTime,  // the image is a subspace of the destination space, whose
                                                  // dimension is the number of rows of the original matrix
                  Dynamic,                        // we don't know at compile time the dimension of the image (the rank)
-                 MatrixType::Options,
+                 traits<MatrixType>::Options,
                  MatrixType::MaxRowsAtCompileTime,  // the image matrix will consist of columns from the original
                                                     // matrix,
                  MatrixType::MaxColsAtCompileTime   // so it has the same number of rows and at most as many columns.

diff --git a/Eigen/src/misc/Kernel.h b/Eigen/src/misc/Kernel.h
index 55c3efe..3ed458b 100644
--- a/Eigen/src/misc/Kernel.h
+++ b/Eigen/src/misc/Kernel.h

@@ -28,7 +28,7 @@
                                                  // is the number of cols of the original matrix
                                                  // so that the product "matrix * kernel = zero" makes sense
                  Dynamic,                        // we don't know at compile-time the dimension of the kernel
-                 MatrixType::Options,
+                 traits<MatrixType>::Options,
                  MatrixType::MaxColsAtCompileTime,  // see explanation for 2nd template parameter
                  MatrixType::MaxColsAtCompileTime   // the kernel is a subspace of the domain space,
                                                     // whose dimension is the number of columns of the original matrix

diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h
index bb133bb..f12bc7c 100644
--- a/Eigen/src/misc/blas.h
+++ b/Eigen/src/misc/blas.h

@@ -7,89 +7,89 @@
 
 /* Level 1 routines */
 
-int BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *);
-int BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *);
-int BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *);
+void BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *);
+void BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
 
 /* Level 2 routines */
 
-int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
-                    const int *, const float *, float *, const int *);
-int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *,
-                    const int *, const double *, double *, const int *);
-int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
-                    const int *, const float *, float *, const int *);
-int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *,
-                    const int *, const double *, double *, const int *);
+void BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
+                     const int *, const float *, float *, const int *);
+void BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
+                     const int *, const float *, float *, const int *);
+void BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
-                    const int *);
-int BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
-                    const int *);
-int BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
+void BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
+                     const int *);
+void BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
+                     const int *);
+void BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
 
-int BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
 
-int BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
 
 /* Level 3 routines */
 
-int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
-                    const int *, const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
-                    const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
-                    const int *, const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
-                    const int *, const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
+                     const int *, const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
+                     const int *, const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
+                     const int *, const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
+                     const int *, const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
+void BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
 
-int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
+void BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
 
-int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
 
-int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
 
 #undef BLASFUNC
 }

diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt
index c530957..4ae0603 100644
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt

@@ -18,7 +18,7 @@
                     f2c/lsame.c   f2c/dspmv.c   f2c/ssbmv.c f2c/chbmv.c
                     f2c/sspmv.c   f2c/zhbmv.c   f2c/chpmv.c f2c/dsbmv.c
                     f2c/zhpmv.c   f2c/dtbmv.c   f2c/stbmv.c f2c/ctbmv.c
-                    f2c/ztbmv.c   f2c/d_cnjg.c  f2c/r_cnjg.c
+                    f2c/ztbmv.c
    )
 
 if (EIGEN_Fortran_COMPILER_WORKS)

diff --git a/blas/blas.h b/blas/blas.h
index 69a9262..8962dc9 100644
--- a/blas/blas.h
+++ b/blas/blas.h

@@ -15,7 +15,7 @@
 typedef unsigned long BLASULONG;
 #endif
 
-int BLASFUNC(xerbla)(const char *, int *info, int);
+void BLASFUNC(xerbla)(const char *, int *info);
 
 float BLASFUNC(sdot)(int *, float *, int *, float *, int *);
 float BLASFUNC(sdsdot)(int *, float *, float *, int *, float *, int *);
@@ -24,34 +24,34 @@
 double BLASFUNC(ddot)(int *, double *, int *, double *, int *);
 double BLASFUNC(qdot)(int *, double *, int *, double *, int *);
 
-int BLASFUNC(cdotuw)(int *, float *, int *, float *, int *, float *);
-int BLASFUNC(cdotcw)(int *, float *, int *, float *, int *, float *);
-int BLASFUNC(zdotuw)(int *, double *, int *, double *, int *, double *);
-int BLASFUNC(zdotcw)(int *, double *, int *, double *, int *, double *);
+void BLASFUNC(cdotuw)(int *, float *, int *, float *, int *, float *);
+void BLASFUNC(cdotcw)(int *, float *, int *, float *, int *, float *);
+void BLASFUNC(zdotuw)(int *, double *, int *, double *, int *, double *);
+void BLASFUNC(zdotcw)(int *, double *, int *, double *, int *, double *);
 
-int BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *);
-int BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(qaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *);
-int BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(xaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *);
-int BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *);
+void BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(qaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *);
+void BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(xaxpy)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *);
+void BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
 
-int BLASFUNC(scopy)(int *, float *, int *, float *, int *);
-int BLASFUNC(dcopy)(int *, double *, int *, double *, int *);
-int BLASFUNC(qcopy)(int *, double *, int *, double *, int *);
-int BLASFUNC(ccopy)(int *, float *, int *, float *, int *);
-int BLASFUNC(zcopy)(int *, double *, int *, double *, int *);
-int BLASFUNC(xcopy)(int *, double *, int *, double *, int *);
+void BLASFUNC(scopy)(int *, float *, int *, float *, int *);
+void BLASFUNC(dcopy)(int *, double *, int *, double *, int *);
+void BLASFUNC(qcopy)(int *, double *, int *, double *, int *);
+void BLASFUNC(ccopy)(int *, float *, int *, float *, int *);
+void BLASFUNC(zcopy)(int *, double *, int *, double *, int *);
+void BLASFUNC(xcopy)(int *, double *, int *, double *, int *);
 
-int BLASFUNC(sswap)(int *, float *, int *, float *, int *);
-int BLASFUNC(dswap)(int *, double *, int *, double *, int *);
-int BLASFUNC(qswap)(int *, double *, int *, double *, int *);
-int BLASFUNC(cswap)(int *, float *, int *, float *, int *);
-int BLASFUNC(zswap)(int *, double *, int *, double *, int *);
-int BLASFUNC(xswap)(int *, double *, int *, double *, int *);
+void BLASFUNC(sswap)(int *, float *, int *, float *, int *);
+void BLASFUNC(dswap)(int *, double *, int *, double *, int *);
+void BLASFUNC(qswap)(int *, double *, int *, double *, int *);
+void BLASFUNC(cswap)(int *, float *, int *, float *, int *);
+void BLASFUNC(zswap)(int *, double *, int *, double *, int *);
+void BLASFUNC(xswap)(int *, double *, int *, double *, int *);
 
 float BLASFUNC(sasum)(int *, float *, int *);
 float BLASFUNC(scasum)(int *, float *, int *);
@@ -116,15 +116,15 @@
 double BLASFUNC(dzmin)(int *, double *, int *);
 double BLASFUNC(qxmin)(int *, double *, int *);
 
-int BLASFUNC(sscal)(int *, float *, float *, int *);
-int BLASFUNC(dscal)(int *, double *, double *, int *);
-int BLASFUNC(qscal)(int *, double *, double *, int *);
-int BLASFUNC(cscal)(int *, float *, float *, int *);
-int BLASFUNC(zscal)(int *, double *, double *, int *);
-int BLASFUNC(xscal)(int *, double *, double *, int *);
-int BLASFUNC(csscal)(int *, float *, float *, int *);
-int BLASFUNC(zdscal)(int *, double *, double *, int *);
-int BLASFUNC(xqscal)(int *, double *, double *, int *);
+void BLASFUNC(sscal)(int *, float *, float *, int *);
+void BLASFUNC(dscal)(int *, double *, double *, int *);
+void BLASFUNC(qscal)(int *, double *, double *, int *);
+void BLASFUNC(cscal)(int *, float *, float *, int *);
+void BLASFUNC(zscal)(int *, double *, double *, int *);
+void BLASFUNC(xscal)(int *, double *, double *, int *);
+void BLASFUNC(csscal)(int *, float *, float *, int *);
+void BLASFUNC(zdscal)(int *, double *, double *, int *);
+void BLASFUNC(xqscal)(int *, double *, double *, int *);
 
 float BLASFUNC(snrm2)(int *, float *, int *);
 float BLASFUNC(scnrm2)(int *, float *, int *);
@@ -134,335 +134,335 @@
 double BLASFUNC(dznrm2)(int *, double *, int *);
 double BLASFUNC(qxnrm2)(int *, double *, int *);
 
-int BLASFUNC(srot)(int *, float *, int *, float *, int *, float *, float *);
-int BLASFUNC(drot)(int *, double *, int *, double *, int *, double *, double *);
-int BLASFUNC(qrot)(int *, double *, int *, double *, int *, double *, double *);
-int BLASFUNC(csrot)(int *, float *, int *, float *, int *, float *, float *);
-int BLASFUNC(zdrot)(int *, double *, int *, double *, int *, double *, double *);
-int BLASFUNC(xqrot)(int *, double *, int *, double *, int *, double *, double *);
+void BLASFUNC(srot)(int *, float *, int *, float *, int *, float *, float *);
+void BLASFUNC(drot)(int *, double *, int *, double *, int *, double *, double *);
+void BLASFUNC(qrot)(int *, double *, int *, double *, int *, double *, double *);
+void BLASFUNC(csrot)(int *, float *, int *, float *, int *, float *, float *);
+void BLASFUNC(zdrot)(int *, double *, int *, double *, int *, double *, double *);
+void BLASFUNC(xqrot)(int *, double *, int *, double *, int *, double *, double *);
 
-int BLASFUNC(srotg)(float *, float *, float *, float *);
-int BLASFUNC(drotg)(double *, double *, double *, double *);
-int BLASFUNC(qrotg)(double *, double *, double *, double *);
-int BLASFUNC(crotg)(float *, float *, float *, float *);
-int BLASFUNC(zrotg)(double *, double *, double *, double *);
-int BLASFUNC(xrotg)(double *, double *, double *, double *);
+void BLASFUNC(srotg)(float *, float *, float *, float *);
+void BLASFUNC(drotg)(double *, double *, double *, double *);
+void BLASFUNC(qrotg)(double *, double *, double *, double *);
+void BLASFUNC(crotg)(float *, float *, float *, float *);
+void BLASFUNC(zrotg)(double *, double *, double *, double *);
+void BLASFUNC(xrotg)(double *, double *, double *, double *);
 
-int BLASFUNC(srotmg)(float *, float *, float *, float *, float *);
-int BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
+void BLASFUNC(srotmg)(float *, float *, float *, float *, float *);
+void BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
 
-int BLASFUNC(srotm)(int *, float *, int *, float *, int *, float *);
-int BLASFUNC(drotm)(int *, double *, int *, double *, int *, double *);
-int BLASFUNC(qrotm)(int *, double *, int *, double *, int *, double *);
+void BLASFUNC(srotm)(int *, float *, int *, float *, int *, float *);
+void BLASFUNC(drotm)(int *, double *, int *, double *, int *, double *);
+void BLASFUNC(qrotm)(int *, double *, int *, double *, int *, double *);
 
 /* Level 2 routines */
 
-int BLASFUNC(sger)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
-int BLASFUNC(dger)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
-int BLASFUNC(qger)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
-int BLASFUNC(cgeru)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
-int BLASFUNC(cgerc)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
-int BLASFUNC(zgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
-int BLASFUNC(zgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
-int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
-int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(sger)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
+void BLASFUNC(dger)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(qger)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(cgeru)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
+void BLASFUNC(cgerc)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
+void BLASFUNC(zgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(zgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
 
-int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
-                    const int *, const float *, float *, const int *);
-int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *,
-                    const int *, const double *, double *, const int *);
-int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *,
-                    const int *, const double *, double *, const int *);
-int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
-                    const int *, const float *, float *, const int *);
-int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *,
-                    const int *, const double *, double *, const int *);
-int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *,
-                    const int *, const double *, double *, const int *);
+void BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
+                     const int *, const float *, float *, const int *);
+void BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *,
+                     const int *, const float *, float *, const int *);
+void BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
 
-int BLASFUNC(strsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
-                    const int *);
-int BLASFUNC(dtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(qtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(ctrsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
-                    const int *);
-int BLASFUNC(ztrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(xtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
+void BLASFUNC(strsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
+                     const int *);
+void BLASFUNC(dtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(qtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(ctrsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
+                     const int *);
+void BLASFUNC(ztrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(xtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
 
-int BLASFUNC(stpsv)(char *, char *, char *, int *, float *, float *, int *);
-int BLASFUNC(dtpsv)(char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(qtpsv)(char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(ctpsv)(char *, char *, char *, int *, float *, float *, int *);
-int BLASFUNC(ztpsv)(char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(xtpsv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(stpsv)(char *, char *, char *, int *, float *, float *, int *);
+void BLASFUNC(dtpsv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(qtpsv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(ctpsv)(char *, char *, char *, int *, float *, float *, int *);
+void BLASFUNC(ztpsv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(xtpsv)(char *, char *, char *, int *, double *, double *, int *);
 
-int BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
-                    const int *);
-int BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(qtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
-                    const int *);
-int BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
-int BLASFUNC(xtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
-                    const int *);
+void BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
+                     const int *);
+void BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(qtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *,
+                     const int *);
+void BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
+void BLASFUNC(xtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *,
+                     const int *);
 
-int BLASFUNC(stpmv)(char *, char *, char *, int *, float *, float *, int *);
-int BLASFUNC(dtpmv)(char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(qtpmv)(char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(ctpmv)(char *, char *, char *, int *, float *, float *, int *);
-int BLASFUNC(ztpmv)(char *, char *, char *, int *, double *, double *, int *);
-int BLASFUNC(xtpmv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(stpmv)(char *, char *, char *, int *, float *, float *, int *);
+void BLASFUNC(dtpmv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(qtpmv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(ctpmv)(char *, char *, char *, int *, float *, float *, int *);
+void BLASFUNC(ztpmv)(char *, char *, char *, int *, double *, double *, int *);
+void BLASFUNC(xtpmv)(char *, char *, char *, int *, double *, double *, int *);
 
-int BLASFUNC(stbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
-int BLASFUNC(dtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(qtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(ctbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
-int BLASFUNC(ztbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(xtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(stbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
+void BLASFUNC(dtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(qtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(ctbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
+void BLASFUNC(ztbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(xtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
 
-int BLASFUNC(stbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
-int BLASFUNC(dtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(qtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(ctbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
-int BLASFUNC(ztbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
-int BLASFUNC(xtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(stbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
+void BLASFUNC(dtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(qtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(ctbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *);
+void BLASFUNC(ztbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
+void BLASFUNC(xtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
 
-int BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(qsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
+void BLASFUNC(qsymv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
 
-int BLASFUNC(sspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
-int BLASFUNC(dspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
-int BLASFUNC(qspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
+void BLASFUNC(sspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
+void BLASFUNC(dspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
+void BLASFUNC(qspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
 
-int BLASFUNC(ssyr)(const char *, const int *, const float *, const float *, const int *, float *, const int *);
-int BLASFUNC(dsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *);
-int BLASFUNC(qsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(ssyr)(const char *, const int *, const float *, const float *, const int *, float *, const int *);
+void BLASFUNC(dsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *);
+void BLASFUNC(qsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *);
 
-int BLASFUNC(ssyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    float *, const int *);
-int BLASFUNC(dsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    double *, const int *);
-int BLASFUNC(qsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    double *, const int *);
-int BLASFUNC(csyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    float *, const int *);
-int BLASFUNC(zsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    double *, const int *);
-int BLASFUNC(xsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    double *, const int *);
+void BLASFUNC(ssyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     float *, const int *);
+void BLASFUNC(dsyr2)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, double *, const int *);
+void BLASFUNC(qsyr2)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, double *, const int *);
+void BLASFUNC(csyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     float *, const int *);
+void BLASFUNC(zsyr2)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, double *, const int *);
+void BLASFUNC(xsyr2)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, double *, const int *);
 
-int BLASFUNC(sspr)(char *, int *, float *, float *, int *, float *);
-int BLASFUNC(dspr)(char *, int *, double *, double *, int *, double *);
-int BLASFUNC(qspr)(char *, int *, double *, double *, int *, double *);
+void BLASFUNC(sspr)(char *, int *, float *, float *, int *, float *);
+void BLASFUNC(dspr)(char *, int *, double *, double *, int *, double *);
+void BLASFUNC(qspr)(char *, int *, double *, double *, int *, double *);
 
-int BLASFUNC(sspr2)(char *, int *, float *, float *, int *, float *, int *, float *);
-int BLASFUNC(dspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
-int BLASFUNC(qspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
-int BLASFUNC(cspr2)(char *, int *, float *, float *, int *, float *, int *, float *);
-int BLASFUNC(zspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
-int BLASFUNC(xspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
+void BLASFUNC(sspr2)(char *, int *, float *, float *, int *, float *, int *, float *);
+void BLASFUNC(dspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
+void BLASFUNC(qspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
+void BLASFUNC(cspr2)(char *, int *, float *, float *, int *, float *, int *, float *);
+void BLASFUNC(zspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
+void BLASFUNC(xspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
 
-int BLASFUNC(cher)(char *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zher)(char *, int *, double *, double *, int *, double *, int *);
-int BLASFUNC(xher)(char *, int *, double *, double *, int *, double *, int *);
+void BLASFUNC(cher)(char *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(zher)(char *, int *, double *, double *, int *, double *, int *);
+void BLASFUNC(xher)(char *, int *, double *, double *, int *, double *, int *);
 
-int BLASFUNC(chpr)(char *, int *, float *, float *, int *, float *);
-int BLASFUNC(zhpr)(char *, int *, double *, double *, int *, double *);
-int BLASFUNC(xhpr)(char *, int *, double *, double *, int *, double *);
+void BLASFUNC(chpr)(char *, int *, float *, float *, int *, float *);
+void BLASFUNC(zhpr)(char *, int *, double *, double *, int *, double *);
+void BLASFUNC(xhpr)(char *, int *, double *, double *, int *, double *);
 
-int BLASFUNC(cher2)(char *, int *, float *, float *, int *, float *, int *, float *, int *);
-int BLASFUNC(zher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
-int BLASFUNC(xher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(cher2)(char *, int *, float *, float *, int *, float *, int *, float *, int *);
+void BLASFUNC(zher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
 
-int BLASFUNC(chpr2)(char *, int *, float *, float *, int *, float *, int *, float *);
-int BLASFUNC(zhpr2)(char *, int *, double *, double *, int *, double *, int *, double *);
-int BLASFUNC(xhpr2)(char *, int *, double *, double *, int *, double *, int *, double *);
+void BLASFUNC(chpr2)(char *, int *, float *, float *, int *, float *, int *, float *);
+void BLASFUNC(zhpr2)(char *, int *, double *, double *, int *, double *, int *, double *);
+void BLASFUNC(xhpr2)(char *, int *, double *, double *, int *, double *, int *, double *);
 
-int BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(xhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
+void BLASFUNC(xhemv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
 
-int BLASFUNC(chpmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
-int BLASFUNC(zhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
-int BLASFUNC(xhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
+void BLASFUNC(chpmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
+void BLASFUNC(zhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
+void BLASFUNC(xhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
 
-int BLASFUNC(snorm)(char *, int *, int *, float *, int *);
-int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
-int BLASFUNC(cnorm)(char *, int *, int *, float *, int *);
-int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
+void BLASFUNC(snorm)(char *, int *, int *, float *, int *);
+void BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
+void BLASFUNC(cnorm)(char *, int *, int *, float *, int *);
+void BLASFUNC(znorm)(char *, int *, int *, double *, int *);
 
-int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
-                    int *);
-int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                    int *);
-int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                    int *);
-int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
-                    int *);
-int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                    int *);
-int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                    int *);
+void BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
+                     int *);
+void BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                     int *);
+void BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                     int *);
+void BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
+                     int *);
+void BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                     int *);
+void BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                     int *);
 
-int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+void BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+void BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+void BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+void BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+void BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+void BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
 
-int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
-int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+void BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+void BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+void BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
 
 /* Level 3 routines */
 
-int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
-                    const int *, const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
-                    const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
-                    const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
-                    const int *, const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
-                    const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
-                    const int *, const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
-                      int *);
-int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *,
-                      double *, int *);
-int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *,
-                      double *, int *);
-
-int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
-                     int *);
-int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
-                     double *, int *);
-int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
-                     int *);
-int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
-                     double *, int *);
-
-int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-
-int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
-                    const float *, const int *, float *, const int *);
-int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
-                    const double *, const int *, double *, const int *);
-
-int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                      int *);
-int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                      int *);
-
-int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
-
-int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                     const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                     const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                     const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                     const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                     const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                     const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, const int *, const double *, double *, const int *);
-
-int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
-int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                      int *);
-int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-                      int *);
-
-int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                    const double *, double *, const int *);
-
-int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
-                     const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                     const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
-                     const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *,
+void BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
                      const int *, const float *, const int *, const float *, float *, const int *);
-int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *,
+void BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
                      const int *, const double *, const int *, const double *, double *, const int *);
-int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *,
+void BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
                      const int *, const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *,
+                     const int *, const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
+                     const int *, const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *,
+                     const int *, const double *, const int *, const double *, double *, const int *);
+
+void BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
+                       int *);
+void BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *,
+                       double *, int *);
+void BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *,
+                       double *, int *);
+
+void BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
+                      int *);
+void BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
+                      double *, int *);
+void BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
+                      int *);
+void BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
+                      double *, int *);
+
+void BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+
+void BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,
+                     const float *, const int *, float *, const int *);
+void BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+void BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *,
+                     const double *, const int *, double *, const int *);
+
+void BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+
+void BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+void BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                       int *);
+void BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                       int *);
+
+void BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
+void BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
+void BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
+void BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
+
+void BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                      const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                      const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                      const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                      const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                      const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                      const double *, const int *, const double *, double *, const int *);
+
+void BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, const int *, const double *, double *, const int *);
+
+void BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
+void BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                       int *);
+void BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+                       int *);
+
+void BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
+void BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                     const double *, double *, const int *);
+
+void BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *,
+                      const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                      const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *,
+                      const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *,
+                      const int *, const float *, const int *, const float *, float *, const int *);
+void BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *,
+                      const double *, const int *, const double *, const int *, const double *, double *, const int *);
+void BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *,
+                      const double *, const int *, const double *, const int *, const double *, double *, const int *);
 
 #ifdef __cplusplus
 }

diff --git a/blas/common.h b/blas/common.h
index bf91a83..2456273 100644
--- a/blas/common.h
+++ b/blas/common.h

@@ -140,6 +140,7 @@
 #define EIGEN_BLAS_FUNC_SUFFIX _
 #endif
 
-#define EIGEN_BLAS_FUNC(X) EIGEN_CAT(SCALAR_SUFFIX, EIGEN_CAT(X, EIGEN_BLAS_FUNC_SUFFIX))
+#define EIGEN_BLAS_FUNC_NAME(X) EIGEN_CAT(SCALAR_SUFFIX, EIGEN_CAT(X, EIGEN_BLAS_FUNC_SUFFIX))
+#define EIGEN_BLAS_FUNC(X) extern "C" void EIGEN_BLAS_FUNC_NAME(X)
 
 #endif  // EIGEN_BLAS_COMMON_H

diff --git a/blas/double.cpp b/blas/double.cpp
index eef5891..4298665 100644
--- a/blas/double.cpp
+++ b/blas/double.cpp

@@ -19,7 +19,7 @@
 #include "level2_real_impl.h"
 #include "level3_impl.h"
 
-double EIGEN_BLAS_FUNC(sdot)(int* n, float* x, int* incx, float* y, int* incy) {
+double EIGEN_BLAS_FUNC_NAME(sdot)(int* n, float* x, int* incx, float* y, int* incy) {
   if (*n <= 0) return 0;
 
   if (*incx == 1 && *incy == 1)

diff --git a/blas/f2c/chbmv.c b/blas/f2c/chbmv.c
index f218fe3..2aa3a01 100644
--- a/blas/f2c/chbmv.c
+++ b/blas/f2c/chbmv.c

@@ -1,487 +1,456 @@
 /* chbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int chbmv_(char *uplo, integer *n, integer *k, complex *
-	alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
-	beta, complex *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
+static inline void r_cnjg(complex *r, complex *z) {
+  r->r = z->r;
+  r->i = -(z->i);
+}
 
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
+/* Subroutine */ void chbmv_(char *uplo, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *x,
+                             integer *incx, complex *beta, complex *y, integer *incy) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+  real r__1;
+  complex q__1, q__2, q__3, q__4;
 
-    /* Local variables */
-    integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+  complex temp1, temp2;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  CHBMV  performs the matrix-vector  operation */
+  /*  CHBMV  performs the matrix-vector  operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian band matrix, with k super-diagonals. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n hermitian band matrix, with k super-diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the band matrix A is being supplied as */
+  /*           follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  being supplied. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  being supplied. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry, K specifies the number of super-diagonals of the */
+  /*           matrix A. K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - COMPLEX         . */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the hermitian matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer the upper */
+  /*           triangular part of a hermitian band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the hermitian matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer the lower */
+  /*           triangular part of a hermitian band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
+  /*           Note that the imaginary parts of the diagonal elements need */
+  /*           not be set and are assumed to be zero. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
+  /*  X      - COMPLEX          array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the */
+  /*           vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
+  /*  BETA   - COMPLEX         . */
+  /*           On entry, BETA specifies the scalar beta. */
+  /*           Unchanged on exit. */
 
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
+  /*  Y      - COMPLEX          array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the */
+  /*           vector y. On exit, Y is overwritten by the updated vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
-    --y;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
+  --y;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*k < 0) {
-	info = 3;
-    } else if (*lda < *k + 1) {
-	info = 6;
-    } else if (*incx == 0) {
-	info = 8;
-    } else if (*incy == 0) {
-	info = 11;
-    }
-    if (info != 0) {
-	xerbla_("CHBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*k < 0) {
+    info = 3;
+  } else if (*lda < *k + 1) {
+    info = 6;
+  } else if (*incx == 0) {
+    info = 8;
+  } else if (*incy == 0) {
+    info = 11;
+  }
+  if (info != 0) {
+    xerbla_("CHBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-                                                           beta->i == 0.f))) {
-	return 0;
-    }
+  if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array A */
+  /*     are accessed sequentially with one pass through A. */
+
+  /*     First form  y := beta*y. */
+
+  if (beta->r != 1.f || beta->i != 0.f) {
+    if (*incy == 1) {
+      if (beta->r == 0.f && beta->i == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          y[i__2].r = 0.f, y[i__2].i = 0.f;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          i__3 = i__;
+          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (beta->r == 0.f && beta->i == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          y[i__2].r = 0.f, y[i__2].i = 0.f;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          i__3 = iy;
+          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (alpha->r == 0.f && alpha->i == 0.f) {
+    return;
+  }
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when upper triangle of A is stored. */
+
+    kplus1 = *k + 1;
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = j;
+        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__2 = 1, i__3 = j - *k;
+        i__4 = j - 1;
+        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+          i__2 = i__;
+          i__3 = i__;
+          i__5 = l + i__ + j * a_dim1;
+          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+          i__2 = i__;
+          q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i = q__3.r * x[i__2].i + q__3.i * x[i__2].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          /* L50: */
+        }
+        i__4 = j;
+        i__2 = j;
+        i__3 = kplus1 + j * a_dim1;
+        r__1 = a[i__3].r;
+        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+        q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i;
+        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+        y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__4 = jx;
+        q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        ix = kx;
+        iy = ky;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__4 = 1, i__2 = j - *k;
+        i__3 = j - 1;
+        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+          i__4 = iy;
+          i__2 = iy;
+          i__5 = l + i__ + j * a_dim1;
+          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
+          y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+          i__4 = ix;
+          q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        i__3 = jy;
+        i__4 = jy;
+        i__2 = kplus1 + j * a_dim1;
+        r__1 = a[i__2].r;
+        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+        q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i;
+        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+        jx += *incx;
+        jy += *incy;
+        if (j > *k) {
+          kx += *incx;
+          ky += *incy;
+        }
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when lower triangle of A is stored. */
 
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-	if (*incy == 1) {
-	    if (beta->r == 0.f && beta->i == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    i__3 = i__;
-		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (beta->r == 0.f && beta->i == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    y[i__2].r = 0.f, y[i__2].i = 0.f;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    i__3 = iy;
-		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-	return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-	kplus1 = *k + 1;
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = j;
-		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__2 = 1, i__3 = j - *k;
-		i__4 = j - 1;
-		for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-		    i__2 = i__;
-		    i__3 = i__;
-		    i__5 = l + i__ + j * a_dim1;
-		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-		    i__2 = i__;
-		    q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i =
-			     q__3.r * x[i__2].i + q__3.i * x[i__2].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-		}
-		i__4 = j;
-		i__2 = j;
-		i__3 = kplus1 + j * a_dim1;
-		r__1 = a[i__3].r;
-		q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-		q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i;
-		q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-		y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__4 = jx;
-		q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i =
-			 alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		ix = kx;
-		iy = ky;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__4 = 1, i__2 = j - *k;
-		i__3 = j - 1;
-		for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-		    i__4 = iy;
-		    i__2 = iy;
-		    i__5 = l + i__ + j * a_dim1;
-		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-		    y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-		    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-		    i__4 = ix;
-		    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-			     q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		i__3 = jy;
-		i__4 = jy;
-		i__2 = kplus1 + j * a_dim1;
-		r__1 = a[i__2].r;
-		q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-		q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i;
-		q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-		y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		jx += *incx;
-		jy += *incy;
-		if (j > *k) {
-		    kx += *incx;
-		    ky += *incy;
-		}
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__3 = j;
+        q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        i__3 = j;
+        i__4 = j;
+        i__2 = j * a_dim1 + 1;
+        r__1 = a[i__2].r;
+        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+        l = 1 - j;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          i__4 = i__;
+          i__2 = i__;
+          i__5 = l + i__ + j * a_dim1;
+          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
+          y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+          i__4 = i__;
+          q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          /* L90: */
+        }
+        i__3 = j;
+        i__4 = j;
+        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__3 = j;
-		q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
-			 alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		i__3 = j;
-		i__4 = j;
-		i__2 = j * a_dim1 + 1;
-		r__1 = a[i__2].r;
-		q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-		q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		l = 1 - j;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    i__4 = i__;
-		    i__2 = i__;
-		    i__5 = l + i__ + j * a_dim1;
-		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-		    y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-		    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-		    i__4 = i__;
-		    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-			     q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-/* L90: */
-		}
-		i__3 = j;
-		i__4 = j;
-		q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__3 = jx;
-		q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
-			 alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		i__3 = jy;
-		i__4 = jy;
-		i__2 = j * a_dim1 + 1;
-		r__1 = a[i__2].r;
-		q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-		q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		l = 1 - j;
-		ix = jx;
-		iy = jy;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    ix += *incx;
-		    iy += *incy;
-		    i__4 = iy;
-		    i__2 = iy;
-		    i__5 = l + i__ + j * a_dim1;
-		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-		    y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-		    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-		    i__4 = ix;
-		    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-			     q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-		}
-		i__3 = jy;
-		i__4 = jy;
-		q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		jx += *incx;
-		jy += *incy;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__3 = jx;
+        q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        i__3 = jy;
+        i__4 = jy;
+        i__2 = j * a_dim1 + 1;
+        r__1 = a[i__2].r;
+        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+        l = 1 - j;
+        ix = jx;
+        iy = jy;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          ix += *incx;
+          iy += *incy;
+          i__4 = iy;
+          i__2 = iy;
+          i__5 = l + i__ + j * a_dim1;
+          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
+          y[i__4].r = q__1.r, y[i__4].i = q__1.i;
+          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+          i__4 = ix;
+          q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          /* L110: */
+        }
+        i__3 = jy;
+        i__4 = jy;
+        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+        jx += *incx;
+        jy += *incy;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of CHBMV . */
+  /*     End of CHBMV . */
 
 } /* chbmv_ */
-

diff --git a/blas/f2c/chpmv.c b/blas/f2c/chpmv.c
index 65bab1c..4a38a52 100644
--- a/blas/f2c/chpmv.c
+++ b/blas/f2c/chpmv.c

@@ -1,438 +1,407 @@
 /* chpmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int chpmv_(char *uplo, integer *n, complex *alpha, complex *
-	ap, complex *x, integer *incx, complex *beta, complex *y, integer *
-	incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
+static inline void r_cnjg(complex *r, complex *z) {
+  r->r = z->r;
+  r->i = -(z->i);
+}
 
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
+/* Subroutine */ void chpmv_(char *uplo, integer *n, complex *alpha, complex *ap, complex *x, integer *incx,
+                             complex *beta, complex *y, integer *incy) {
+  /* System generated locals */
+  integer i__1, i__2, i__3, i__4, i__5;
+  real r__1;
+  complex q__1, q__2, q__3, q__4;
 
-    /* Local variables */
-    integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+  complex temp1, temp2;
+  extern logical lsame_(char *, char *);
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  CHPMV  performs the matrix-vector operation */
+  /*  CHPMV  performs the matrix-vector operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix, supplied in packed form. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n hermitian matrix, supplied in packed form. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the matrix A is supplied in the packed */
+  /*           array AP as follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - COMPLEX         . */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
+  /*  AP     - COMPLEX          array of DIMENSION at least */
+  /*           ( ( n*( n + 1 ) )/2 ). */
+  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
+  /*           contain the upper triangular part of the hermitian matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+  /*           and a( 2, 2 ) respectively, and so on. */
+  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
+  /*           contain the lower triangular part of the hermitian matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+  /*           and a( 3, 1 ) respectively, and so on. */
+  /*           Note that the imaginary parts of the diagonal elements need */
+  /*           not be set and are assumed to be zero. */
+  /*           Unchanged on exit. */
 
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
+  /*  X      - COMPLEX          array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
+  /*  BETA   - COMPLEX         . */
+  /*           On entry, BETA specifies the scalar beta. When BETA is */
+  /*           supplied as zero then Y need not be set on input. */
+  /*           Unchanged on exit. */
 
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
+  /*  Y      - COMPLEX          array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the n */
+  /*           element vector y. On exit, Y is overwritten by the updated */
+  /*           vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
+  /* Parameter adjustments */
+  --y;
+  --x;
+  --ap;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*incx == 0) {
-	info = 6;
-    } else if (*incy == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("CHPMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*incx == 0) {
+    info = 6;
+  } else if (*incy == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("CHPMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-                                                           beta->i == 0.f))) {
-	return 0;
-    }
+  if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array AP */
+  /*     are accessed sequentially with one pass through AP. */
+
+  /*     First form  y := beta*y. */
+
+  if (beta->r != 1.f || beta->i != 0.f) {
+    if (*incy == 1) {
+      if (beta->r == 0.f && beta->i == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          y[i__2].r = 0.f, y[i__2].i = 0.f;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          i__3 = i__;
+          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (beta->r == 0.f && beta->i == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          y[i__2].r = 0.f, y[i__2].i = 0.f;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          i__3 = iy;
+          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (alpha->r == 0.f && alpha->i == 0.f) {
+    return;
+  }
+  kk = 1;
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when AP contains the upper triangle. */
+
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = j;
+        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        k = kk;
+        i__2 = j - 1;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+          i__3 = i__;
+          i__4 = i__;
+          i__5 = k;
+          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+          r_cnjg(&q__3, &ap[k]);
+          i__3 = i__;
+          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          ++k;
+          /* L50: */
+        }
+        i__2 = j;
+        i__3 = j;
+        i__4 = kk + j - 1;
+        r__1 = ap[i__4].r;
+        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+        q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
+        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+        kk += j;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = jx;
+        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        ix = kx;
+        iy = ky;
+        i__2 = kk + j - 2;
+        for (k = kk; k <= i__2; ++k) {
+          i__3 = iy;
+          i__4 = iy;
+          i__5 = k;
+          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+          r_cnjg(&q__3, &ap[k]);
+          i__3 = ix;
+          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        i__2 = jy;
+        i__3 = jy;
+        i__4 = kk + j - 1;
+        r__1 = ap[i__4].r;
+        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+        q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
+        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+        jx += *incx;
+        jy += *incy;
+        kk += j;
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when AP contains the lower triangle. */
 
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-	if (*incy == 1) {
-	    if (beta->r == 0.f && beta->i == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    i__3 = i__;
-		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (beta->r == 0.f && beta->i == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    y[i__2].r = 0.f, y[i__2].i = 0.f;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    i__3 = iy;
-		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-	return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = j;
-		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		k = kk;
-		i__2 = j - 1;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    i__3 = i__;
-		    i__4 = i__;
-		    i__5 = k;
-		    q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		    r_cnjg(&q__3, &ap[k]);
-		    i__3 = i__;
-		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-		    ++k;
-/* L50: */
-		}
-		i__2 = j;
-		i__3 = j;
-		i__4 = kk + j - 1;
-		r__1 = ap[i__4].r;
-		q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-		q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-		q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		kk += j;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = jx;
-		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		ix = kx;
-		iy = ky;
-		i__2 = kk + j - 2;
-		for (k = kk; k <= i__2; ++k) {
-		    i__3 = iy;
-		    i__4 = iy;
-		    i__5 = k;
-		    q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		    r_cnjg(&q__3, &ap[k]);
-		    i__3 = ix;
-		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		i__2 = jy;
-		i__3 = jy;
-		i__4 = kk + j - 1;
-		r__1 = ap[i__4].r;
-		q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-		q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-		q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		jx += *incx;
-		jy += *incy;
-		kk += j;
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = j;
+        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        i__2 = j;
+        i__3 = j;
+        i__4 = kk;
+        r__1 = ap[i__4].r;
+        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+        k = kk + 1;
+        i__2 = *n;
+        for (i__ = j + 1; i__ <= i__2; ++i__) {
+          i__3 = i__;
+          i__4 = i__;
+          i__5 = k;
+          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+          r_cnjg(&q__3, &ap[k]);
+          i__3 = i__;
+          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          ++k;
+          /* L90: */
+        }
+        i__2 = j;
+        i__3 = j;
+        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+        kk += *n - j + 1;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = j;
-		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		i__2 = j;
-		i__3 = j;
-		i__4 = kk;
-		r__1 = ap[i__4].r;
-		q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		k = kk + 1;
-		i__2 = *n;
-		for (i__ = j + 1; i__ <= i__2; ++i__) {
-		    i__3 = i__;
-		    i__4 = i__;
-		    i__5 = k;
-		    q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		    r_cnjg(&q__3, &ap[k]);
-		    i__3 = i__;
-		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-		    ++k;
-/* L90: */
-		}
-		i__2 = j;
-		i__3 = j;
-		q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		kk += *n - j + 1;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = jx;
-		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = q__1.r, temp1.i = q__1.i;
-		temp2.r = 0.f, temp2.i = 0.f;
-		i__2 = jy;
-		i__3 = jy;
-		i__4 = kk;
-		r__1 = ap[i__4].r;
-		q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		ix = jx;
-		iy = jy;
-		i__2 = kk + *n - j;
-		for (k = kk + 1; k <= i__2; ++k) {
-		    ix += *incx;
-		    iy += *incy;
-		    i__3 = iy;
-		    i__4 = iy;
-		    i__5 = k;
-		    q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-		    r_cnjg(&q__3, &ap[k]);
-		    i__3 = ix;
-		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-		    temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-		}
-		i__2 = jy;
-		i__3 = jy;
-		q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-		jx += *incx;
-		jy += *incy;
-		kk += *n - j + 1;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = jx;
+        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = q__1.r, temp1.i = q__1.i;
+        temp2.r = 0.f, temp2.i = 0.f;
+        i__2 = jy;
+        i__3 = jy;
+        i__4 = kk;
+        r__1 = ap[i__4].r;
+        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+        ix = jx;
+        iy = jy;
+        i__2 = kk + *n - j;
+        for (k = kk + 1; k <= i__2; ++k) {
+          ix += *incx;
+          iy += *incy;
+          i__3 = iy;
+          i__4 = iy;
+          i__5 = k;
+          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+          r_cnjg(&q__3, &ap[k]);
+          i__3 = ix;
+          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+          temp2.r = q__1.r, temp2.i = q__1.i;
+          /* L110: */
+        }
+        i__2 = jy;
+        i__3 = jy;
+        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+        jx += *incx;
+        jy += *incy;
+        kk += *n - j + 1;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of CHPMV . */
+  /*     End of CHPMV . */
 
 } /* chpmv_ */
-

diff --git a/blas/f2c/complexdots.c b/blas/f2c/complexdots.c
index a856a23..b0dc6c8 100644
--- a/blas/f2c/complexdots.c
+++ b/blas/f2c/complexdots.c

@@ -6,79 +6,68 @@
 
 /* complexdots.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-complex cdotc_(integer *n, complex *cx, integer 
-	*incx, complex *cy, integer *incy)
-{
-    complex res;
-    extern /* Subroutine */ int cdotcw_(integer *, complex *, integer *, 
-	    complex *, integer *, complex *);
+complex cdotc_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) {
+  complex res;
+  extern /* Subroutine */ void cdotcw_(integer *, complex *, integer *, complex *, integer *, complex *);
 
-    /* Parameter adjustments */
-    --cy;
-    --cx;
+  /* Parameter adjustments */
+  --cy;
+  --cx;
 
-    /* Function Body */
-    cdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
-    return res;
+  /* Function Body */
+  cdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
+  return res;
 } /* cdotc_ */
 
-complex cdotu_(integer *n, complex *cx, integer 
-	*incx, complex *cy, integer *incy)
-{
-    complex res;
-    extern /* Subroutine */ int cdotuw_(integer *, complex *, integer *, 
-	    complex *, integer *, complex *);
+complex cdotu_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) {
+  complex res;
+  extern /* Subroutine */ void cdotuw_(integer *, complex *, integer *, complex *, integer *, complex *);
 
-    /* Parameter adjustments */
-    --cy;
-    --cx;
+  /* Parameter adjustments */
+  --cy;
+  --cx;
 
-    /* Function Body */
-    cdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
-    return res;
+  /* Function Body */
+  cdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
+  return res;
 } /* cdotu_ */
 
-doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, 
-                     doublecomplex *cy, integer *incy)
-{
-    doublecomplex res;
-    extern /* Subroutine */ int zdotcw_(integer *, doublecomplex *, integer *,
-	     doublecomplex *, integer *, doublecomplex *);
+doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) {
+  doublecomplex res;
+  extern /* Subroutine */ void zdotcw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *,
+                                       doublecomplex *);
 
-    /* Parameter adjustments */
-    --cy;
-    --cx;
+  /* Parameter adjustments */
+  --cy;
+  --cx;
 
-    /* Function Body */
-    zdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
-    return res;
+  /* Function Body */
+  zdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
+  return res;
 } /* zdotc_ */
 
-doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, 
-                     doublecomplex *cy, integer *incy)
-{
-    doublecomplex res;
-    extern /* Subroutine */ int zdotuw_(integer *, doublecomplex *, integer *,
-	     doublecomplex *, integer *, doublecomplex *);
+doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) {
+  doublecomplex res;
+  extern /* Subroutine */ void zdotuw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *,
+                                       doublecomplex *);
 
-    /* Parameter adjustments */
-    --cy;
-    --cx;
+  /* Parameter adjustments */
+  --cy;
+  --cx;
 
-    /* Function Body */
-    zdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
-    return res;
+  /* Function Body */
+  zdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
+  return res;
 } /* zdotu_ */
-

diff --git a/blas/f2c/ctbmv.c b/blas/f2c/ctbmv.c
index a6e0dae..2c0ce9b 100644
--- a/blas/f2c/ctbmv.c
+++ b/blas/f2c/ctbmv.c

@@ -1,647 +1,586 @@
 /* ctbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int ctbmv_(char *uplo, char *trans, char *diag, integer *n, 
-	integer *k, complex *a, integer *lda, complex *x, integer *incx, 
-	ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
+static inline void r_cnjg(complex *r, complex *z) {
+  r->r = z->r;
+  r->i = -(z->i);
+}
 
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
+/* Subroutine */ void ctbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda,
+                             complex *x, integer *incx) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+  complex q__1, q__2, q__3;
 
-    /* Local variables */
-    integer i__, j, l, ix, jx, kx, info;
-    complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    logical noconj, nounit;
+  /* Local variables */
+  integer i__, j, l, ix, jx, kx, info;
+  complex temp;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
+  logical noconj, nounit;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  CTBMV  performs one of the matrix-vector operations */
+  /*  CTBMV  performs one of the matrix-vector operations */
 
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
+  /*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
 
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
+  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the matrix is an upper or */
+  /*           lower triangular matrix as follows: */
 
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
+  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
 
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
+  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
+  /*  TRANS  - CHARACTER*1. */
+  /*           On entry, TRANS specifies the operation to be performed as */
+  /*           follows: */
 
-/*              TRANS = 'N' or 'n'   x := A*x. */
+  /*              TRANS = 'N' or 'n'   x := A*x. */
 
-/*              TRANS = 'T' or 't'   x := A'*x. */
+  /*              TRANS = 'T' or 't'   x := A'*x. */
 
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
+  /*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
+  /*  DIAG   - CHARACTER*1. */
+  /*           On entry, DIAG specifies whether or not A is unit */
+  /*           triangular as follows: */
 
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
+  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
 
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
+  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
+  /*                                  triangular. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
+  /*           super-diagonals of the matrix A. */
+  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
+  /*           sub-diagonals of the matrix A. */
+  /*           K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer an upper */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer a lower */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
+  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
+  /*           corresponding to the diagonal elements of the matrix are not */
+  /*           referenced, but are assumed to be unity. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           transformed vector x. */
+  /*  X      - COMPLEX          array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. On exit, X is overwritten with the */
+  /*           transformed vector x. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-	    "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-	    ftnlen)1)) {
-	info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-	    "N", (ftnlen)1, (ftnlen)1)) {
-	info = 3;
-    } else if (*n < 0) {
-	info = 4;
-    } else if (*k < 0) {
-	info = 5;
-    } else if (*lda < *k + 1) {
-	info = 7;
-    } else if (*incx == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("CTBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
+    info = 2;
+  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
+    info = 3;
+  } else if (*n < 0) {
+    info = 4;
+  } else if (*k < 0) {
+    info = 5;
+  } else if (*lda < *k + 1) {
+    info = 7;
+  } else if (*incx == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("CTBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0) {
-	return 0;
-    }
+  if (*n == 0) {
+    return;
+  }
 
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+  noconj = lsame_(trans, "T");
+  nounit = lsame_(diag, "N");
 
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
+  /*     Set up the start point in X if the increment is not unity. This */
+  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
 
-    if (*incx <= 0) {
-	kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-	kx = 1;
-    }
+  if (*incx <= 0) {
+    kx = 1 - (*n - 1) * *incx;
+  } else if (*incx != 1) {
+    kx = 1;
+  }
 
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
+  /*     Start the operations. In this version the elements of A are */
+  /*     accessed sequentially with one pass through A. */
 
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+  if (lsame_(trans, "N")) {
+    /*         Form  x := A*x. */
 
-/*         Form  x := A*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    i__2 = j;
-		    if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-			i__2 = j;
-			temp.r = x[i__2].r, temp.i = x[i__2].i;
-			l = kplus1 - j;
-/* Computing MAX */
-			i__2 = 1, i__3 = j - *k;
-			i__4 = j - 1;
-			for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-			    i__2 = i__;
-			    i__3 = i__;
-			    i__5 = l + i__ + j * a_dim1;
-			    q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-				    q__2.i = temp.r * a[i__5].i + temp.i * a[
-				    i__5].r;
-			    q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-				    q__2.i;
-			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L10: */
-			}
-			if (nounit) {
-			    i__4 = j;
-			    i__2 = j;
-			    i__3 = kplus1 + j * a_dim1;
-			    q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-				    i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-				    x[i__2].i * a[i__3].r;
-			    x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-			}
-		    }
-/* L20: */
-		}
-	    } else {
-		jx = kx;
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    i__4 = jx;
-		    if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-			i__4 = jx;
-			temp.r = x[i__4].r, temp.i = x[i__4].i;
-			ix = kx;
-			l = kplus1 - j;
-/* Computing MAX */
-			i__4 = 1, i__2 = j - *k;
-			i__3 = j - 1;
-			for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-			    i__4 = ix;
-			    i__2 = ix;
-			    i__5 = l + i__ + j * a_dim1;
-			    q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-				    q__2.i = temp.r * a[i__5].i + temp.i * a[
-				    i__5].r;
-			    q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + 
-				    q__2.i;
-			    x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-			    ix += *incx;
-/* L30: */
-			}
-			if (nounit) {
-			    i__3 = jx;
-			    i__4 = jx;
-			    i__2 = kplus1 + j * a_dim1;
-			    q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
-				    i__2].i, q__1.i = x[i__4].r * a[i__2].i + 
-				    x[i__4].i * a[i__2].r;
-			    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-			}
-		    }
-		    jx += *incx;
-		    if (j > *k) {
-			kx += *incx;
-		    }
-/* L40: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    i__1 = j;
-		    if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-			i__1 = j;
-			temp.r = x[i__1].r, temp.i = x[i__1].i;
-			l = 1 - j;
-/* Computing MIN */
-			i__1 = *n, i__3 = j + *k;
-			i__4 = j + 1;
-			for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-			    i__1 = i__;
-			    i__3 = i__;
-			    i__2 = l + i__ + j * a_dim1;
-			    q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-				    q__2.i = temp.r * a[i__2].i + temp.i * a[
-				    i__2].r;
-			    q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-				    q__2.i;
-			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L50: */
-			}
-			if (nounit) {
-			    i__4 = j;
-			    i__1 = j;
-			    i__3 = j * a_dim1 + 1;
-			    q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
-				    i__3].i, q__1.i = x[i__1].r * a[i__3].i + 
-				    x[i__1].i * a[i__3].r;
-			    x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-			}
-		    }
-/* L60: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    i__4 = jx;
-		    if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-			i__4 = jx;
-			temp.r = x[i__4].r, temp.i = x[i__4].i;
-			ix = kx;
-			l = 1 - j;
-/* Computing MIN */
-			i__4 = *n, i__1 = j + *k;
-			i__3 = j + 1;
-			for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-			    i__4 = ix;
-			    i__1 = ix;
-			    i__2 = l + i__ + j * a_dim1;
-			    q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-				    q__2.i = temp.r * a[i__2].i + temp.i * a[
-				    i__2].r;
-			    q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + 
-				    q__2.i;
-			    x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-			    ix -= *incx;
-/* L70: */
-			}
-			if (nounit) {
-			    i__3 = jx;
-			    i__4 = jx;
-			    i__1 = j * a_dim1 + 1;
-			    q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
-				    i__1].i, q__1.i = x[i__4].r * a[i__1].i + 
-				    x[i__4].i * a[i__1].r;
-			    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-			}
-		    }
-		    jx -= *incx;
-		    if (*n - j >= *k) {
-			kx -= *incx;
-		    }
-/* L80: */
-		}
-	    }
-	}
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          i__2 = j;
+          if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
+            i__2 = j;
+            temp.r = x[i__2].r, temp.i = x[i__2].i;
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__2 = 1, i__3 = j - *k;
+            i__4 = j - 1;
+            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+              i__2 = i__;
+              i__3 = i__;
+              i__5 = l + i__ + j * a_dim1;
+              q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
+              q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i;
+              x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+              /* L10: */
+            }
+            if (nounit) {
+              i__4 = j;
+              i__2 = j;
+              i__3 = kplus1 + j * a_dim1;
+              q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i,
+              q__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r;
+              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+            }
+          }
+          /* L20: */
+        }
+      } else {
+        jx = kx;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          i__4 = jx;
+          if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
+            i__4 = jx;
+            temp.r = x[i__4].r, temp.i = x[i__4].i;
+            ix = kx;
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__4 = 1, i__2 = j - *k;
+            i__3 = j - 1;
+            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+              i__4 = ix;
+              i__2 = ix;
+              i__5 = l + i__ + j * a_dim1;
+              q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
+              q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + q__2.i;
+              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+              ix += *incx;
+              /* L30: */
+            }
+            if (nounit) {
+              i__3 = jx;
+              i__4 = jx;
+              i__2 = kplus1 + j * a_dim1;
+              q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i,
+              q__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r;
+              x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+            }
+          }
+          jx += *incx;
+          if (j > *k) {
+            kx += *incx;
+          }
+          /* L40: */
+        }
+      }
     } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    i__3 = j;
-		    temp.r = x[i__3].r, temp.i = x[i__3].i;
-		    l = kplus1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__3 = kplus1 + j * a_dim1;
-			    q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-				    q__1.i = temp.r * a[i__3].i + temp.i * a[
-				    i__3].r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    i__4 = l + i__ + j * a_dim1;
-			    i__1 = i__;
-			    q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-				    i__1].i, q__2.i = a[i__4].r * x[i__1].i + 
-				    a[i__4].i * x[i__1].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-			}
-		    } else {
-			if (nounit) {
-			    r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-			    q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-				    q__1.i = temp.r * q__2.i + temp.i * 
-				    q__2.r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-			    i__4 = i__;
-			    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-				    q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-				    i__4].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-			}
-		    }
-		    i__3 = j;
-		    x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    i__3 = jx;
-		    temp.r = x[i__3].r, temp.i = x[i__3].i;
-		    kx -= *incx;
-		    ix = kx;
-		    l = kplus1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__3 = kplus1 + j * a_dim1;
-			    q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-				    q__1.i = temp.r * a[i__3].i + temp.i * a[
-				    i__3].r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    i__4 = l + i__ + j * a_dim1;
-			    i__1 = ix;
-			    q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-				    i__1].i, q__2.i = a[i__4].r * x[i__1].i + 
-				    a[i__4].i * x[i__1].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			    ix -= *incx;
-/* L120: */
-			}
-		    } else {
-			if (nounit) {
-			    r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-			    q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-				    q__1.i = temp.r * q__2.i + temp.i * 
-				    q__2.r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-			    i__4 = ix;
-			    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-				    q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-				    i__4].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			    ix -= *incx;
-/* L130: */
-			}
-		    }
-		    i__3 = jx;
-		    x[i__3].r = temp.r, x[i__3].i = temp.i;
-		    jx -= *incx;
-/* L140: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    i__4 = j;
-		    temp.r = x[i__4].r, temp.i = x[i__4].i;
-		    l = 1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__4 = j * a_dim1 + 1;
-			    q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-				    q__1.i = temp.r * a[i__4].i + temp.i * a[
-				    i__4].r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    i__1 = l + i__ + j * a_dim1;
-			    i__2 = i__;
-			    q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-				    i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-				    a[i__1].i * x[i__2].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-			}
-		    } else {
-			if (nounit) {
-			    r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-			    q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-				    q__1.i = temp.r * q__2.i + temp.i * 
-				    q__2.r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-			    i__1 = i__;
-			    q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-				    q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-				    i__1].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-			}
-		    }
-		    i__4 = j;
-		    x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-		}
-	    } else {
-		jx = kx;
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    i__4 = jx;
-		    temp.r = x[i__4].r, temp.i = x[i__4].i;
-		    kx += *incx;
-		    ix = kx;
-		    l = 1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__4 = j * a_dim1 + 1;
-			    q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-				    q__1.i = temp.r * a[i__4].i + temp.i * a[
-				    i__4].r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    i__1 = l + i__ + j * a_dim1;
-			    i__2 = ix;
-			    q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-				    i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-				    a[i__1].i * x[i__2].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			    ix += *incx;
-/* L180: */
-			}
-		    } else {
-			if (nounit) {
-			    r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-			    q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-				    q__1.i = temp.r * q__2.i + temp.i * 
-				    q__2.r;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-			    i__1 = ix;
-			    q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-				    q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-				    i__1].r;
-			    q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-				    q__2.i;
-			    temp.r = q__1.r, temp.i = q__1.i;
-			    ix += *incx;
-/* L190: */
-			}
-		    }
-		    i__4 = jx;
-		    x[i__4].r = temp.r, x[i__4].i = temp.i;
-		    jx += *incx;
-/* L200: */
-		}
-	    }
-	}
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          i__1 = j;
+          if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
+            i__1 = j;
+            temp.r = x[i__1].r, temp.i = x[i__1].i;
+            l = 1 - j;
+            /* Computing MIN */
+            i__1 = *n, i__3 = j + *k;
+            i__4 = j + 1;
+            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
+              i__1 = i__;
+              i__3 = i__;
+              i__2 = l + i__ + j * a_dim1;
+              q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
+              q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i;
+              x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+              /* L50: */
+            }
+            if (nounit) {
+              i__4 = j;
+              i__1 = j;
+              i__3 = j * a_dim1 + 1;
+              q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i,
+              q__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r;
+              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+            }
+          }
+          /* L60: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          i__4 = jx;
+          if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
+            i__4 = jx;
+            temp.r = x[i__4].r, temp.i = x[i__4].i;
+            ix = kx;
+            l = 1 - j;
+            /* Computing MIN */
+            i__4 = *n, i__1 = j + *k;
+            i__3 = j + 1;
+            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
+              i__4 = ix;
+              i__1 = ix;
+              i__2 = l + i__ + j * a_dim1;
+              q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
+              q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + q__2.i;
+              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
+              ix -= *incx;
+              /* L70: */
+            }
+            if (nounit) {
+              i__3 = jx;
+              i__4 = jx;
+              i__1 = j * a_dim1 + 1;
+              q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i,
+              q__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r;
+              x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+            }
+          }
+          jx -= *incx;
+          if (*n - j >= *k) {
+            kx -= *incx;
+          }
+          /* L80: */
+        }
+      }
     }
+  } else {
+    /*        Form  x := A'*x  or  x := conjg( A' )*x. */
 
-    return 0;
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          i__3 = j;
+          temp.r = x[i__3].r, temp.i = x[i__3].i;
+          l = kplus1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__3 = kplus1 + j * a_dim1;
+              q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              i__4 = l + i__ + j * a_dim1;
+              i__1 = i__;
+              q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
+              q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              /* L90: */
+            }
+          } else {
+            if (nounit) {
+              r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
+              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+              i__4 = i__;
+              q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              /* L100: */
+            }
+          }
+          i__3 = j;
+          x[i__3].r = temp.r, x[i__3].i = temp.i;
+          /* L110: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          i__3 = jx;
+          temp.r = x[i__3].r, temp.i = x[i__3].i;
+          kx -= *incx;
+          ix = kx;
+          l = kplus1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__3 = kplus1 + j * a_dim1;
+              q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              i__4 = l + i__ + j * a_dim1;
+              i__1 = ix;
+              q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
+              q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              ix -= *incx;
+              /* L120: */
+            }
+          } else {
+            if (nounit) {
+              r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
+              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+              i__4 = ix;
+              q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              ix -= *incx;
+              /* L130: */
+            }
+          }
+          i__3 = jx;
+          x[i__3].r = temp.r, x[i__3].i = temp.i;
+          jx -= *incx;
+          /* L140: */
+        }
+      }
+    } else {
+      if (*incx == 1) {
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          i__4 = j;
+          temp.r = x[i__4].r, temp.i = x[i__4].i;
+          l = 1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__4 = j * a_dim1 + 1;
+              q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              i__1 = l + i__ + j * a_dim1;
+              i__2 = i__;
+              q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
+              q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              /* L150: */
+            }
+          } else {
+            if (nounit) {
+              r_cnjg(&q__2, &a[j * a_dim1 + 1]);
+              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+              i__1 = i__;
+              q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              /* L160: */
+            }
+          }
+          i__4 = j;
+          x[i__4].r = temp.r, x[i__4].i = temp.i;
+          /* L170: */
+        }
+      } else {
+        jx = kx;
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          i__4 = jx;
+          temp.r = x[i__4].r, temp.i = x[i__4].i;
+          kx += *incx;
+          ix = kx;
+          l = 1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__4 = j * a_dim1 + 1;
+              q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              i__1 = l + i__ + j * a_dim1;
+              i__2 = ix;
+              q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
+              q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              ix += *incx;
+              /* L180: */
+            }
+          } else {
+            if (nounit) {
+              r_cnjg(&q__2, &a[j * a_dim1 + 1]);
+              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
+              temp.r = q__1.r, temp.i = q__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
+              i__1 = ix;
+              q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r;
+              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+              temp.r = q__1.r, temp.i = q__1.i;
+              ix += *incx;
+              /* L190: */
+            }
+          }
+          i__4 = jx;
+          x[i__4].r = temp.r, x[i__4].i = temp.i;
+          jx += *incx;
+          /* L200: */
+        }
+      }
+    }
+  }
 
-/*     End of CTBMV . */
+  /*     End of CTBMV . */
 
 } /* ctbmv_ */
-

diff --git a/blas/f2c/d_cnjg.c b/blas/f2c/d_cnjg.c
deleted file mode 100644
index 623090c..0000000
--- a/blas/f2c/d_cnjg.c
+++ /dev/null

@@ -1,6 +0,0 @@
-#include "datatypes.h"    
-
-void d_cnjg(doublecomplex *r, doublecomplex *z) {
-    r->r = z->r;
-    r->i = -(z->i);
-}

diff --git a/blas/f2c/datatypes.h b/blas/f2c/datatypes.h
index 7c2f4ae..45b108d 100644
--- a/blas/f2c/datatypes.h
+++ b/blas/f2c/datatypes.h

@@ -15,7 +15,6 @@
 typedef struct {
   doublereal r, i;
 } doublecomplex;
-typedef int ftnlen;
 typedef int logical;
 
 #define abs(x) ((x) >= 0 ? (x) : -(x))

diff --git a/blas/f2c/drotm.c b/blas/f2c/drotm.c
index 17a779b..da82317 100644
--- a/blas/f2c/drotm.c
+++ b/blas/f2c/drotm.c

@@ -1,215 +1,213 @@
 /* drotm.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int drotm_(integer *n, doublereal *dx, integer *incx, 
-	doublereal *dy, integer *incy, doublereal *dparam)
-{
-    /* Initialized data */
+/* Subroutine */ void drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy,
+                             doublereal *dparam) {
+  /* Initialized data */
 
-    static doublereal zero = 0.;
-    static doublereal two = 2.;
+  static doublereal zero = 0.;
+  static doublereal two = 2.;
 
-    /* System generated locals */
-    integer i__1, i__2;
+  /* System generated locals */
+  integer i__1, i__2;
 
-    /* Local variables */
-    integer i__;
-    doublereal w, z__;
-    integer kx, ky;
-    doublereal dh11, dh12, dh21, dh22, dflag;
-    integer nsteps;
+  /* Local variables */
+  integer i__;
+  doublereal w, z__;
+  integer kx, ky;
+  doublereal dh11, dh12, dh21, dh22, dflag;
+  integer nsteps;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
+  /*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
 
-/*     (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */
-/*     (DY**T) */
+  /*     (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */
+  /*     (DY**T) */
 
-/*     DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-/*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */
-/*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+  /*     DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
+  /*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */
+  /*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
 
-/*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
+  /*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
 
-/*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-/*     SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */
+  /*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
+  /*     H=(          )    (          )    (          )    (          ) */
+  /*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
+  /*     SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */
 
-/*  Arguments */
-/*  ========= */
+  /*  Arguments */
+  /*  ========= */
 
-/*  N      (input) INTEGER */
-/*         number of elements in input vector(s) */
+  /*  N      (input) INTEGER */
+  /*         number of elements in input vector(s) */
 
-/*  DX     (input/output) DOUBLE PRECISION array, dimension N */
-/*         double precision vector with N elements */
+  /*  DX     (input/output) DOUBLE PRECISION array, dimension N */
+  /*         double precision vector with N elements */
 
-/*  INCX   (input) INTEGER */
-/*         storage spacing between elements of DX */
+  /*  INCX   (input) INTEGER */
+  /*         storage spacing between elements of DX */
 
-/*  DY     (input/output) DOUBLE PRECISION array, dimension N */
-/*         double precision vector with N elements */
+  /*  DY     (input/output) DOUBLE PRECISION array, dimension N */
+  /*         double precision vector with N elements */
 
-/*  INCY   (input) INTEGER */
-/*         storage spacing between elements of DY */
+  /*  INCY   (input) INTEGER */
+  /*         storage spacing between elements of DY */
 
-/*  DPARAM (input/output)  DOUBLE PRECISION array, dimension 5 */
-/*     DPARAM(1)=DFLAG */
-/*     DPARAM(2)=DH11 */
-/*     DPARAM(3)=DH21 */
-/*     DPARAM(4)=DH12 */
-/*     DPARAM(5)=DH22 */
+  /*  DPARAM (input/output)  DOUBLE PRECISION array, dimension 5 */
+  /*     DPARAM(1)=DFLAG */
+  /*     DPARAM(2)=DH11 */
+  /*     DPARAM(3)=DH21 */
+  /*     DPARAM(4)=DH12 */
+  /*     DPARAM(5)=DH22 */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Data statements .. */
-    /* Parameter adjustments */
-    --dparam;
-    --dy;
-    --dx;
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. Data statements .. */
+  /* Parameter adjustments */
+  --dparam;
+  --dy;
+  --dx;
 
-    /* Function Body */
-/*     .. */
+  /* Function Body */
+  /*     .. */
 
-    dflag = dparam[1];
-    if (*n <= 0 || dflag + two == zero) {
-	goto L140;
-    }
-    if (! (*incx == *incy && *incx > 0)) {
-	goto L70;
-    }
+  dflag = dparam[1];
+  if (*n <= 0 || dflag + two == zero) {
+    goto L140;
+  }
+  if (!(*incx == *incy && *incx > 0)) {
+    goto L70;
+  }
 
-    nsteps = *n * *incx;
-    if (dflag < 0.) {
-	goto L50;
-    } else if (dflag == 0) {
-	goto L10;
-    } else {
-	goto L30;
-    }
+  nsteps = *n * *incx;
+  if (dflag < 0.) {
+    goto L50;
+  } else if (dflag == 0) {
+    goto L10;
+  } else {
+    goto L30;
+  }
 L10:
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-	w = dx[i__];
-	z__ = dy[i__];
-	dx[i__] = w + z__ * dh12;
-	dy[i__] = w * dh21 + z__;
-/* L20: */
-    }
-    goto L140;
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  i__1 = nsteps;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    w = dx[i__];
+    z__ = dy[i__];
+    dx[i__] = w + z__ * dh12;
+    dy[i__] = w * dh21 + z__;
+    /* L20: */
+  }
+  goto L140;
 L30:
-    dh11 = dparam[2];
-    dh22 = dparam[5];
-    i__2 = nsteps;
-    i__1 = *incx;
-    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-	w = dx[i__];
-	z__ = dy[i__];
-	dx[i__] = w * dh11 + z__;
-	dy[i__] = -w + dh22 * z__;
-/* L40: */
-    }
-    goto L140;
+  dh11 = dparam[2];
+  dh22 = dparam[5];
+  i__2 = nsteps;
+  i__1 = *incx;
+  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+    w = dx[i__];
+    z__ = dy[i__];
+    dx[i__] = w * dh11 + z__;
+    dy[i__] = -w + dh22 * z__;
+    /* L40: */
+  }
+  goto L140;
 L50:
-    dh11 = dparam[2];
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    dh22 = dparam[5];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-	w = dx[i__];
-	z__ = dy[i__];
-	dx[i__] = w * dh11 + z__ * dh12;
-	dy[i__] = w * dh21 + z__ * dh22;
-/* L60: */
-    }
-    goto L140;
+  dh11 = dparam[2];
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  dh22 = dparam[5];
+  i__1 = nsteps;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    w = dx[i__];
+    z__ = dy[i__];
+    dx[i__] = w * dh11 + z__ * dh12;
+    dy[i__] = w * dh21 + z__ * dh22;
+    /* L60: */
+  }
+  goto L140;
 L70:
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-	kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-	ky = (1 - *n) * *incy + 1;
-    }
+  kx = 1;
+  ky = 1;
+  if (*incx < 0) {
+    kx = (1 - *n) * *incx + 1;
+  }
+  if (*incy < 0) {
+    ky = (1 - *n) * *incy + 1;
+  }
 
-    if (dflag < 0.) {
-	goto L120;
-    } else if (dflag == 0) {
-	goto L80;
-    } else {
-	goto L100;
-    }
+  if (dflag < 0.) {
+    goto L120;
+  } else if (dflag == 0) {
+    goto L80;
+  } else {
+    goto L100;
+  }
 L80:
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	w = dx[kx];
-	z__ = dy[ky];
-	dx[kx] = w + z__ * dh12;
-	dy[ky] = w * dh21 + z__;
-	kx += *incx;
-	ky += *incy;
-/* L90: */
-    }
-    goto L140;
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  i__2 = *n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = dx[kx];
+    z__ = dy[ky];
+    dx[kx] = w + z__ * dh12;
+    dy[ky] = w * dh21 + z__;
+    kx += *incx;
+    ky += *incy;
+    /* L90: */
+  }
+  goto L140;
 L100:
-    dh11 = dparam[2];
-    dh22 = dparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	w = dx[kx];
-	z__ = dy[ky];
-	dx[kx] = w * dh11 + z__;
-	dy[ky] = -w + dh22 * z__;
-	kx += *incx;
-	ky += *incy;
-/* L110: */
-    }
-    goto L140;
+  dh11 = dparam[2];
+  dh22 = dparam[5];
+  i__2 = *n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = dx[kx];
+    z__ = dy[ky];
+    dx[kx] = w * dh11 + z__;
+    dy[ky] = -w + dh22 * z__;
+    kx += *incx;
+    ky += *incy;
+    /* L110: */
+  }
+  goto L140;
 L120:
-    dh11 = dparam[2];
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    dh22 = dparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	w = dx[kx];
-	z__ = dy[ky];
-	dx[kx] = w * dh11 + z__ * dh12;
-	dy[ky] = w * dh21 + z__ * dh22;
-	kx += *incx;
-	ky += *incy;
-/* L130: */
-    }
+  dh11 = dparam[2];
+  dh12 = dparam[4];
+  dh21 = dparam[3];
+  dh22 = dparam[5];
+  i__2 = *n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = dx[kx];
+    z__ = dy[ky];
+    dx[kx] = w * dh11 + z__ * dh12;
+    dy[ky] = w * dh21 + z__ * dh22;
+    kx += *incx;
+    ky += *incy;
+    /* L130: */
+  }
 L140:
-    return 0;
+  return;
 } /* drotm_ */
-

diff --git a/blas/f2c/drotmg.c b/blas/f2c/drotmg.c
index a63eb10..81233cb 100644
--- a/blas/f2c/drotmg.c
+++ b/blas/f2c/drotmg.c

@@ -1,293 +1,293 @@
 /* drotmg.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int drotmg_(doublereal *dd1, doublereal *dd2, doublereal *
-	dx1, doublereal *dy1, doublereal *dparam)
-{
-    /* Initialized data */
+/* Subroutine */ void drotmg_(doublereal *dd1, doublereal *dd2, doublereal *dx1, doublereal *dy1, doublereal *dparam) {
+  /* Initialized data */
 
-    static doublereal zero = 0.;
-    static doublereal one = 1.;
-    static doublereal two = 2.;
-    static doublereal gam = 4096.;
-    static doublereal gamsq = 16777216.;
-    static doublereal rgamsq = 5.9604645e-8;
+  static doublereal zero = 0.;
+  static doublereal one = 1.;
+  static doublereal two = 2.;
+  static doublereal gam = 4096.;
+  static doublereal gamsq = 16777216.;
+  static doublereal rgamsq = 5.9604645e-8;
 
-    /* Format strings */
-    static char fmt_120[] = "";
-    static char fmt_150[] = "";
-    static char fmt_180[] = "";
-    static char fmt_210[] = "";
+  /* Format strings */
+  static char fmt_120[] = "";
+  static char fmt_150[] = "";
+  static char fmt_180[] = "";
+  static char fmt_210[] = "";
 
-    /* System generated locals */
-    doublereal d__1;
+  /* System generated locals */
+  doublereal d__1;
 
-    /* Local variables */
-    doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22;
-    integer igo;
-    doublereal dflag, dtemp;
+  /* Local variables */
+  doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22;
+  integer igo;
+  doublereal dflag, dtemp;
 
-    /* Assigned format variables */
-    static char *igo_fmt;
+  /* Assigned format variables */
+  static char *igo_fmt;
+  (void)igo_fmt;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
-/*     THE SECOND COMPONENT OF THE 2-VECTOR  (DSQRT(DD1)*DX1,DSQRT(DD2)* */
-/*     DY2)**T. */
-/*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+  /*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
+  /*     THE SECOND COMPONENT OF THE 2-VECTOR  (DSQRT(DD1)*DX1,DSQRT(DD2)* */
+  /*     DY2)**T. */
+  /*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
 
-/*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
+  /*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
 
-/*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-/*     LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */
-/*     RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */
-/*     VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */
+  /*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
+  /*     H=(          )    (          )    (          )    (          ) */
+  /*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
+  /*     LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */
+  /*     RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */
+  /*     VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */
 
-/*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
-/*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
-/*     OF DD1 AND DD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
+  /*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
+  /*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
+  /*     OF DD1 AND DD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
 
+  /*  Arguments */
+  /*  ========= */
 
-/*  Arguments */
-/*  ========= */
+  /*  DD1    (input/output) DOUBLE PRECISION */
 
-/*  DD1    (input/output) DOUBLE PRECISION */
+  /*  DD2    (input/output) DOUBLE PRECISION */
 
-/*  DD2    (input/output) DOUBLE PRECISION */
+  /*  DX1    (input/output) DOUBLE PRECISION */
 
-/*  DX1    (input/output) DOUBLE PRECISION */
+  /*  DY1    (input) DOUBLE PRECISION */
 
-/*  DY1    (input) DOUBLE PRECISION */
+  /*  DPARAM (input/output)  DOUBLE PRECISION array, dimension 5 */
+  /*     DPARAM(1)=DFLAG */
+  /*     DPARAM(2)=DH11 */
+  /*     DPARAM(3)=DH21 */
+  /*     DPARAM(4)=DH12 */
+  /*     DPARAM(5)=DH22 */
 
-/*  DPARAM (input/output)  DOUBLE PRECISION array, dimension 5 */
-/*     DPARAM(1)=DFLAG */
-/*     DPARAM(2)=DH11 */
-/*     DPARAM(3)=DH21 */
-/*     DPARAM(4)=DH12 */
-/*     DPARAM(5)=DH22 */
+  /*  ===================================================================== */
 
-/*  ===================================================================== */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
+  /*     .. Data statements .. */
 
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Data statements .. */
+  /* Parameter adjustments */
+  --dparam;
 
-    /* Parameter adjustments */
-    --dparam;
-
-    /* Function Body */
-/*     .. */
-    if (! (*dd1 < zero)) {
-	goto L10;
-    }
-/*       GO ZERO-H-D-AND-DX1.. */
-    goto L60;
+  /* Function Body */
+  /*     .. */
+  if (!(*dd1 < zero)) {
+    goto L10;
+  }
+  /*       GO ZERO-H-D-AND-DX1.. */
+  goto L60;
 L10:
-/*     CASE-DD1-NONNEGATIVE */
-    dp2 = *dd2 * *dy1;
-    if (! (dp2 == zero)) {
-	goto L20;
-    }
-    dflag = -two;
-    goto L260;
+  /*     CASE-DD1-NONNEGATIVE */
+  dp2 = *dd2 * *dy1;
+  if (!(dp2 == zero)) {
+    goto L20;
+  }
+  dflag = -two;
+  goto L260;
 /*     REGULAR-CASE.. */
 L20:
-    dp1 = *dd1 * *dx1;
-    dq2 = dp2 * *dy1;
-    dq1 = dp1 * *dx1;
+  dp1 = *dd1 * *dx1;
+  dq2 = dp2 * *dy1;
+  dq1 = dp1 * *dx1;
 
-    if (! (abs(dq1) > abs(dq2))) {
-	goto L40;
-    }
-    dh21 = -(*dy1) / *dx1;
-    dh12 = dp2 / dp1;
+  if (!(abs(dq1) > abs(dq2))) {
+    goto L40;
+  }
+  dh21 = -(*dy1) / *dx1;
+  dh12 = dp2 / dp1;
 
-    du = one - dh12 * dh21;
+  du = one - dh12 * dh21;
 
-    if (! (du <= zero)) {
-	goto L30;
-    }
-/*         GO ZERO-H-D-AND-DX1.. */
-    goto L60;
+  if (!(du <= zero)) {
+    goto L30;
+  }
+  /*         GO ZERO-H-D-AND-DX1.. */
+  goto L60;
 L30:
-    dflag = zero;
-    *dd1 /= du;
-    *dd2 /= du;
-    *dx1 *= du;
-/*         GO SCALE-CHECK.. */
-    goto L100;
+  dflag = zero;
+  *dd1 /= du;
+  *dd2 /= du;
+  *dx1 *= du;
+  /*         GO SCALE-CHECK.. */
+  goto L100;
 L40:
-    if (! (dq2 < zero)) {
-	goto L50;
-    }
-/*         GO ZERO-H-D-AND-DX1.. */
-    goto L60;
+  if (!(dq2 < zero)) {
+    goto L50;
+  }
+  /*         GO ZERO-H-D-AND-DX1.. */
+  goto L60;
 L50:
-    dflag = one;
-    dh11 = dp1 / dp2;
-    dh22 = *dx1 / *dy1;
-    du = one + dh11 * dh22;
-    dtemp = *dd2 / du;
-    *dd2 = *dd1 / du;
-    *dd1 = dtemp;
-    *dx1 = *dy1 * du;
-/*         GO SCALE-CHECK */
-    goto L100;
+  dflag = one;
+  dh11 = dp1 / dp2;
+  dh22 = *dx1 / *dy1;
+  du = one + dh11 * dh22;
+  dtemp = *dd2 / du;
+  *dd2 = *dd1 / du;
+  *dd1 = dtemp;
+  *dx1 = *dy1 * du;
+  /*         GO SCALE-CHECK */
+  goto L100;
 /*     PROCEDURE..ZERO-H-D-AND-DX1.. */
 L60:
-    dflag = -one;
-    dh11 = zero;
-    dh12 = zero;
-    dh21 = zero;
-    dh22 = zero;
+  dflag = -one;
+  dh11 = zero;
+  dh12 = zero;
+  dh21 = zero;
+  dh22 = zero;
 
-    *dd1 = zero;
-    *dd2 = zero;
-    *dx1 = zero;
-/*         RETURN.. */
-    goto L220;
+  *dd1 = zero;
+  *dd2 = zero;
+  *dx1 = zero;
+  /*         RETURN.. */
+  goto L220;
 /*     PROCEDURE..FIX-H.. */
 L70:
-    if (! (dflag >= zero)) {
-	goto L90;
-    }
-
-    if (! (dflag == zero)) {
-	goto L80;
-    }
-    dh11 = one;
-    dh22 = one;
-    dflag = -one;
+  if (!(dflag >= zero)) {
     goto L90;
+  }
+
+  if (!(dflag == zero)) {
+    goto L80;
+  }
+  dh11 = one;
+  dh22 = one;
+  dflag = -one;
+  goto L90;
 L80:
-    dh21 = -one;
-    dh12 = one;
-    dflag = -one;
+  dh21 = -one;
+  dh12 = one;
+  dflag = -one;
 L90:
-    switch (igo) {
-	case 0: goto L120;
-	case 1: goto L150;
-	case 2: goto L180;
-	case 3: goto L210;
-    }
+  switch (igo) {
+    case 0:
+      goto L120;
+    case 1:
+      goto L150;
+    case 2:
+      goto L180;
+    case 3:
+      goto L210;
+  }
 /*     PROCEDURE..SCALE-CHECK */
 L100:
 L110:
-    if (! (*dd1 <= rgamsq)) {
-	goto L130;
-    }
-    if (*dd1 == zero) {
-	goto L160;
-    }
-    igo = 0;
-    igo_fmt = fmt_120;
-/*              FIX-H.. */
-    goto L70;
+  if (!(*dd1 <= rgamsq)) {
+    goto L130;
+  }
+  if (*dd1 == zero) {
+    goto L160;
+  }
+  igo = 0;
+  igo_fmt = fmt_120;
+  /*              FIX-H.. */
+  goto L70;
 L120:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd1 *= d__1 * d__1;
-    *dx1 /= gam;
-    dh11 /= gam;
-    dh12 /= gam;
-    goto L110;
+  /* Computing 2nd power */
+  d__1 = gam;
+  *dd1 *= d__1 * d__1;
+  *dx1 /= gam;
+  dh11 /= gam;
+  dh12 /= gam;
+  goto L110;
 L130:
 L140:
-    if (! (*dd1 >= gamsq)) {
-	goto L160;
-    }
-    igo = 1;
-    igo_fmt = fmt_150;
-/*              FIX-H.. */
-    goto L70;
+  if (!(*dd1 >= gamsq)) {
+    goto L160;
+  }
+  igo = 1;
+  igo_fmt = fmt_150;
+  /*              FIX-H.. */
+  goto L70;
 L150:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd1 /= d__1 * d__1;
-    *dx1 *= gam;
-    dh11 *= gam;
-    dh12 *= gam;
-    goto L140;
+  /* Computing 2nd power */
+  d__1 = gam;
+  *dd1 /= d__1 * d__1;
+  *dx1 *= gam;
+  dh11 *= gam;
+  dh12 *= gam;
+  goto L140;
 L160:
 L170:
-    if (! (abs(*dd2) <= rgamsq)) {
-	goto L190;
-    }
-    if (*dd2 == zero) {
-	goto L220;
-    }
-    igo = 2;
-    igo_fmt = fmt_180;
-/*              FIX-H.. */
-    goto L70;
+  if (!(abs(*dd2) <= rgamsq)) {
+    goto L190;
+  }
+  if (*dd2 == zero) {
+    goto L220;
+  }
+  igo = 2;
+  igo_fmt = fmt_180;
+  /*              FIX-H.. */
+  goto L70;
 L180:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd2 *= d__1 * d__1;
-    dh21 /= gam;
-    dh22 /= gam;
-    goto L170;
+  /* Computing 2nd power */
+  d__1 = gam;
+  *dd2 *= d__1 * d__1;
+  dh21 /= gam;
+  dh22 /= gam;
+  goto L170;
 L190:
 L200:
-    if (! (abs(*dd2) >= gamsq)) {
-	goto L220;
-    }
-    igo = 3;
-    igo_fmt = fmt_210;
-/*              FIX-H.. */
-    goto L70;
+  if (!(abs(*dd2) >= gamsq)) {
+    goto L220;
+  }
+  igo = 3;
+  igo_fmt = fmt_210;
+  /*              FIX-H.. */
+  goto L70;
 L210:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd2 /= d__1 * d__1;
-    dh21 *= gam;
-    dh22 *= gam;
-    goto L200;
+  /* Computing 2nd power */
+  d__1 = gam;
+  *dd2 /= d__1 * d__1;
+  dh21 *= gam;
+  dh22 *= gam;
+  goto L200;
 L220:
-    if (dflag < 0.) {
-	goto L250;
-    } else if (dflag == 0) {
-	goto L230;
-    } else {
-	goto L240;
-    }
+  if (dflag < 0.) {
+    goto L250;
+  } else if (dflag == 0) {
+    goto L230;
+  } else {
+    goto L240;
+  }
 L230:
-    dparam[3] = dh21;
-    dparam[4] = dh12;
-    goto L260;
+  dparam[3] = dh21;
+  dparam[4] = dh12;
+  goto L260;
 L240:
-    dparam[2] = dh11;
-    dparam[5] = dh22;
-    goto L260;
+  dparam[2] = dh11;
+  dparam[5] = dh22;
+  goto L260;
 L250:
-    dparam[2] = dh11;
-    dparam[3] = dh21;
-    dparam[4] = dh12;
-    dparam[5] = dh22;
+  dparam[2] = dh11;
+  dparam[3] = dh21;
+  dparam[4] = dh12;
+  dparam[5] = dh22;
 L260:
-    dparam[1] = dflag;
-    return 0;
+  dparam[1] = dflag;
 } /* drotmg_ */
-

diff --git a/blas/f2c/dsbmv.c b/blas/f2c/dsbmv.c
index c6b4b21..4619369 100644
--- a/blas/f2c/dsbmv.c
+++ b/blas/f2c/dsbmv.c

@@ -1,366 +1,356 @@
 /* dsbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int dsbmv_(char *uplo, integer *n, integer *k, doublereal *
-	alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
-	doublereal *beta, doublereal *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+/* Subroutine */ void dsbmv_(char *uplo, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda,
+                             doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
 
-    /* Local variables */
-    integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+  doublereal temp1, temp2;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  DSBMV  performs the matrix-vector  operation */
+  /*  DSBMV  performs the matrix-vector  operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric band matrix, with k super-diagonals. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n symmetric band matrix, with k super-diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the band matrix A is being supplied as */
+  /*           follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  being supplied. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  being supplied. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry, K specifies the number of super-diagonals of the */
+  /*           matrix A. K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - DOUBLE PRECISION. */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the symmetric matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer the upper */
+  /*           triangular part of a symmetric band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the symmetric matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer the lower */
+  /*           triangular part of a symmetric band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
+  /*  X      - DOUBLE PRECISION array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the */
+  /*           vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
+  /*  BETA   - DOUBLE PRECISION. */
+  /*           On entry, BETA specifies the scalar beta. */
+  /*           Unchanged on exit. */
 
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
+  /*  Y      - DOUBLE PRECISION array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the */
+  /*           vector y. On exit, Y is overwritten by the updated vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
+  /*  Level 2 Blas routine. */
 
-/*  Level 2 Blas routine. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  ===================================================================== */
 
-/*  ===================================================================== */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     Test the input parameters. */
 
-/*     Test the input parameters. */
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
+  --y;
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
-    --y;
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*k < 0) {
+    info = 3;
+  } else if (*lda < *k + 1) {
+    info = 6;
+  } else if (*incx == 0) {
+    info = 8;
+  } else if (*incy == 0) {
+    info = 11;
+  }
+  if (info != 0) {
+    xerbla_("DSBMV ", &info);
+    return;
+  }
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*k < 0) {
-	info = 3;
-    } else if (*lda < *k + 1) {
-	info = 6;
-    } else if (*incx == 0) {
-	info = 8;
-    } else if (*incy == 0) {
-	info = 11;
-    }
-    if (info != 0) {
-	xerbla_("DSBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /*     Quick return if possible. */
 
-/*     Quick return if possible. */
+  if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
+    return;
+  }
 
-    if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
-	return 0;
-    }
+  /*     Set up the start points in  X  and  Y. */
 
-/*     Set up the start points in  X  and  Y. */
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
 
-    if (*incx > 0) {
-	kx = 1;
+  /*     Start the operations. In this version the elements of the array A */
+  /*     are accessed sequentially with one pass through A. */
+
+  /*     First form  y := beta*y. */
+
+  if (*beta != 1.) {
+    if (*incy == 1) {
+      if (*beta == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = 0.;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = *beta * y[i__];
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (*beta == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = 0.;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = *beta * y[iy];
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (*alpha == 0.) {
+    return;
+  }
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when upper triangle of A is stored. */
+
+    kplus1 = *k + 1;
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__2 = 1, i__3 = j - *k;
+        i__4 = j - 1;
+        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+          y[i__] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[i__];
+          /* L50: */
+        }
+        y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.;
+        ix = kx;
+        iy = ky;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__4 = 1, i__2 = j - *k;
+        i__3 = j - 1;
+        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+          y[iy] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[ix];
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        if (j > *k) {
+          kx += *incx;
+          ky += *incy;
+        }
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when lower triangle of A is stored. */
 
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-	if (*incy == 1) {
-	    if (*beta == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = 0.;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = *beta * y[i__];
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (*beta == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = 0.;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = *beta * y[iy];
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (*alpha == 0.) {
-	return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-	kplus1 = *k + 1;
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__2 = 1, i__3 = j - *k;
-		i__4 = j - 1;
-		for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-		    y[i__] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L50: */
-		}
-		y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.;
-		ix = kx;
-		iy = ky;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__4 = 1, i__2 = j - *k;
-		i__3 = j - 1;
-		for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-		    y[iy] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[ix];
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * 
-			temp2;
-		jx += *incx;
-		jy += *incy;
-		if (j > *k) {
-		    kx += *incx;
-		    ky += *incy;
-		}
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.;
+        y[j] += temp1 * a[j * a_dim1 + 1];
+        l = 1 - j;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          y[i__] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[i__];
+          /* L90: */
+        }
+        y[j] += *alpha * temp2;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.;
-		y[j] += temp1 * a[j * a_dim1 + 1];
-		l = 1 - j;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    y[i__] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-		}
-		y[j] += *alpha * temp2;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.;
-		y[jy] += temp1 * a[j * a_dim1 + 1];
-		l = 1 - j;
-		ix = jx;
-		iy = jy;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    ix += *incx;
-		    iy += *incy;
-		    y[iy] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[ix];
-/* L110: */
-		}
-		y[jy] += *alpha * temp2;
-		jx += *incx;
-		jy += *incy;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.;
+        y[jy] += temp1 * a[j * a_dim1 + 1];
+        l = 1 - j;
+        ix = jx;
+        iy = jy;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          ix += *incx;
+          iy += *incy;
+          y[iy] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[ix];
+          /* L110: */
+        }
+        y[jy] += *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of DSBMV . */
+  /*     End of DSBMV . */
 
 } /* dsbmv_ */
-

diff --git a/blas/f2c/dspmv.c b/blas/f2c/dspmv.c
index 0b4e92d..314d67b 100644
--- a/blas/f2c/dspmv.c
+++ b/blas/f2c/dspmv.c

@@ -1,316 +1,308 @@
 /* dspmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int dspmv_(char *uplo, integer *n, doublereal *alpha, 
-	doublereal *ap, doublereal *x, integer *incx, doublereal *beta, 
-	doublereal *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
+/* Subroutine */ void dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx,
+                             doublereal *beta, doublereal *y, integer *incy) {
+  /* System generated locals */
+  integer i__1, i__2;
 
-    /* Local variables */
-    integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+  doublereal temp1, temp2;
+  extern logical lsame_(char *, char *);
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  DSPMV  performs the matrix-vector operation */
+  /*  DSPMV  performs the matrix-vector operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix, supplied in packed form. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n symmetric matrix, supplied in packed form. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the matrix A is supplied in the packed */
+  /*           array AP as follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - DOUBLE PRECISION. */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Unchanged on exit. */
+  /*  AP     - DOUBLE PRECISION array of DIMENSION at least */
+  /*           ( ( n*( n + 1 ) )/2 ). */
+  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
+  /*           contain the upper triangular part of the symmetric matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+  /*           and a( 2, 2 ) respectively, and so on. */
+  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
+  /*           contain the lower triangular part of the symmetric matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+  /*           and a( 3, 1 ) respectively, and so on. */
+  /*           Unchanged on exit. */
 
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
+  /*  X      - DOUBLE PRECISION array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
+  /*  BETA   - DOUBLE PRECISION. */
+  /*           On entry, BETA specifies the scalar beta. When BETA is */
+  /*           supplied as zero then Y need not be set on input. */
+  /*           Unchanged on exit. */
 
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
+  /*  Y      - DOUBLE PRECISION array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the n */
+  /*           element vector y. On exit, Y is overwritten by the updated */
+  /*           vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
+  /* Parameter adjustments */
+  --y;
+  --x;
+  --ap;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*incx == 0) {
-	info = 6;
-    } else if (*incy == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("DSPMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*incx == 0) {
+    info = 6;
+  } else if (*incy == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("DSPMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
-	return 0;
-    }
+  if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array AP */
+  /*     are accessed sequentially with one pass through AP. */
+
+  /*     First form  y := beta*y. */
+
+  if (*beta != 1.) {
+    if (*incy == 1) {
+      if (*beta == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = 0.;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = *beta * y[i__];
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (*beta == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = 0.;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = *beta * y[iy];
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (*alpha == 0.) {
+    return;
+  }
+  kk = 1;
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when AP contains the upper triangle. */
+
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.;
+        k = kk;
+        i__2 = j - 1;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+          y[i__] += temp1 * ap[k];
+          temp2 += ap[k] * x[i__];
+          ++k;
+          /* L50: */
+        }
+        y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+        kk += j;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.;
+        ix = kx;
+        iy = ky;
+        i__2 = kk + j - 2;
+        for (k = kk; k <= i__2; ++k) {
+          y[iy] += temp1 * ap[k];
+          temp2 += ap[k] * x[ix];
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        kk += j;
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when AP contains the lower triangle. */
 
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-	if (*incy == 1) {
-	    if (*beta == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = 0.;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = *beta * y[i__];
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (*beta == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = 0.;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = *beta * y[iy];
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (*alpha == 0.) {
-	return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.;
-		k = kk;
-		i__2 = j - 1;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    y[i__] += temp1 * ap[k];
-		    temp2 += ap[k] * x[i__];
-		    ++k;
-/* L50: */
-		}
-		y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-		kk += j;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.;
-		ix = kx;
-		iy = ky;
-		i__2 = kk + j - 2;
-		for (k = kk; k <= i__2; ++k) {
-		    y[iy] += temp1 * ap[k];
-		    temp2 += ap[k] * x[ix];
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-		jx += *incx;
-		jy += *incy;
-		kk += j;
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.;
+        y[j] += temp1 * ap[kk];
+        k = kk + 1;
+        i__2 = *n;
+        for (i__ = j + 1; i__ <= i__2; ++i__) {
+          y[i__] += temp1 * ap[k];
+          temp2 += ap[k] * x[i__];
+          ++k;
+          /* L90: */
+        }
+        y[j] += *alpha * temp2;
+        kk += *n - j + 1;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.;
-		y[j] += temp1 * ap[kk];
-		k = kk + 1;
-		i__2 = *n;
-		for (i__ = j + 1; i__ <= i__2; ++i__) {
-		    y[i__] += temp1 * ap[k];
-		    temp2 += ap[k] * x[i__];
-		    ++k;
-/* L90: */
-		}
-		y[j] += *alpha * temp2;
-		kk += *n - j + 1;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.;
-		y[jy] += temp1 * ap[kk];
-		ix = jx;
-		iy = jy;
-		i__2 = kk + *n - j;
-		for (k = kk + 1; k <= i__2; ++k) {
-		    ix += *incx;
-		    iy += *incy;
-		    y[iy] += temp1 * ap[k];
-		    temp2 += ap[k] * x[ix];
-/* L110: */
-		}
-		y[jy] += *alpha * temp2;
-		jx += *incx;
-		jy += *incy;
-		kk += *n - j + 1;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.;
+        y[jy] += temp1 * ap[kk];
+        ix = jx;
+        iy = jy;
+        i__2 = kk + *n - j;
+        for (k = kk + 1; k <= i__2; ++k) {
+          ix += *incx;
+          iy += *incy;
+          y[iy] += temp1 * ap[k];
+          temp2 += ap[k] * x[ix];
+          /* L110: */
+        }
+        y[jy] += *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        kk += *n - j + 1;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of DSPMV . */
+  /*     End of DSPMV . */
 
 } /* dspmv_ */
-

diff --git a/blas/f2c/dtbmv.c b/blas/f2c/dtbmv.c
index aa67d19..92a0aec 100644
--- a/blas/f2c/dtbmv.c
+++ b/blas/f2c/dtbmv.c

@@ -1,428 +1,417 @@
 /* dtbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int dtbmv_(char *uplo, char *trans, char *diag, integer *n, 
-	integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx,
-	 ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+/* Subroutine */ void dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda,
+                             doublereal *x, integer *incx) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
 
-    /* Local variables */
-    integer i__, j, l, ix, jx, kx, info;
-    doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    logical nounit;
+  /* Local variables */
+  integer i__, j, l, ix, jx, kx, info;
+  doublereal temp;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
+  logical nounit;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  DTBMV  performs one of the matrix-vector operations */
+  /*  DTBMV  performs one of the matrix-vector operations */
 
-/*     x := A*x,   or   x := A'*x, */
+  /*     x := A*x,   or   x := A'*x, */
 
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
+  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the matrix is an upper or */
+  /*           lower triangular matrix as follows: */
 
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
+  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
 
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
+  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
+  /*  TRANS  - CHARACTER*1. */
+  /*           On entry, TRANS specifies the operation to be performed as */
+  /*           follows: */
 
-/*              TRANS = 'N' or 'n'   x := A*x. */
+  /*              TRANS = 'N' or 'n'   x := A*x. */
 
-/*              TRANS = 'T' or 't'   x := A'*x. */
+  /*              TRANS = 'T' or 't'   x := A'*x. */
 
-/*              TRANS = 'C' or 'c'   x := A'*x. */
+  /*              TRANS = 'C' or 'c'   x := A'*x. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
+  /*  DIAG   - CHARACTER*1. */
+  /*           On entry, DIAG specifies whether or not A is unit */
+  /*           triangular as follows: */
 
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
+  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
 
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
+  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
+  /*                                  triangular. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
+  /*           super-diagonals of the matrix A. */
+  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
+  /*           sub-diagonals of the matrix A. */
+  /*           K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer an upper */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer a lower */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
+  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
+  /*           corresponding to the diagonal elements of the matrix are not */
+  /*           referenced, but are assumed to be unity. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           transformed vector x. */
+  /*  X      - DOUBLE PRECISION array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. On exit, X is overwritten with the */
+  /*           transformed vector x. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-	    "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-	    ftnlen)1)) {
-	info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-	    "N", (ftnlen)1, (ftnlen)1)) {
-	info = 3;
-    } else if (*n < 0) {
-	info = 4;
-    } else if (*k < 0) {
-	info = 5;
-    } else if (*lda < *k + 1) {
-	info = 7;
-    } else if (*incx == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("DTBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
+    info = 2;
+  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
+    info = 3;
+  } else if (*n < 0) {
+    info = 4;
+  } else if (*k < 0) {
+    info = 5;
+  } else if (*lda < *k + 1) {
+    info = 7;
+  } else if (*incx == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("DTBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0) {
-	return 0;
-    }
+  if (*n == 0) {
+    return;
+  }
 
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+  nounit = lsame_(diag, "N");
 
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
+  /*     Set up the start point in X if the increment is not unity. This */
+  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
 
-    if (*incx <= 0) {
-	kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-	kx = 1;
-    }
+  if (*incx <= 0) {
+    kx = 1 - (*n - 1) * *incx;
+  } else if (*incx != 1) {
+    kx = 1;
+  }
 
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
+  /*     Start the operations. In this version the elements of A are */
+  /*     accessed sequentially with one pass through A. */
 
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+  if (lsame_(trans, "N")) {
+    /*         Form  x := A*x. */
 
-/*         Form  x := A*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    if (x[j] != 0.) {
-			temp = x[j];
-			l = kplus1 - j;
-/* Computing MAX */
-			i__2 = 1, i__3 = j - *k;
-			i__4 = j - 1;
-			for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-			    x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L10: */
-			}
-			if (nounit) {
-			    x[j] *= a[kplus1 + j * a_dim1];
-			}
-		    }
-/* L20: */
-		}
-	    } else {
-		jx = kx;
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    if (x[jx] != 0.) {
-			temp = x[jx];
-			ix = kx;
-			l = kplus1 - j;
-/* Computing MAX */
-			i__4 = 1, i__2 = j - *k;
-			i__3 = j - 1;
-			for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-			    x[ix] += temp * a[l + i__ + j * a_dim1];
-			    ix += *incx;
-/* L30: */
-			}
-			if (nounit) {
-			    x[jx] *= a[kplus1 + j * a_dim1];
-			}
-		    }
-		    jx += *incx;
-		    if (j > *k) {
-			kx += *incx;
-		    }
-/* L40: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    if (x[j] != 0.) {
-			temp = x[j];
-			l = 1 - j;
-/* Computing MIN */
-			i__1 = *n, i__3 = j + *k;
-			i__4 = j + 1;
-			for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-			    x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L50: */
-			}
-			if (nounit) {
-			    x[j] *= a[j * a_dim1 + 1];
-			}
-		    }
-/* L60: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    if (x[jx] != 0.) {
-			temp = x[jx];
-			ix = kx;
-			l = 1 - j;
-/* Computing MIN */
-			i__4 = *n, i__1 = j + *k;
-			i__3 = j + 1;
-			for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-			    x[ix] += temp * a[l + i__ + j * a_dim1];
-			    ix -= *incx;
-/* L70: */
-			}
-			if (nounit) {
-			    x[jx] *= a[j * a_dim1 + 1];
-			}
-		    }
-		    jx -= *incx;
-		    if (*n - j >= *k) {
-			kx -= *incx;
-		    }
-/* L80: */
-		}
-	    }
-	}
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          if (x[j] != 0.) {
+            temp = x[j];
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__2 = 1, i__3 = j - *k;
+            i__4 = j - 1;
+            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+              x[i__] += temp * a[l + i__ + j * a_dim1];
+              /* L10: */
+            }
+            if (nounit) {
+              x[j] *= a[kplus1 + j * a_dim1];
+            }
+          }
+          /* L20: */
+        }
+      } else {
+        jx = kx;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          if (x[jx] != 0.) {
+            temp = x[jx];
+            ix = kx;
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__4 = 1, i__2 = j - *k;
+            i__3 = j - 1;
+            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+              x[ix] += temp * a[l + i__ + j * a_dim1];
+              ix += *incx;
+              /* L30: */
+            }
+            if (nounit) {
+              x[jx] *= a[kplus1 + j * a_dim1];
+            }
+          }
+          jx += *incx;
+          if (j > *k) {
+            kx += *incx;
+          }
+          /* L40: */
+        }
+      }
     } else {
-
-/*        Form  x := A'*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    temp = x[j];
-		    l = kplus1 - j;
-		    if (nounit) {
-			temp *= a[kplus1 + j * a_dim1];
-		    }
-/* Computing MAX */
-		    i__4 = 1, i__1 = j - *k;
-		    i__3 = max(i__4,i__1);
-		    for (i__ = j - 1; i__ >= i__3; --i__) {
-			temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-		    }
-		    x[j] = temp;
-/* L100: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    temp = x[jx];
-		    kx -= *incx;
-		    ix = kx;
-		    l = kplus1 - j;
-		    if (nounit) {
-			temp *= a[kplus1 + j * a_dim1];
-		    }
-/* Computing MAX */
-		    i__4 = 1, i__1 = j - *k;
-		    i__3 = max(i__4,i__1);
-		    for (i__ = j - 1; i__ >= i__3; --i__) {
-			temp += a[l + i__ + j * a_dim1] * x[ix];
-			ix -= *incx;
-/* L110: */
-		    }
-		    x[jx] = temp;
-		    jx -= *incx;
-/* L120: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    temp = x[j];
-		    l = 1 - j;
-		    if (nounit) {
-			temp *= a[j * a_dim1 + 1];
-		    }
-/* Computing MIN */
-		    i__1 = *n, i__2 = j + *k;
-		    i__4 = min(i__1,i__2);
-		    for (i__ = j + 1; i__ <= i__4; ++i__) {
-			temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-		    }
-		    x[j] = temp;
-/* L140: */
-		}
-	    } else {
-		jx = kx;
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    temp = x[jx];
-		    kx += *incx;
-		    ix = kx;
-		    l = 1 - j;
-		    if (nounit) {
-			temp *= a[j * a_dim1 + 1];
-		    }
-/* Computing MIN */
-		    i__1 = *n, i__2 = j + *k;
-		    i__4 = min(i__1,i__2);
-		    for (i__ = j + 1; i__ <= i__4; ++i__) {
-			temp += a[l + i__ + j * a_dim1] * x[ix];
-			ix += *incx;
-/* L150: */
-		    }
-		    x[jx] = temp;
-		    jx += *incx;
-/* L160: */
-		}
-	    }
-	}
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          if (x[j] != 0.) {
+            temp = x[j];
+            l = 1 - j;
+            /* Computing MIN */
+            i__1 = *n, i__3 = j + *k;
+            i__4 = j + 1;
+            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
+              x[i__] += temp * a[l + i__ + j * a_dim1];
+              /* L50: */
+            }
+            if (nounit) {
+              x[j] *= a[j * a_dim1 + 1];
+            }
+          }
+          /* L60: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          if (x[jx] != 0.) {
+            temp = x[jx];
+            ix = kx;
+            l = 1 - j;
+            /* Computing MIN */
+            i__4 = *n, i__1 = j + *k;
+            i__3 = j + 1;
+            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
+              x[ix] += temp * a[l + i__ + j * a_dim1];
+              ix -= *incx;
+              /* L70: */
+            }
+            if (nounit) {
+              x[jx] *= a[j * a_dim1 + 1];
+            }
+          }
+          jx -= *incx;
+          if (*n - j >= *k) {
+            kx -= *incx;
+          }
+          /* L80: */
+        }
+      }
     }
+  } else {
+    /*        Form  x := A'*x. */
 
-    return 0;
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          temp = x[j];
+          l = kplus1 - j;
+          if (nounit) {
+            temp *= a[kplus1 + j * a_dim1];
+          }
+          /* Computing MAX */
+          i__4 = 1, i__1 = j - *k;
+          i__3 = max(i__4, i__1);
+          for (i__ = j - 1; i__ >= i__3; --i__) {
+            temp += a[l + i__ + j * a_dim1] * x[i__];
+            /* L90: */
+          }
+          x[j] = temp;
+          /* L100: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          temp = x[jx];
+          kx -= *incx;
+          ix = kx;
+          l = kplus1 - j;
+          if (nounit) {
+            temp *= a[kplus1 + j * a_dim1];
+          }
+          /* Computing MAX */
+          i__4 = 1, i__1 = j - *k;
+          i__3 = max(i__4, i__1);
+          for (i__ = j - 1; i__ >= i__3; --i__) {
+            temp += a[l + i__ + j * a_dim1] * x[ix];
+            ix -= *incx;
+            /* L110: */
+          }
+          x[jx] = temp;
+          jx -= *incx;
+          /* L120: */
+        }
+      }
+    } else {
+      if (*incx == 1) {
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          temp = x[j];
+          l = 1 - j;
+          if (nounit) {
+            temp *= a[j * a_dim1 + 1];
+          }
+          /* Computing MIN */
+          i__1 = *n, i__2 = j + *k;
+          i__4 = min(i__1, i__2);
+          for (i__ = j + 1; i__ <= i__4; ++i__) {
+            temp += a[l + i__ + j * a_dim1] * x[i__];
+            /* L130: */
+          }
+          x[j] = temp;
+          /* L140: */
+        }
+      } else {
+        jx = kx;
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          temp = x[jx];
+          kx += *incx;
+          ix = kx;
+          l = 1 - j;
+          if (nounit) {
+            temp *= a[j * a_dim1 + 1];
+          }
+          /* Computing MIN */
+          i__1 = *n, i__2 = j + *k;
+          i__4 = min(i__1, i__2);
+          for (i__ = j + 1; i__ <= i__4; ++i__) {
+            temp += a[l + i__ + j * a_dim1] * x[ix];
+            ix += *incx;
+            /* L150: */
+          }
+          x[jx] = temp;
+          jx += *incx;
+          /* L160: */
+        }
+      }
+    }
+  }
 
-/*     End of DTBMV . */
+  /*     End of DTBMV . */
 
 } /* dtbmv_ */
-

diff --git a/blas/f2c/lsame.c b/blas/f2c/lsame.c
index 46324d9..ad51ea1 100644
--- a/blas/f2c/lsame.c
+++ b/blas/f2c/lsame.c

@@ -1,117 +1,109 @@
 /* lsame.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-logical lsame_(char *ca, char *cb, ftnlen ca_len, ftnlen cb_len)
-{
-    /* System generated locals */
-    logical ret_val;
+logical lsame_(char *ca, char *cb) {
+  /* System generated locals */
+  logical ret_val;
 
-    /* Local variables */
-    integer inta, intb, zcode;
+  /* Local variables */
+  integer inta, intb, zcode;
 
+  /*  -- LAPACK auxiliary routine (version 3.1) -- */
+  /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+  /*     November 2006 */
 
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
 
-/*     .. Scalar Arguments .. */
-/*     .. */
+  /*  Purpose */
+  /*  ======= */
 
-/*  Purpose */
-/*  ======= */
+  /*  LSAME returns .TRUE. if CA is the same letter as CB regardless of */
+  /*  case. */
 
-/*  LSAME returns .TRUE. if CA is the same letter as CB regardless of */
-/*  case. */
+  /*  Arguments */
+  /*  ========= */
 
-/*  Arguments */
-/*  ========= */
+  /*  CA      (input) CHARACTER*1 */
 
-/*  CA      (input) CHARACTER*1 */
+  /*  CB      (input) CHARACTER*1 */
+  /*          CA and CB specify the single characters to be compared. */
 
-/*  CB      (input) CHARACTER*1 */
-/*          CA and CB specify the single characters to be compared. */
+  /* ===================================================================== */
 
-/* ===================================================================== */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
 
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
+  /*     Test if the characters are equal */
 
-/*     Test if the characters are equal */
-
-    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
-    if (ret_val) {
-	return ret_val;
-    }
-
-/*     Now test for equivalence if both characters are alphabetic. */
-
-    zcode = 'Z';
-
-/*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime */
-/*     machines, on which ICHAR returns a value with bit 8 set. */
-/*     ICHAR('A') on Prime machines returns 193 which is the same as */
-/*     ICHAR('A') on an EBCDIC machine. */
-
-    inta = *(unsigned char *)ca;
-    intb = *(unsigned char *)cb;
-
-    if (zcode == 90 || zcode == 122) {
-
-/*        ASCII is assumed - ZCODE is the ASCII code of either lower or */
-/*        upper case 'Z'. */
-
-	if (inta >= 97 && inta <= 122) {
-	    inta += -32;
-	}
-	if (intb >= 97 && intb <= 122) {
-	    intb += -32;
-	}
-
-    } else if (zcode == 233 || zcode == 169) {
-
-/*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
-/*        upper case 'Z'. */
-
-	if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || 
-            (inta >= 162 && inta <= 169)) {
-	    inta += 64;
-	}
-	if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || 
-            (intb >= 162 && intb <= 169)) {
-	    intb += 64;
-	}
-
-    } else if (zcode == 218 || zcode == 250) {
-
-/*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
-/*        plus 128 of either lower or upper case 'Z'. */
-
-	if (inta >= 225 && inta <= 250) {
-	    inta += -32;
-	}
-	if (intb >= 225 && intb <= 250) {
-	    intb += -32;
-	}
-    }
-    ret_val = inta == intb;
-
-/*     RETURN */
-
-/*     End of LSAME */
-
+  ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+  if (ret_val) {
     return ret_val;
-} /* lsame_ */
+  }
 
+  /*     Now test for equivalence if both characters are alphabetic. */
+
+  zcode = 'Z';
+
+  /*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime */
+  /*     machines, on which ICHAR returns a value with bit 8 set. */
+  /*     ICHAR('A') on Prime machines returns 193 which is the same as */
+  /*     ICHAR('A') on an EBCDIC machine. */
+
+  inta = *(unsigned char *)ca;
+  intb = *(unsigned char *)cb;
+
+  if (zcode == 90 || zcode == 122) {
+    /*        ASCII is assumed - ZCODE is the ASCII code of either lower or */
+    /*        upper case 'Z'. */
+
+    if (inta >= 97 && inta <= 122) {
+      inta += -32;
+    }
+    if (intb >= 97 && intb <= 122) {
+      intb += -32;
+    }
+
+  } else if (zcode == 233 || zcode == 169) {
+    /*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
+    /*        upper case 'Z'. */
+
+    if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) {
+      inta += 64;
+    }
+    if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) {
+      intb += 64;
+    }
+
+  } else if (zcode == 218 || zcode == 250) {
+    /*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
+    /*        plus 128 of either lower or upper case 'Z'. */
+
+    if (inta >= 225 && inta <= 250) {
+      inta += -32;
+    }
+    if (intb >= 225 && intb <= 250) {
+      intb += -32;
+    }
+  }
+  ret_val = inta == intb;
+
+  /*     RETURN */
+
+  /*     End of LSAME */
+
+  return ret_val;
+} /* lsame_ */

diff --git a/blas/f2c/r_cnjg.c b/blas/f2c/r_cnjg.c
deleted file mode 100644
index c08182f..0000000
--- a/blas/f2c/r_cnjg.c
+++ /dev/null

@@ -1,6 +0,0 @@
-#include "datatypes.h"    
-
-void r_cnjg(complex *r, complex *z) {
-    r->r = z->r;
-    r->i = -(z->i);
-}

diff --git a/blas/f2c/srotm.c b/blas/f2c/srotm.c
index bd5944a..410dd87 100644
--- a/blas/f2c/srotm.c
+++ b/blas/f2c/srotm.c

@@ -1,216 +1,212 @@
 /* srotm.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int srotm_(integer *n, real *sx, integer *incx, real *sy, 
-	integer *incy, real *sparam)
-{
-    /* Initialized data */
+/* Subroutine */ void srotm_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *sparam) {
+  /* Initialized data */
 
-    static real zero = 0.f;
-    static real two = 2.f;
+  static real zero = 0.f;
+  static real two = 2.f;
 
-    /* System generated locals */
-    integer i__1, i__2;
+  /* System generated locals */
+  integer i__1, i__2;
 
-    /* Local variables */
-    integer i__;
-    real w, z__;
-    integer kx, ky;
-    real sh11, sh12, sh21, sh22, sflag;
-    integer nsteps;
+  /* Local variables */
+  integer i__;
+  real w, z__;
+  integer kx, ky;
+  real sh11, sh12, sh21, sh22, sflag;
+  integer nsteps;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
+  /*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
 
-/*     (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */
-/*     (DX**T) */
+  /*     (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */
+  /*     (DX**T) */
 
-/*     SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-/*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */
-/*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+  /*     SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
+  /*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */
+  /*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
 
-/*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
+  /*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
 
-/*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-/*     SEE  SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */
+  /*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
+  /*     H=(          )    (          )    (          )    (          ) */
+  /*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
+  /*     SEE  SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */
 
+  /*  Arguments */
+  /*  ========= */
 
-/*  Arguments */
-/*  ========= */
+  /*  N      (input) INTEGER */
+  /*         number of elements in input vector(s) */
 
-/*  N      (input) INTEGER */
-/*         number of elements in input vector(s) */
+  /*  SX     (input/output) REAL array, dimension N */
+  /*         double precision vector with N elements */
 
-/*  SX     (input/output) REAL array, dimension N */
-/*         double precision vector with N elements */
+  /*  INCX   (input) INTEGER */
+  /*         storage spacing between elements of SX */
 
-/*  INCX   (input) INTEGER */
-/*         storage spacing between elements of SX */
+  /*  SY     (input/output) REAL array, dimension N */
+  /*         double precision vector with N elements */
 
-/*  SY     (input/output) REAL array, dimension N */
-/*         double precision vector with N elements */
+  /*  INCY   (input) INTEGER */
+  /*         storage spacing between elements of SY */
 
-/*  INCY   (input) INTEGER */
-/*         storage spacing between elements of SY */
+  /*  SPARAM (input/output)  REAL array, dimension 5 */
+  /*     SPARAM(1)=SFLAG */
+  /*     SPARAM(2)=SH11 */
+  /*     SPARAM(3)=SH21 */
+  /*     SPARAM(4)=SH12 */
+  /*     SPARAM(5)=SH22 */
 
-/*  SPARAM (input/output)  REAL array, dimension 5 */
-/*     SPARAM(1)=SFLAG */
-/*     SPARAM(2)=SH11 */
-/*     SPARAM(3)=SH21 */
-/*     SPARAM(4)=SH12 */
-/*     SPARAM(5)=SH22 */
+  /*  ===================================================================== */
 
-/*  ===================================================================== */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. Data statements .. */
+  /* Parameter adjustments */
+  --sparam;
+  --sy;
+  --sx;
 
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Data statements .. */
-    /* Parameter adjustments */
-    --sparam;
-    --sy;
-    --sx;
+  /* Function Body */
+  /*     .. */
 
-    /* Function Body */
-/*     .. */
+  sflag = sparam[1];
+  if (*n <= 0 || sflag + two == zero) {
+    goto L140;
+  }
+  if (!(*incx == *incy && *incx > 0)) {
+    goto L70;
+  }
 
-    sflag = sparam[1];
-    if (*n <= 0 || sflag + two == zero) {
-	goto L140;
-    }
-    if (! (*incx == *incy && *incx > 0)) {
-	goto L70;
-    }
-
-    nsteps = *n * *incx;
-    if (sflag < 0.f) {
-	goto L50;
-    } else if (sflag == 0) {
-	goto L10;
-    } else {
-	goto L30;
-    }
+  nsteps = *n * *incx;
+  if (sflag < 0.f) {
+    goto L50;
+  } else if (sflag == 0) {
+    goto L10;
+  } else {
+    goto L30;
+  }
 L10:
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-	w = sx[i__];
-	z__ = sy[i__];
-	sx[i__] = w + z__ * sh12;
-	sy[i__] = w * sh21 + z__;
-/* L20: */
-    }
-    goto L140;
+  sh12 = sparam[4];
+  sh21 = sparam[3];
+  i__1 = nsteps;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    w = sx[i__];
+    z__ = sy[i__];
+    sx[i__] = w + z__ * sh12;
+    sy[i__] = w * sh21 + z__;
+    /* L20: */
+  }
+  goto L140;
 L30:
-    sh11 = sparam[2];
-    sh22 = sparam[5];
-    i__2 = nsteps;
-    i__1 = *incx;
-    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-	w = sx[i__];
-	z__ = sy[i__];
-	sx[i__] = w * sh11 + z__;
-	sy[i__] = -w + sh22 * z__;
-/* L40: */
-    }
-    goto L140;
+  sh11 = sparam[2];
+  sh22 = sparam[5];
+  i__2 = nsteps;
+  i__1 = *incx;
+  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+    w = sx[i__];
+    z__ = sy[i__];
+    sx[i__] = w * sh11 + z__;
+    sy[i__] = -w + sh22 * z__;
+    /* L40: */
+  }
+  goto L140;
 L50:
-    sh11 = sparam[2];
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    sh22 = sparam[5];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-	w = sx[i__];
-	z__ = sy[i__];
-	sx[i__] = w * sh11 + z__ * sh12;
-	sy[i__] = w * sh21 + z__ * sh22;
-/* L60: */
-    }
-    goto L140;
+  sh11 = sparam[2];
+  sh12 = sparam[4];
+  sh21 = sparam[3];
+  sh22 = sparam[5];
+  i__1 = nsteps;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    w = sx[i__];
+    z__ = sy[i__];
+    sx[i__] = w * sh11 + z__ * sh12;
+    sy[i__] = w * sh21 + z__ * sh22;
+    /* L60: */
+  }
+  goto L140;
 L70:
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-	kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-	ky = (1 - *n) * *incy + 1;
-    }
+  kx = 1;
+  ky = 1;
+  if (*incx < 0) {
+    kx = (1 - *n) * *incx + 1;
+  }
+  if (*incy < 0) {
+    ky = (1 - *n) * *incy + 1;
+  }
 
-    if (sflag < 0.f) {
-	goto L120;
-    } else if (sflag == 0) {
-	goto L80;
-    } else {
-	goto L100;
-    }
+  if (sflag < 0.f) {
+    goto L120;
+  } else if (sflag == 0) {
+    goto L80;
+  } else {
+    goto L100;
+  }
 L80:
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	w = sx[kx];
-	z__ = sy[ky];
-	sx[kx] = w + z__ * sh12;
-	sy[ky] = w * sh21 + z__;
-	kx += *incx;
-	ky += *incy;
-/* L90: */
-    }
-    goto L140;
+  sh12 = sparam[4];
+  sh21 = sparam[3];
+  i__2 = *n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = sx[kx];
+    z__ = sy[ky];
+    sx[kx] = w + z__ * sh12;
+    sy[ky] = w * sh21 + z__;
+    kx += *incx;
+    ky += *incy;
+    /* L90: */
+  }
+  goto L140;
 L100:
-    sh11 = sparam[2];
-    sh22 = sparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	w = sx[kx];
-	z__ = sy[ky];
-	sx[kx] = w * sh11 + z__;
-	sy[ky] = -w + sh22 * z__;
-	kx += *incx;
-	ky += *incy;
-/* L110: */
-    }
-    goto L140;
+  sh11 = sparam[2];
+  sh22 = sparam[5];
+  i__2 = *n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = sx[kx];
+    z__ = sy[ky];
+    sx[kx] = w * sh11 + z__;
+    sy[ky] = -w + sh22 * z__;
+    kx += *incx;
+    ky += *incy;
+    /* L110: */
+  }
+  goto L140;
 L120:
-    sh11 = sparam[2];
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    sh22 = sparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	w = sx[kx];
-	z__ = sy[ky];
-	sx[kx] = w * sh11 + z__ * sh12;
-	sy[ky] = w * sh21 + z__ * sh22;
-	kx += *incx;
-	ky += *incy;
-/* L130: */
-    }
+  sh11 = sparam[2];
+  sh12 = sparam[4];
+  sh21 = sparam[3];
+  sh22 = sparam[5];
+  i__2 = *n;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    w = sx[kx];
+    z__ = sy[ky];
+    sx[kx] = w * sh11 + z__ * sh12;
+    sy[ky] = w * sh21 + z__ * sh22;
+    kx += *incx;
+    ky += *incy;
+    /* L130: */
+  }
 L140:
-    return 0;
+  return;
 } /* srotm_ */
-

diff --git a/blas/f2c/srotmg.c b/blas/f2c/srotmg.c
index 75f789f..3a0f9f6 100644
--- a/blas/f2c/srotmg.c
+++ b/blas/f2c/srotmg.c

@@ -1,295 +1,293 @@
 /* srotmg.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real 
-	*sparam)
-{
-    /* Initialized data */
+/* Subroutine */ void srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real *sparam) {
+  /* Initialized data */
 
-    static real zero = 0.f;
-    static real one = 1.f;
-    static real two = 2.f;
-    static real gam = 4096.f;
-    static real gamsq = 16777200.f;
-    static real rgamsq = 5.96046e-8f;
+  static real zero = 0.f;
+  static real one = 1.f;
+  static real two = 2.f;
+  static real gam = 4096.f;
+  static real gamsq = 16777200.f;
+  static real rgamsq = 5.96046e-8f;
 
-    /* Format strings */
-    static char fmt_120[] = "";
-    static char fmt_150[] = "";
-    static char fmt_180[] = "";
-    static char fmt_210[] = "";
+  /* Format strings */
+  static char fmt_120[] = "";
+  static char fmt_150[] = "";
+  static char fmt_180[] = "";
+  static char fmt_210[] = "";
 
-    /* System generated locals */
-    real r__1;
+  /* System generated locals */
+  real r__1;
 
-    /* Local variables */
-    real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22;
-    integer igo;
-    real sflag, stemp;
+  /* Local variables */
+  real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22;
+  integer igo;
+  real sflag, stemp;
 
-    /* Assigned format variables */
-    static char *igo_fmt;
+  /* Assigned format variables */
+  static char *igo_fmt;
+  (void)igo_fmt;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
-/*     THE SECOND COMPONENT OF THE 2-VECTOR  (SQRT(SD1)*SX1,SQRT(SD2)* */
-/*     SY2)**T. */
-/*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
+  /*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
+  /*     THE SECOND COMPONENT OF THE 2-VECTOR  (SQRT(SD1)*SX1,SQRT(SD2)* */
+  /*     SY2)**T. */
+  /*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
 
-/*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
+  /*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
 
-/*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-/*     LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */
-/*     RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */
-/*     VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */
+  /*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
+  /*     H=(          )    (          )    (          )    (          ) */
+  /*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
+  /*     LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */
+  /*     RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */
+  /*     VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */
 
-/*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
-/*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
-/*     OF SD1 AND SD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
+  /*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
+  /*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
+  /*     OF SD1 AND SD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
 
+  /*  Arguments */
+  /*  ========= */
 
-/*  Arguments */
-/*  ========= */
+  /*  SD1    (input/output) REAL */
 
+  /*  SD2    (input/output) REAL */
 
-/*  SD1    (input/output) REAL */
+  /*  SX1    (input/output) REAL */
 
-/*  SD2    (input/output) REAL */
+  /*  SY1    (input) REAL */
 
-/*  SX1    (input/output) REAL */
+  /*  SPARAM (input/output)  REAL array, dimension 5 */
+  /*     SPARAM(1)=SFLAG */
+  /*     SPARAM(2)=SH11 */
+  /*     SPARAM(3)=SH21 */
+  /*     SPARAM(4)=SH12 */
+  /*     SPARAM(5)=SH22 */
 
-/*  SY1    (input) REAL */
+  /*  ===================================================================== */
 
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
+  /*     .. Data statements .. */
 
-/*  SPARAM (input/output)  REAL array, dimension 5 */
-/*     SPARAM(1)=SFLAG */
-/*     SPARAM(2)=SH11 */
-/*     SPARAM(3)=SH21 */
-/*     SPARAM(4)=SH12 */
-/*     SPARAM(5)=SH22 */
+  /* Parameter adjustments */
+  --sparam;
 
-/*  ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Data statements .. */
-
-    /* Parameter adjustments */
-    --sparam;
-
-    /* Function Body */
-/*     .. */
-    if (! (*sd1 < zero)) {
-	goto L10;
-    }
-/*       GO ZERO-H-D-AND-SX1.. */
-    goto L60;
+  /* Function Body */
+  /*     .. */
+  if (!(*sd1 < zero)) {
+    goto L10;
+  }
+  /*       GO ZERO-H-D-AND-SX1.. */
+  goto L60;
 L10:
-/*     CASE-SD1-NONNEGATIVE */
-    sp2 = *sd2 * *sy1;
-    if (! (sp2 == zero)) {
-	goto L20;
-    }
-    sflag = -two;
-    goto L260;
+  /*     CASE-SD1-NONNEGATIVE */
+  sp2 = *sd2 * *sy1;
+  if (!(sp2 == zero)) {
+    goto L20;
+  }
+  sflag = -two;
+  goto L260;
 /*     REGULAR-CASE.. */
 L20:
-    sp1 = *sd1 * *sx1;
-    sq2 = sp2 * *sy1;
-    sq1 = sp1 * *sx1;
+  sp1 = *sd1 * *sx1;
+  sq2 = sp2 * *sy1;
+  sq1 = sp1 * *sx1;
 
-    if (! (dabs(sq1) > dabs(sq2))) {
-	goto L40;
-    }
-    sh21 = -(*sy1) / *sx1;
-    sh12 = sp2 / sp1;
+  if (!(dabs(sq1) > dabs(sq2))) {
+    goto L40;
+  }
+  sh21 = -(*sy1) / *sx1;
+  sh12 = sp2 / sp1;
 
-    su = one - sh12 * sh21;
+  su = one - sh12 * sh21;
 
-    if (! (su <= zero)) {
-	goto L30;
-    }
-/*         GO ZERO-H-D-AND-SX1.. */
-    goto L60;
+  if (!(su <= zero)) {
+    goto L30;
+  }
+  /*         GO ZERO-H-D-AND-SX1.. */
+  goto L60;
 L30:
-    sflag = zero;
-    *sd1 /= su;
-    *sd2 /= su;
-    *sx1 *= su;
-/*         GO SCALE-CHECK.. */
-    goto L100;
+  sflag = zero;
+  *sd1 /= su;
+  *sd2 /= su;
+  *sx1 *= su;
+  /*         GO SCALE-CHECK.. */
+  goto L100;
 L40:
-    if (! (sq2 < zero)) {
-	goto L50;
-    }
-/*         GO ZERO-H-D-AND-SX1.. */
-    goto L60;
+  if (!(sq2 < zero)) {
+    goto L50;
+  }
+  /*         GO ZERO-H-D-AND-SX1.. */
+  goto L60;
 L50:
-    sflag = one;
-    sh11 = sp1 / sp2;
-    sh22 = *sx1 / *sy1;
-    su = one + sh11 * sh22;
-    stemp = *sd2 / su;
-    *sd2 = *sd1 / su;
-    *sd1 = stemp;
-    *sx1 = *sy1 * su;
-/*         GO SCALE-CHECK */
-    goto L100;
+  sflag = one;
+  sh11 = sp1 / sp2;
+  sh22 = *sx1 / *sy1;
+  su = one + sh11 * sh22;
+  stemp = *sd2 / su;
+  *sd2 = *sd1 / su;
+  *sd1 = stemp;
+  *sx1 = *sy1 * su;
+  /*         GO SCALE-CHECK */
+  goto L100;
 /*     PROCEDURE..ZERO-H-D-AND-SX1.. */
 L60:
-    sflag = -one;
-    sh11 = zero;
-    sh12 = zero;
-    sh21 = zero;
-    sh22 = zero;
+  sflag = -one;
+  sh11 = zero;
+  sh12 = zero;
+  sh21 = zero;
+  sh22 = zero;
 
-    *sd1 = zero;
-    *sd2 = zero;
-    *sx1 = zero;
-/*         RETURN.. */
-    goto L220;
+  *sd1 = zero;
+  *sd2 = zero;
+  *sx1 = zero;
+  /*         RETURN.. */
+  goto L220;
 /*     PROCEDURE..FIX-H.. */
 L70:
-    if (! (sflag >= zero)) {
-	goto L90;
-    }
-
-    if (! (sflag == zero)) {
-	goto L80;
-    }
-    sh11 = one;
-    sh22 = one;
-    sflag = -one;
+  if (!(sflag >= zero)) {
     goto L90;
+  }
+
+  if (!(sflag == zero)) {
+    goto L80;
+  }
+  sh11 = one;
+  sh22 = one;
+  sflag = -one;
+  goto L90;
 L80:
-    sh21 = -one;
-    sh12 = one;
-    sflag = -one;
+  sh21 = -one;
+  sh12 = one;
+  sflag = -one;
 L90:
-    switch (igo) {
-	case 0: goto L120;
-	case 1: goto L150;
-	case 2: goto L180;
-	case 3: goto L210;
-    }
+  switch (igo) {
+    case 0:
+      goto L120;
+    case 1:
+      goto L150;
+    case 2:
+      goto L180;
+    case 3:
+      goto L210;
+  }
 /*     PROCEDURE..SCALE-CHECK */
 L100:
 L110:
-    if (! (*sd1 <= rgamsq)) {
-	goto L130;
-    }
-    if (*sd1 == zero) {
-	goto L160;
-    }
-    igo = 0;
-    igo_fmt = fmt_120;
-/*              FIX-H.. */
-    goto L70;
+  if (!(*sd1 <= rgamsq)) {
+    goto L130;
+  }
+  if (*sd1 == zero) {
+    goto L160;
+  }
+  igo = 0;
+  igo_fmt = fmt_120;
+  /*              FIX-H.. */
+  goto L70;
 L120:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd1 *= r__1 * r__1;
-    *sx1 /= gam;
-    sh11 /= gam;
-    sh12 /= gam;
-    goto L110;
+  /* Computing 2nd power */
+  r__1 = gam;
+  *sd1 *= r__1 * r__1;
+  *sx1 /= gam;
+  sh11 /= gam;
+  sh12 /= gam;
+  goto L110;
 L130:
 L140:
-    if (! (*sd1 >= gamsq)) {
-	goto L160;
-    }
-    igo = 1;
-    igo_fmt = fmt_150;
-/*              FIX-H.. */
-    goto L70;
+  if (!(*sd1 >= gamsq)) {
+    goto L160;
+  }
+  igo = 1;
+  igo_fmt = fmt_150;
+  /*              FIX-H.. */
+  goto L70;
 L150:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd1 /= r__1 * r__1;
-    *sx1 *= gam;
-    sh11 *= gam;
-    sh12 *= gam;
-    goto L140;
+  /* Computing 2nd power */
+  r__1 = gam;
+  *sd1 /= r__1 * r__1;
+  *sx1 *= gam;
+  sh11 *= gam;
+  sh12 *= gam;
+  goto L140;
 L160:
 L170:
-    if (! (dabs(*sd2) <= rgamsq)) {
-	goto L190;
-    }
-    if (*sd2 == zero) {
-	goto L220;
-    }
-    igo = 2;
-    igo_fmt = fmt_180;
-/*              FIX-H.. */
-    goto L70;
+  if (!(dabs(*sd2) <= rgamsq)) {
+    goto L190;
+  }
+  if (*sd2 == zero) {
+    goto L220;
+  }
+  igo = 2;
+  igo_fmt = fmt_180;
+  /*              FIX-H.. */
+  goto L70;
 L180:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd2 *= r__1 * r__1;
-    sh21 /= gam;
-    sh22 /= gam;
-    goto L170;
+  /* Computing 2nd power */
+  r__1 = gam;
+  *sd2 *= r__1 * r__1;
+  sh21 /= gam;
+  sh22 /= gam;
+  goto L170;
 L190:
 L200:
-    if (! (dabs(*sd2) >= gamsq)) {
-	goto L220;
-    }
-    igo = 3;
-    igo_fmt = fmt_210;
-/*              FIX-H.. */
-    goto L70;
+  if (!(dabs(*sd2) >= gamsq)) {
+    goto L220;
+  }
+  igo = 3;
+  igo_fmt = fmt_210;
+  /*              FIX-H.. */
+  goto L70;
 L210:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd2 /= r__1 * r__1;
-    sh21 *= gam;
-    sh22 *= gam;
-    goto L200;
+  /* Computing 2nd power */
+  r__1 = gam;
+  *sd2 /= r__1 * r__1;
+  sh21 *= gam;
+  sh22 *= gam;
+  goto L200;
 L220:
-    if (sflag < 0.f) {
-	goto L250;
-    } else if (sflag == 0) {
-	goto L230;
-    } else {
-	goto L240;
-    }
+  if (sflag < 0.f) {
+    goto L250;
+  } else if (sflag == 0) {
+    goto L230;
+  } else {
+    goto L240;
+  }
 L230:
-    sparam[3] = sh21;
-    sparam[4] = sh12;
-    goto L260;
+  sparam[3] = sh21;
+  sparam[4] = sh12;
+  goto L260;
 L240:
-    sparam[2] = sh11;
-    sparam[5] = sh22;
-    goto L260;
+  sparam[2] = sh11;
+  sparam[5] = sh22;
+  goto L260;
 L250:
-    sparam[2] = sh11;
-    sparam[3] = sh21;
-    sparam[4] = sh12;
-    sparam[5] = sh22;
+  sparam[2] = sh11;
+  sparam[3] = sh21;
+  sparam[4] = sh12;
+  sparam[5] = sh22;
 L260:
-    sparam[1] = sflag;
-    return 0;
+  sparam[1] = sflag;
 } /* srotmg_ */
-

diff --git a/blas/f2c/ssbmv.c b/blas/f2c/ssbmv.c
index 8599325..ac1d70a 100644
--- a/blas/f2c/ssbmv.c
+++ b/blas/f2c/ssbmv.c

@@ -1,368 +1,359 @@
 /* ssbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int ssbmv_(char *uplo, integer *n, integer *k, real *alpha, 
-	real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
-	integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+/* Subroutine */ void ssbmv_(char *uplo, integer *n, integer *k, real *alpha, real *a, integer *lda, real *x,
+                             integer *incx, real *beta, real *y, integer *incy) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
 
-    /* Local variables */
-    integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+  real temp1, temp2;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  SSBMV  performs the matrix-vector  operation */
+  /*  SSBMV  performs the matrix-vector  operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric band matrix, with k super-diagonals. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n symmetric band matrix, with k super-diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the band matrix A is being supplied as */
+  /*           follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  being supplied. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  being supplied. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry, K specifies the number of super-diagonals of the */
+  /*           matrix A. K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - REAL            . */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*  A      - REAL             array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the symmetric matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer the upper */
+  /*           triangular part of a symmetric band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the symmetric matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer the lower */
+  /*           triangular part of a symmetric band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
+  /*  X      - REAL             array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the */
+  /*           vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
+  /*  BETA   - REAL            . */
+  /*           On entry, BETA specifies the scalar beta. */
+  /*           Unchanged on exit. */
 
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
+  /*  Y      - REAL             array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the */
+  /*           vector y. On exit, Y is overwritten by the updated vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
-    --y;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
+  --y;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*k < 0) {
-	info = 3;
-    } else if (*lda < *k + 1) {
-	info = 6;
-    } else if (*incx == 0) {
-	info = 8;
-    } else if (*incy == 0) {
-	info = 11;
-    }
-    if (info != 0) {
-	xerbla_("SSBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*k < 0) {
+    info = 3;
+  } else if (*lda < *k + 1) {
+    info = 6;
+  } else if (*incx == 0) {
+    info = 8;
+  } else if (*incy == 0) {
+    info = 11;
+  }
+  if (info != 0) {
+    xerbla_("SSBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
-	return 0;
-    }
+  if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array A */
+  /*     are accessed sequentially with one pass through A. */
+
+  /*     First form  y := beta*y. */
+
+  if (*beta != 1.f) {
+    if (*incy == 1) {
+      if (*beta == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = 0.f;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = *beta * y[i__];
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (*beta == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = 0.f;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = *beta * y[iy];
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (*alpha == 0.f) {
+    return;
+  }
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when upper triangle of A is stored. */
+
+    kplus1 = *k + 1;
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.f;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__2 = 1, i__3 = j - *k;
+        i__4 = j - 1;
+        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+          y[i__] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[i__];
+          /* L50: */
+        }
+        y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.f;
+        ix = kx;
+        iy = ky;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__4 = 1, i__2 = j - *k;
+        i__3 = j - 1;
+        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+          y[iy] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[ix];
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        if (j > *k) {
+          kx += *incx;
+          ky += *incy;
+        }
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when lower triangle of A is stored. */
 
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-	if (*incy == 1) {
-	    if (*beta == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = 0.f;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = *beta * y[i__];
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (*beta == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = 0.f;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = *beta * y[iy];
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (*alpha == 0.f) {
-	return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-	kplus1 = *k + 1;
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.f;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__2 = 1, i__3 = j - *k;
-		i__4 = j - 1;
-		for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-		    y[i__] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L50: */
-		}
-		y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.f;
-		ix = kx;
-		iy = ky;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__4 = 1, i__2 = j - *k;
-		i__3 = j - 1;
-		for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-		    y[iy] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[ix];
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * 
-			temp2;
-		jx += *incx;
-		jy += *incy;
-		if (j > *k) {
-		    kx += *incx;
-		    ky += *incy;
-		}
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.f;
+        y[j] += temp1 * a[j * a_dim1 + 1];
+        l = 1 - j;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          y[i__] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[i__];
+          /* L90: */
+        }
+        y[j] += *alpha * temp2;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.f;
-		y[j] += temp1 * a[j * a_dim1 + 1];
-		l = 1 - j;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    y[i__] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-		}
-		y[j] += *alpha * temp2;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.f;
-		y[jy] += temp1 * a[j * a_dim1 + 1];
-		l = 1 - j;
-		ix = jx;
-		iy = jy;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    ix += *incx;
-		    iy += *incy;
-		    y[iy] += temp1 * a[l + i__ + j * a_dim1];
-		    temp2 += a[l + i__ + j * a_dim1] * x[ix];
-/* L110: */
-		}
-		y[jy] += *alpha * temp2;
-		jx += *incx;
-		jy += *incy;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.f;
+        y[jy] += temp1 * a[j * a_dim1 + 1];
+        l = 1 - j;
+        ix = jx;
+        iy = jy;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          ix += *incx;
+          iy += *incy;
+          y[iy] += temp1 * a[l + i__ + j * a_dim1];
+          temp2 += a[l + i__ + j * a_dim1] * x[ix];
+          /* L110: */
+        }
+        y[jy] += *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of SSBMV . */
+  /*     End of SSBMV . */
 
 } /* ssbmv_ */
-

diff --git a/blas/f2c/sspmv.c b/blas/f2c/sspmv.c
index 47858ec..ea9db33 100644
--- a/blas/f2c/sspmv.c
+++ b/blas/f2c/sspmv.c

@@ -1,316 +1,308 @@
 /* sspmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int sspmv_(char *uplo, integer *n, real *alpha, real *ap, 
-	real *x, integer *incx, real *beta, real *y, integer *incy, ftnlen 
-	uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
+/* Subroutine */ void sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x, integer *incx, real *beta, real *y,
+                             integer *incy) {
+  /* System generated locals */
+  integer i__1, i__2;
 
-    /* Local variables */
-    integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+  real temp1, temp2;
+  extern logical lsame_(char *, char *);
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  SSPMV  performs the matrix-vector operation */
+  /*  SSPMV  performs the matrix-vector operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix, supplied in packed form. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n symmetric matrix, supplied in packed form. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the matrix A is supplied in the packed */
+  /*           array AP as follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - REAL            . */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Unchanged on exit. */
+  /*  AP     - REAL             array of DIMENSION at least */
+  /*           ( ( n*( n + 1 ) )/2 ). */
+  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
+  /*           contain the upper triangular part of the symmetric matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+  /*           and a( 2, 2 ) respectively, and so on. */
+  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
+  /*           contain the lower triangular part of the symmetric matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+  /*           and a( 3, 1 ) respectively, and so on. */
+  /*           Unchanged on exit. */
 
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
+  /*  X      - REAL             array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
+  /*  BETA   - REAL            . */
+  /*           On entry, BETA specifies the scalar beta. When BETA is */
+  /*           supplied as zero then Y need not be set on input. */
+  /*           Unchanged on exit. */
 
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
+  /*  Y      - REAL             array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the n */
+  /*           element vector y. On exit, Y is overwritten by the updated */
+  /*           vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
+  /* Parameter adjustments */
+  --y;
+  --x;
+  --ap;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*incx == 0) {
-	info = 6;
-    } else if (*incy == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("SSPMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*incx == 0) {
+    info = 6;
+  } else if (*incy == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("SSPMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
-	return 0;
-    }
+  if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array AP */
+  /*     are accessed sequentially with one pass through AP. */
+
+  /*     First form  y := beta*y. */
+
+  if (*beta != 1.f) {
+    if (*incy == 1) {
+      if (*beta == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = 0.f;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[i__] = *beta * y[i__];
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (*beta == 0.f) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = 0.f;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          y[iy] = *beta * y[iy];
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (*alpha == 0.f) {
+    return;
+  }
+  kk = 1;
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when AP contains the upper triangle. */
+
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.f;
+        k = kk;
+        i__2 = j - 1;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+          y[i__] += temp1 * ap[k];
+          temp2 += ap[k] * x[i__];
+          ++k;
+          /* L50: */
+        }
+        y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+        kk += j;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.f;
+        ix = kx;
+        iy = ky;
+        i__2 = kk + j - 2;
+        for (k = kk; k <= i__2; ++k) {
+          y[iy] += temp1 * ap[k];
+          temp2 += ap[k] * x[ix];
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        kk += j;
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when AP contains the lower triangle. */
 
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-	if (*incy == 1) {
-	    if (*beta == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = 0.f;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = *beta * y[i__];
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (*beta == 0.f) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = 0.f;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = *beta * y[iy];
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (*alpha == 0.f) {
-	return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.f;
-		k = kk;
-		i__2 = j - 1;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    y[i__] += temp1 * ap[k];
-		    temp2 += ap[k] * x[i__];
-		    ++k;
-/* L50: */
-		}
-		y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-		kk += j;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.f;
-		ix = kx;
-		iy = ky;
-		i__2 = kk + j - 2;
-		for (k = kk; k <= i__2; ++k) {
-		    y[iy] += temp1 * ap[k];
-		    temp2 += ap[k] * x[ix];
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-		jx += *incx;
-		jy += *incy;
-		kk += j;
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[j];
+        temp2 = 0.f;
+        y[j] += temp1 * ap[kk];
+        k = kk + 1;
+        i__2 = *n;
+        for (i__ = j + 1; i__ <= i__2; ++i__) {
+          y[i__] += temp1 * ap[k];
+          temp2 += ap[k] * x[i__];
+          ++k;
+          /* L90: */
+        }
+        y[j] += *alpha * temp2;
+        kk += *n - j + 1;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[j];
-		temp2 = 0.f;
-		y[j] += temp1 * ap[kk];
-		k = kk + 1;
-		i__2 = *n;
-		for (i__ = j + 1; i__ <= i__2; ++i__) {
-		    y[i__] += temp1 * ap[k];
-		    temp2 += ap[k] * x[i__];
-		    ++k;
-/* L90: */
-		}
-		y[j] += *alpha * temp2;
-		kk += *n - j + 1;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp1 = *alpha * x[jx];
-		temp2 = 0.f;
-		y[jy] += temp1 * ap[kk];
-		ix = jx;
-		iy = jy;
-		i__2 = kk + *n - j;
-		for (k = kk + 1; k <= i__2; ++k) {
-		    ix += *incx;
-		    iy += *incy;
-		    y[iy] += temp1 * ap[k];
-		    temp2 += ap[k] * x[ix];
-/* L110: */
-		}
-		y[jy] += *alpha * temp2;
-		jx += *incx;
-		jy += *incy;
-		kk += *n - j + 1;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        temp1 = *alpha * x[jx];
+        temp2 = 0.f;
+        y[jy] += temp1 * ap[kk];
+        ix = jx;
+        iy = jy;
+        i__2 = kk + *n - j;
+        for (k = kk + 1; k <= i__2; ++k) {
+          ix += *incx;
+          iy += *incy;
+          y[iy] += temp1 * ap[k];
+          temp2 += ap[k] * x[ix];
+          /* L110: */
+        }
+        y[jy] += *alpha * temp2;
+        jx += *incx;
+        jy += *incy;
+        kk += *n - j + 1;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of SSPMV . */
+  /*     End of SSPMV . */
 
 } /* sspmv_ */
-

diff --git a/blas/f2c/stbmv.c b/blas/f2c/stbmv.c
index b5a68b5..43329f6 100644
--- a/blas/f2c/stbmv.c
+++ b/blas/f2c/stbmv.c

@@ -1,428 +1,417 @@
 /* stbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, 
-	integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen 
-	uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+/* Subroutine */ void stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda,
+                             real *x, integer *incx) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
 
-    /* Local variables */
-    integer i__, j, l, ix, jx, kx, info;
-    real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    logical nounit;
+  /* Local variables */
+  integer i__, j, l, ix, jx, kx, info;
+  real temp;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
+  logical nounit;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  STBMV  performs one of the matrix-vector operations */
+  /*  STBMV  performs one of the matrix-vector operations */
 
-/*     x := A*x,   or   x := A'*x, */
+  /*     x := A*x,   or   x := A'*x, */
 
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
+  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the matrix is an upper or */
+  /*           lower triangular matrix as follows: */
 
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
+  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
 
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
+  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
+  /*  TRANS  - CHARACTER*1. */
+  /*           On entry, TRANS specifies the operation to be performed as */
+  /*           follows: */
 
-/*              TRANS = 'N' or 'n'   x := A*x. */
+  /*              TRANS = 'N' or 'n'   x := A*x. */
 
-/*              TRANS = 'T' or 't'   x := A'*x. */
+  /*              TRANS = 'T' or 't'   x := A'*x. */
 
-/*              TRANS = 'C' or 'c'   x := A'*x. */
+  /*              TRANS = 'C' or 'c'   x := A'*x. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
+  /*  DIAG   - CHARACTER*1. */
+  /*           On entry, DIAG specifies whether or not A is unit */
+  /*           triangular as follows: */
 
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
+  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
 
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
+  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
+  /*                                  triangular. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
+  /*           super-diagonals of the matrix A. */
+  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
+  /*           sub-diagonals of the matrix A. */
+  /*           K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*  A      - REAL             array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer an upper */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer a lower */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
+  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
+  /*           corresponding to the diagonal elements of the matrix are not */
+  /*           referenced, but are assumed to be unity. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           transformed vector x. */
+  /*  X      - REAL             array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. On exit, X is overwritten with the */
+  /*           transformed vector x. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-	    "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-	    ftnlen)1)) {
-	info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-	    "N", (ftnlen)1, (ftnlen)1)) {
-	info = 3;
-    } else if (*n < 0) {
-	info = 4;
-    } else if (*k < 0) {
-	info = 5;
-    } else if (*lda < *k + 1) {
-	info = 7;
-    } else if (*incx == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("STBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
+    info = 2;
+  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
+    info = 3;
+  } else if (*n < 0) {
+    info = 4;
+  } else if (*k < 0) {
+    info = 5;
+  } else if (*lda < *k + 1) {
+    info = 7;
+  } else if (*incx == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("STBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0) {
-	return 0;
-    }
+  if (*n == 0) {
+    return;
+  }
 
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+  nounit = lsame_(diag, "N");
 
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
+  /*     Set up the start point in X if the increment is not unity. This */
+  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
 
-    if (*incx <= 0) {
-	kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-	kx = 1;
-    }
+  if (*incx <= 0) {
+    kx = 1 - (*n - 1) * *incx;
+  } else if (*incx != 1) {
+    kx = 1;
+  }
 
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
+  /*     Start the operations. In this version the elements of A are */
+  /*     accessed sequentially with one pass through A. */
 
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+  if (lsame_(trans, "N")) {
+    /*         Form  x := A*x. */
 
-/*         Form  x := A*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    if (x[j] != 0.f) {
-			temp = x[j];
-			l = kplus1 - j;
-/* Computing MAX */
-			i__2 = 1, i__3 = j - *k;
-			i__4 = j - 1;
-			for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-			    x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L10: */
-			}
-			if (nounit) {
-			    x[j] *= a[kplus1 + j * a_dim1];
-			}
-		    }
-/* L20: */
-		}
-	    } else {
-		jx = kx;
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    if (x[jx] != 0.f) {
-			temp = x[jx];
-			ix = kx;
-			l = kplus1 - j;
-/* Computing MAX */
-			i__4 = 1, i__2 = j - *k;
-			i__3 = j - 1;
-			for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-			    x[ix] += temp * a[l + i__ + j * a_dim1];
-			    ix += *incx;
-/* L30: */
-			}
-			if (nounit) {
-			    x[jx] *= a[kplus1 + j * a_dim1];
-			}
-		    }
-		    jx += *incx;
-		    if (j > *k) {
-			kx += *incx;
-		    }
-/* L40: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    if (x[j] != 0.f) {
-			temp = x[j];
-			l = 1 - j;
-/* Computing MIN */
-			i__1 = *n, i__3 = j + *k;
-			i__4 = j + 1;
-			for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-			    x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L50: */
-			}
-			if (nounit) {
-			    x[j] *= a[j * a_dim1 + 1];
-			}
-		    }
-/* L60: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    if (x[jx] != 0.f) {
-			temp = x[jx];
-			ix = kx;
-			l = 1 - j;
-/* Computing MIN */
-			i__4 = *n, i__1 = j + *k;
-			i__3 = j + 1;
-			for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-			    x[ix] += temp * a[l + i__ + j * a_dim1];
-			    ix -= *incx;
-/* L70: */
-			}
-			if (nounit) {
-			    x[jx] *= a[j * a_dim1 + 1];
-			}
-		    }
-		    jx -= *incx;
-		    if (*n - j >= *k) {
-			kx -= *incx;
-		    }
-/* L80: */
-		}
-	    }
-	}
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          if (x[j] != 0.f) {
+            temp = x[j];
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__2 = 1, i__3 = j - *k;
+            i__4 = j - 1;
+            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+              x[i__] += temp * a[l + i__ + j * a_dim1];
+              /* L10: */
+            }
+            if (nounit) {
+              x[j] *= a[kplus1 + j * a_dim1];
+            }
+          }
+          /* L20: */
+        }
+      } else {
+        jx = kx;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          if (x[jx] != 0.f) {
+            temp = x[jx];
+            ix = kx;
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__4 = 1, i__2 = j - *k;
+            i__3 = j - 1;
+            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+              x[ix] += temp * a[l + i__ + j * a_dim1];
+              ix += *incx;
+              /* L30: */
+            }
+            if (nounit) {
+              x[jx] *= a[kplus1 + j * a_dim1];
+            }
+          }
+          jx += *incx;
+          if (j > *k) {
+            kx += *incx;
+          }
+          /* L40: */
+        }
+      }
     } else {
-
-/*        Form  x := A'*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    temp = x[j];
-		    l = kplus1 - j;
-		    if (nounit) {
-			temp *= a[kplus1 + j * a_dim1];
-		    }
-/* Computing MAX */
-		    i__4 = 1, i__1 = j - *k;
-		    i__3 = max(i__4,i__1);
-		    for (i__ = j - 1; i__ >= i__3; --i__) {
-			temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-		    }
-		    x[j] = temp;
-/* L100: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    temp = x[jx];
-		    kx -= *incx;
-		    ix = kx;
-		    l = kplus1 - j;
-		    if (nounit) {
-			temp *= a[kplus1 + j * a_dim1];
-		    }
-/* Computing MAX */
-		    i__4 = 1, i__1 = j - *k;
-		    i__3 = max(i__4,i__1);
-		    for (i__ = j - 1; i__ >= i__3; --i__) {
-			temp += a[l + i__ + j * a_dim1] * x[ix];
-			ix -= *incx;
-/* L110: */
-		    }
-		    x[jx] = temp;
-		    jx -= *incx;
-/* L120: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    temp = x[j];
-		    l = 1 - j;
-		    if (nounit) {
-			temp *= a[j * a_dim1 + 1];
-		    }
-/* Computing MIN */
-		    i__1 = *n, i__2 = j + *k;
-		    i__4 = min(i__1,i__2);
-		    for (i__ = j + 1; i__ <= i__4; ++i__) {
-			temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-		    }
-		    x[j] = temp;
-/* L140: */
-		}
-	    } else {
-		jx = kx;
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    temp = x[jx];
-		    kx += *incx;
-		    ix = kx;
-		    l = 1 - j;
-		    if (nounit) {
-			temp *= a[j * a_dim1 + 1];
-		    }
-/* Computing MIN */
-		    i__1 = *n, i__2 = j + *k;
-		    i__4 = min(i__1,i__2);
-		    for (i__ = j + 1; i__ <= i__4; ++i__) {
-			temp += a[l + i__ + j * a_dim1] * x[ix];
-			ix += *incx;
-/* L150: */
-		    }
-		    x[jx] = temp;
-		    jx += *incx;
-/* L160: */
-		}
-	    }
-	}
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          if (x[j] != 0.f) {
+            temp = x[j];
+            l = 1 - j;
+            /* Computing MIN */
+            i__1 = *n, i__3 = j + *k;
+            i__4 = j + 1;
+            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
+              x[i__] += temp * a[l + i__ + j * a_dim1];
+              /* L50: */
+            }
+            if (nounit) {
+              x[j] *= a[j * a_dim1 + 1];
+            }
+          }
+          /* L60: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          if (x[jx] != 0.f) {
+            temp = x[jx];
+            ix = kx;
+            l = 1 - j;
+            /* Computing MIN */
+            i__4 = *n, i__1 = j + *k;
+            i__3 = j + 1;
+            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
+              x[ix] += temp * a[l + i__ + j * a_dim1];
+              ix -= *incx;
+              /* L70: */
+            }
+            if (nounit) {
+              x[jx] *= a[j * a_dim1 + 1];
+            }
+          }
+          jx -= *incx;
+          if (*n - j >= *k) {
+            kx -= *incx;
+          }
+          /* L80: */
+        }
+      }
     }
+  } else {
+    /*        Form  x := A'*x. */
 
-    return 0;
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          temp = x[j];
+          l = kplus1 - j;
+          if (nounit) {
+            temp *= a[kplus1 + j * a_dim1];
+          }
+          /* Computing MAX */
+          i__4 = 1, i__1 = j - *k;
+          i__3 = max(i__4, i__1);
+          for (i__ = j - 1; i__ >= i__3; --i__) {
+            temp += a[l + i__ + j * a_dim1] * x[i__];
+            /* L90: */
+          }
+          x[j] = temp;
+          /* L100: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          temp = x[jx];
+          kx -= *incx;
+          ix = kx;
+          l = kplus1 - j;
+          if (nounit) {
+            temp *= a[kplus1 + j * a_dim1];
+          }
+          /* Computing MAX */
+          i__4 = 1, i__1 = j - *k;
+          i__3 = max(i__4, i__1);
+          for (i__ = j - 1; i__ >= i__3; --i__) {
+            temp += a[l + i__ + j * a_dim1] * x[ix];
+            ix -= *incx;
+            /* L110: */
+          }
+          x[jx] = temp;
+          jx -= *incx;
+          /* L120: */
+        }
+      }
+    } else {
+      if (*incx == 1) {
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          temp = x[j];
+          l = 1 - j;
+          if (nounit) {
+            temp *= a[j * a_dim1 + 1];
+          }
+          /* Computing MIN */
+          i__1 = *n, i__2 = j + *k;
+          i__4 = min(i__1, i__2);
+          for (i__ = j + 1; i__ <= i__4; ++i__) {
+            temp += a[l + i__ + j * a_dim1] * x[i__];
+            /* L130: */
+          }
+          x[j] = temp;
+          /* L140: */
+        }
+      } else {
+        jx = kx;
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          temp = x[jx];
+          kx += *incx;
+          ix = kx;
+          l = 1 - j;
+          if (nounit) {
+            temp *= a[j * a_dim1 + 1];
+          }
+          /* Computing MIN */
+          i__1 = *n, i__2 = j + *k;
+          i__4 = min(i__1, i__2);
+          for (i__ = j + 1; i__ <= i__4; ++i__) {
+            temp += a[l + i__ + j * a_dim1] * x[ix];
+            ix += *incx;
+            /* L150: */
+          }
+          x[jx] = temp;
+          jx += *incx;
+          /* L160: */
+        }
+      }
+    }
+  }
 
-/*     End of STBMV . */
+  /*     End of STBMV . */
 
 } /* stbmv_ */
-

diff --git a/blas/f2c/zhbmv.c b/blas/f2c/zhbmv.c
index 42da13d..cef1117 100644
--- a/blas/f2c/zhbmv.c
+++ b/blas/f2c/zhbmv.c

@@ -1,488 +1,456 @@
 /* zhbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex 
-	*alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *
-	incx, doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen 
-	uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
+static inline void d_cnjg(doublecomplex *r, doublecomplex *z) {
+  r->r = z->r;
+  r->i = -(z->i);
+}
 
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
+/* Subroutine */ void zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda,
+                             doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+  doublereal d__1;
+  doublecomplex z__1, z__2, z__3, z__4;
 
-    /* Local variables */
-    integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
+  doublecomplex temp1, temp2;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  ZHBMV  performs the matrix-vector  operation */
+  /*  ZHBMV  performs the matrix-vector  operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian band matrix, with k super-diagonals. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n hermitian band matrix, with k super-diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the band matrix A is being supplied as */
+  /*           follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  being supplied. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  being supplied. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry, K specifies the number of super-diagonals of the */
+  /*           matrix A. K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - COMPLEX*16      . */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the hermitian matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer the upper */
+  /*           triangular part of a hermitian band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the hermitian matrix, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer the lower */
+  /*           triangular part of a hermitian band matrix from conventional */
+  /*           full matrix storage to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
+  /*           Note that the imaginary parts of the diagonal elements need */
+  /*           not be set and are assumed to be zero. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
+  /*  X      - COMPLEX*16       array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the */
+  /*           vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
+  /*  BETA   - COMPLEX*16      . */
+  /*           On entry, BETA specifies the scalar beta. */
+  /*           Unchanged on exit. */
 
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
+  /*  Y      - COMPLEX*16       array of DIMENSION at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the */
+  /*           vector y. On exit, Y is overwritten by the updated vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
-    --y;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
+  --y;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*k < 0) {
-	info = 3;
-    } else if (*lda < *k + 1) {
-	info = 6;
-    } else if (*incx == 0) {
-	info = 8;
-    } else if (*incy == 0) {
-	info = 11;
-    }
-    if (info != 0) {
-	xerbla_("ZHBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*k < 0) {
+    info = 3;
+  } else if (*lda < *k + 1) {
+    info = 6;
+  } else if (*incx == 0) {
+    info = 8;
+  } else if (*incy == 0) {
+    info = 11;
+  }
+  if (info != 0) {
+    xerbla_("ZHBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-                                                         beta->i == 0.))) {
-	return 0;
-    }
+  if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array A */
+  /*     are accessed sequentially with one pass through A. */
+
+  /*     First form  y := beta*y. */
+
+  if (beta->r != 1. || beta->i != 0.) {
+    if (*incy == 1) {
+      if (beta->r == 0. && beta->i == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          y[i__2].r = 0., y[i__2].i = 0.;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          i__3 = i__;
+          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (beta->r == 0. && beta->i == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          y[i__2].r = 0., y[i__2].i = 0.;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          i__3 = iy;
+          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (alpha->r == 0. && alpha->i == 0.) {
+    return;
+  }
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when upper triangle of A is stored. */
+
+    kplus1 = *k + 1;
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = j;
+        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__2 = 1, i__3 = j - *k;
+        i__4 = j - 1;
+        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+          i__2 = i__;
+          i__3 = i__;
+          i__5 = l + i__ + j * a_dim1;
+          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+          i__2 = i__;
+          z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          /* L50: */
+        }
+        i__4 = j;
+        i__2 = j;
+        i__3 = kplus1 + j * a_dim1;
+        d__1 = a[i__3].r;
+        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+        z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i;
+        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+        y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__4 = jx;
+        z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        ix = kx;
+        iy = ky;
+        l = kplus1 - j;
+        /* Computing MAX */
+        i__4 = 1, i__2 = j - *k;
+        i__3 = j - 1;
+        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+          i__4 = iy;
+          i__2 = iy;
+          i__5 = l + i__ + j * a_dim1;
+          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
+          y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+          i__4 = ix;
+          z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        i__3 = jy;
+        i__4 = jy;
+        i__2 = kplus1 + j * a_dim1;
+        d__1 = a[i__2].r;
+        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+        z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i;
+        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+        jx += *incx;
+        jy += *incy;
+        if (j > *k) {
+          kx += *incx;
+          ky += *incy;
+        }
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when lower triangle of A is stored. */
 
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-	if (*incy == 1) {
-	    if (beta->r == 0. && beta->i == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    i__3 = i__;
-		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (beta->r == 0. && beta->i == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    y[i__2].r = 0., y[i__2].i = 0.;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    i__3 = iy;
-		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-	return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-	kplus1 = *k + 1;
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = j;
-		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__2 = 1, i__3 = j - *k;
-		i__4 = j - 1;
-		for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-		    i__2 = i__;
-		    i__3 = i__;
-		    i__5 = l + i__ + j * a_dim1;
-		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-		    i__2 = i__;
-		    z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i =
-			     z__3.r * x[i__2].i + z__3.i * x[i__2].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-		}
-		i__4 = j;
-		i__2 = j;
-		i__3 = kplus1 + j * a_dim1;
-		d__1 = a[i__3].r;
-		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-		z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i;
-		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-		y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__4 = jx;
-		z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i =
-			 alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		ix = kx;
-		iy = ky;
-		l = kplus1 - j;
-/* Computing MAX */
-		i__4 = 1, i__2 = j - *k;
-		i__3 = j - 1;
-		for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-		    i__4 = iy;
-		    i__2 = iy;
-		    i__5 = l + i__ + j * a_dim1;
-		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-		    y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-		    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-		    i__4 = ix;
-		    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-			     z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		i__3 = jy;
-		i__4 = jy;
-		i__2 = kplus1 + j * a_dim1;
-		d__1 = a[i__2].r;
-		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-		z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i;
-		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-		y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		jx += *incx;
-		jy += *incy;
-		if (j > *k) {
-		    kx += *incx;
-		    ky += *incy;
-		}
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__3 = j;
+        z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        i__3 = j;
+        i__4 = j;
+        i__2 = j * a_dim1 + 1;
+        d__1 = a[i__2].r;
+        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+        l = 1 - j;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          i__4 = i__;
+          i__2 = i__;
+          i__5 = l + i__ + j * a_dim1;
+          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
+          y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+          i__4 = i__;
+          z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          /* L90: */
+        }
+        i__3 = j;
+        i__4 = j;
+        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__3 = j;
-		z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
-			 alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		i__3 = j;
-		i__4 = j;
-		i__2 = j * a_dim1 + 1;
-		d__1 = a[i__2].r;
-		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-		z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		l = 1 - j;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    i__4 = i__;
-		    i__2 = i__;
-		    i__5 = l + i__ + j * a_dim1;
-		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-		    y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-		    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-		    i__4 = i__;
-		    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-			     z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-/* L90: */
-		}
-		i__3 = j;
-		i__4 = j;
-		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__3 = jx;
-		z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
-			 alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		i__3 = jy;
-		i__4 = jy;
-		i__2 = j * a_dim1 + 1;
-		d__1 = a[i__2].r;
-		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-		z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		l = 1 - j;
-		ix = jx;
-		iy = jy;
-/* Computing MIN */
-		i__4 = *n, i__2 = j + *k;
-		i__3 = min(i__4,i__2);
-		for (i__ = j + 1; i__ <= i__3; ++i__) {
-		    ix += *incx;
-		    iy += *incy;
-		    i__4 = iy;
-		    i__2 = iy;
-		    i__5 = l + i__ + j * a_dim1;
-		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-			    .r;
-		    z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-		    y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-		    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-		    i__4 = ix;
-		    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-			     z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-		}
-		i__3 = jy;
-		i__4 = jy;
-		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		jx += *incx;
-		jy += *incy;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__3 = jx;
+        z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        i__3 = jy;
+        i__4 = jy;
+        i__2 = j * a_dim1 + 1;
+        d__1 = a[i__2].r;
+        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+        l = 1 - j;
+        ix = jx;
+        iy = jy;
+        /* Computing MIN */
+        i__4 = *n, i__2 = j + *k;
+        i__3 = min(i__4, i__2);
+        for (i__ = j + 1; i__ <= i__3; ++i__) {
+          ix += *incx;
+          iy += *incy;
+          i__4 = iy;
+          i__2 = iy;
+          i__5 = l + i__ + j * a_dim1;
+          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
+          z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
+          y[i__4].r = z__1.r, y[i__4].i = z__1.i;
+          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+          i__4 = ix;
+          z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          /* L110: */
+        }
+        i__3 = jy;
+        i__4 = jy;
+        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+        jx += *incx;
+        jy += *incy;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of ZHBMV . */
+  /*     End of ZHBMV . */
 
 } /* zhbmv_ */
-

diff --git a/blas/f2c/zhpmv.c b/blas/f2c/zhpmv.c
index fbe2f42..6b9a000 100644
--- a/blas/f2c/zhpmv.c
+++ b/blas/f2c/zhpmv.c

@@ -1,438 +1,407 @@
 /* zhpmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int zhpmv_(char *uplo, integer *n, doublecomplex *alpha, 
-	doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex *
-	beta, doublecomplex *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
+static inline void d_cnjg(doublecomplex *r, doublecomplex *z) {
+  r->r = z->r;
+  r->i = -(z->i);
+}
 
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
+/* Subroutine */ void zhpmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x,
+                             integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) {
+  /* System generated locals */
+  integer i__1, i__2, i__3, i__4, i__5;
+  doublereal d__1;
+  doublecomplex z__1, z__2, z__3, z__4;
 
-    /* Local variables */
-    integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+  /* Local variables */
+  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
+  doublecomplex temp1, temp2;
+  extern logical lsame_(char *, char *);
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  ZHPMV  performs the matrix-vector operation */
+  /*  ZHPMV  performs the matrix-vector operation */
 
-/*     y := alpha*A*x + beta*y, */
+  /*     y := alpha*A*x + beta*y, */
 
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix, supplied in packed form. */
+  /*  where alpha and beta are scalars, x and y are n element vectors and */
+  /*  A is an n by n hermitian matrix, supplied in packed form. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the upper or lower */
+  /*           triangular part of the matrix A is supplied in the packed */
+  /*           array AP as follows: */
 
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
+  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
+  /*                                  supplied in AP. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
+  /*  ALPHA  - COMPLEX*16      . */
+  /*           On entry, ALPHA specifies the scalar alpha. */
+  /*           Unchanged on exit. */
 
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
+  /*  AP     - COMPLEX*16       array of DIMENSION at least */
+  /*           ( ( n*( n + 1 ) )/2 ). */
+  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
+  /*           contain the upper triangular part of the hermitian matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
+  /*           and a( 2, 2 ) respectively, and so on. */
+  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
+  /*           contain the lower triangular part of the hermitian matrix */
+  /*           packed sequentially, column by column, so that AP( 1 ) */
+  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
+  /*           and a( 3, 1 ) respectively, and so on. */
+  /*           Note that the imaginary parts of the diagonal elements need */
+  /*           not be set and are assumed to be zero. */
+  /*           Unchanged on exit. */
 
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
+  /*  X      - COMPLEX*16       array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. */
+  /*           Unchanged on exit. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
+  /*  BETA   - COMPLEX*16      . */
+  /*           On entry, BETA specifies the scalar beta. When BETA is */
+  /*           supplied as zero then Y need not be set on input. */
+  /*           Unchanged on exit. */
 
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
+  /*  Y      - COMPLEX*16       array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
+  /*           Before entry, the incremented array Y must contain the n */
+  /*           element vector y. On exit, Y is overwritten by the updated */
+  /*           vector y. */
 
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCY   - INTEGER. */
+  /*           On entry, INCY specifies the increment for the elements of */
+  /*           Y. INCY must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
+  /* Parameter adjustments */
+  --y;
+  --x;
+  --ap;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*incx == 0) {
-	info = 6;
-    } else if (*incy == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("ZHPMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (*n < 0) {
+    info = 2;
+  } else if (*incx == 0) {
+    info = 6;
+  } else if (*incy == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("ZHPMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-                                                         beta->i == 0.))) {
-	return 0;
-    }
+  if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) {
+    return;
+  }
 
-/*     Set up the start points in  X  and  Y. */
+  /*     Set up the start points in  X  and  Y. */
 
-    if (*incx > 0) {
-	kx = 1;
+  if (*incx > 0) {
+    kx = 1;
+  } else {
+    kx = 1 - (*n - 1) * *incx;
+  }
+  if (*incy > 0) {
+    ky = 1;
+  } else {
+    ky = 1 - (*n - 1) * *incy;
+  }
+
+  /*     Start the operations. In this version the elements of the array AP */
+  /*     are accessed sequentially with one pass through AP. */
+
+  /*     First form  y := beta*y. */
+
+  if (beta->r != 1. || beta->i != 0.) {
+    if (*incy == 1) {
+      if (beta->r == 0. && beta->i == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          y[i__2].r = 0., y[i__2].i = 0.;
+          /* L10: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = i__;
+          i__3 = i__;
+          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+          /* L20: */
+        }
+      }
     } else {
-	kx = 1 - (*n - 1) * *incx;
+      iy = ky;
+      if (beta->r == 0. && beta->i == 0.) {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          y[i__2].r = 0., y[i__2].i = 0.;
+          iy += *incy;
+          /* L30: */
+        }
+      } else {
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          i__2 = iy;
+          i__3 = iy;
+          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
+          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+          iy += *incy;
+          /* L40: */
+        }
+      }
     }
-    if (*incy > 0) {
-	ky = 1;
+  }
+  if (alpha->r == 0. && alpha->i == 0.) {
+    return;
+  }
+  kk = 1;
+  if (lsame_(uplo, "U")) {
+    /*        Form  y  when AP contains the upper triangle. */
+
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = j;
+        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        k = kk;
+        i__2 = j - 1;
+        for (i__ = 1; i__ <= i__2; ++i__) {
+          i__3 = i__;
+          i__4 = i__;
+          i__5 = k;
+          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+          d_cnjg(&z__3, &ap[k]);
+          i__3 = i__;
+          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          ++k;
+          /* L50: */
+        }
+        i__2 = j;
+        i__3 = j;
+        i__4 = kk + j - 1;
+        d__1 = ap[i__4].r;
+        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+        z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
+        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+        kk += j;
+        /* L60: */
+      }
     } else {
-	ky = 1 - (*n - 1) * *incy;
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = jx;
+        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        ix = kx;
+        iy = ky;
+        i__2 = kk + j - 2;
+        for (k = kk; k <= i__2; ++k) {
+          i__3 = iy;
+          i__4 = iy;
+          i__5 = k;
+          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+          d_cnjg(&z__3, &ap[k]);
+          i__3 = ix;
+          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          ix += *incx;
+          iy += *incy;
+          /* L70: */
+        }
+        i__2 = jy;
+        i__3 = jy;
+        i__4 = kk + j - 1;
+        d__1 = ap[i__4].r;
+        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+        z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
+        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+        jx += *incx;
+        jy += *incy;
+        kk += j;
+        /* L80: */
+      }
     }
+  } else {
+    /*        Form  y  when AP contains the lower triangle. */
 
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-	if (*incy == 1) {
-	    if (beta->r == 0. && beta->i == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = i__;
-		    i__3 = i__;
-		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (beta->r == 0. && beta->i == 0.) {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    y[i__2].r = 0., y[i__2].i = 0.;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = *n;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    i__2 = iy;
-		    i__3 = iy;
-		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-			    .r;
-		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-	return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = j;
-		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		k = kk;
-		i__2 = j - 1;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    i__3 = i__;
-		    i__4 = i__;
-		    i__5 = k;
-		    z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		    d_cnjg(&z__3, &ap[k]);
-		    i__3 = i__;
-		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-		    ++k;
-/* L50: */
-		}
-		i__2 = j;
-		i__3 = j;
-		i__4 = kk + j - 1;
-		d__1 = ap[i__4].r;
-		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-		z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		kk += j;
-/* L60: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = jx;
-		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		ix = kx;
-		iy = ky;
-		i__2 = kk + j - 2;
-		for (k = kk; k <= i__2; ++k) {
-		    i__3 = iy;
-		    i__4 = iy;
-		    i__5 = k;
-		    z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		    d_cnjg(&z__3, &ap[k]);
-		    i__3 = ix;
-		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-		    ix += *incx;
-		    iy += *incy;
-/* L70: */
-		}
-		i__2 = jy;
-		i__3 = jy;
-		i__4 = kk + j - 1;
-		d__1 = ap[i__4].r;
-		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-		z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		jx += *incx;
-		jy += *incy;
-		kk += j;
-/* L80: */
-	    }
-	}
+    if (*incx == 1 && *incy == 1) {
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = j;
+        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        i__2 = j;
+        i__3 = j;
+        i__4 = kk;
+        d__1 = ap[i__4].r;
+        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+        k = kk + 1;
+        i__2 = *n;
+        for (i__ = j + 1; i__ <= i__2; ++i__) {
+          i__3 = i__;
+          i__4 = i__;
+          i__5 = k;
+          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+          d_cnjg(&z__3, &ap[k]);
+          i__3 = i__;
+          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          ++k;
+          /* L90: */
+        }
+        i__2 = j;
+        i__3 = j;
+        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+        kk += *n - j + 1;
+        /* L100: */
+      }
     } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-	if (*incx == 1 && *incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = j;
-		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		i__2 = j;
-		i__3 = j;
-		i__4 = kk;
-		d__1 = ap[i__4].r;
-		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		k = kk + 1;
-		i__2 = *n;
-		for (i__ = j + 1; i__ <= i__2; ++i__) {
-		    i__3 = i__;
-		    i__4 = i__;
-		    i__5 = k;
-		    z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		    d_cnjg(&z__3, &ap[k]);
-		    i__3 = i__;
-		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-		    ++k;
-/* L90: */
-		}
-		i__2 = j;
-		i__3 = j;
-		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		kk += *n - j + 1;
-/* L100: */
-	    }
-	} else {
-	    jx = kx;
-	    jy = ky;
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		i__2 = jx;
-		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-		temp1.r = z__1.r, temp1.i = z__1.i;
-		temp2.r = 0., temp2.i = 0.;
-		i__2 = jy;
-		i__3 = jy;
-		i__4 = kk;
-		d__1 = ap[i__4].r;
-		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		ix = jx;
-		iy = jy;
-		i__2 = kk + *n - j;
-		for (k = kk + 1; k <= i__2; ++k) {
-		    ix += *incx;
-		    iy += *incy;
-		    i__3 = iy;
-		    i__4 = iy;
-		    i__5 = k;
-		    z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-			    z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-			    .r;
-		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-		    d_cnjg(&z__3, &ap[k]);
-		    i__3 = ix;
-		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-		    temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-		}
-		i__2 = jy;
-		i__3 = jy;
-		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-			alpha->r * temp2.i + alpha->i * temp2.r;
-		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-		jx += *incx;
-		jy += *incy;
-		kk += *n - j + 1;
-/* L120: */
-	    }
-	}
+      jx = kx;
+      jy = ky;
+      i__1 = *n;
+      for (j = 1; j <= i__1; ++j) {
+        i__2 = jx;
+        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+        temp1.r = z__1.r, temp1.i = z__1.i;
+        temp2.r = 0., temp2.i = 0.;
+        i__2 = jy;
+        i__3 = jy;
+        i__4 = kk;
+        d__1 = ap[i__4].r;
+        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+        ix = jx;
+        iy = jy;
+        i__2 = kk + *n - j;
+        for (k = kk + 1; k <= i__2; ++k) {
+          ix += *incx;
+          iy += *incy;
+          i__3 = iy;
+          i__4 = iy;
+          i__5 = k;
+          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
+          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+          d_cnjg(&z__3, &ap[k]);
+          i__3 = ix;
+          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+          temp2.r = z__1.r, temp2.i = z__1.i;
+          /* L110: */
+        }
+        i__2 = jy;
+        i__3 = jy;
+        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
+        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+        jx += *incx;
+        jy += *incy;
+        kk += *n - j + 1;
+        /* L120: */
+      }
     }
+  }
 
-    return 0;
-
-/*     End of ZHPMV . */
+  /*     End of ZHPMV . */
 
 } /* zhpmv_ */
-

diff --git a/blas/f2c/ztbmv.c b/blas/f2c/ztbmv.c
index 3bf0beb..2fbfa00 100644
--- a/blas/f2c/ztbmv.c
+++ b/blas/f2c/ztbmv.c

@@ -1,647 +1,586 @@
 /* ztbmv.f -- translated by f2c (version 20100827).
    You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+        on Microsoft Windows system, link with libf2c.lib;
+        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+        or, if you install libf2c.a in a standard place, with -lf2c -lm
+        -- in that order, at the end of the command line, as in
+                cc *.o -lf2c -lm
+        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
-		http://www.netlib.org/f2c/libf2c.zip
+                http://www.netlib.org/f2c/libf2c.zip
 */
 
 #include "datatypes.h"
 
-/* Subroutine */ int ztbmv_(char *uplo, char *trans, char *diag, integer *n, 
-	integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer 
-	*incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
+static inline void d_cnjg(doublecomplex *r, doublecomplex *z) {
+  r->r = z->r;
+  r->i = -(z->i);
+}
 
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
+/* Subroutine */ void ztbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a,
+                             integer *lda, doublecomplex *x, integer *incx) {
+  /* System generated locals */
+  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+  doublecomplex z__1, z__2, z__3;
 
-    /* Local variables */
-    integer i__, j, l, ix, jx, kx, info;
-    doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    logical noconj, nounit;
+  /* Local variables */
+  integer i__, j, l, ix, jx, kx, info;
+  doublecomplex temp;
+  extern logical lsame_(char *, char *);
+  integer kplus1;
+  extern /* Subroutine */ void xerbla_(const char *, integer *);
+  logical noconj, nounit;
 
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
+  /*     .. Scalar Arguments .. */
+  /*     .. */
+  /*     .. Array Arguments .. */
+  /*     .. */
 
-/*  Purpose */
-/*  ======= */
+  /*  Purpose */
+  /*  ======= */
 
-/*  ZTBMV  performs one of the matrix-vector operations */
+  /*  ZTBMV  performs one of the matrix-vector operations */
 
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
+  /*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
 
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
+  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
+  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
 
-/*  Arguments */
-/*  ========== */
+  /*  Arguments */
+  /*  ========== */
 
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
+  /*  UPLO   - CHARACTER*1. */
+  /*           On entry, UPLO specifies whether the matrix is an upper or */
+  /*           lower triangular matrix as follows: */
 
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
+  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
 
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
+  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
+  /*  TRANS  - CHARACTER*1. */
+  /*           On entry, TRANS specifies the operation to be performed as */
+  /*           follows: */
 
-/*              TRANS = 'N' or 'n'   x := A*x. */
+  /*              TRANS = 'N' or 'n'   x := A*x. */
 
-/*              TRANS = 'T' or 't'   x := A'*x. */
+  /*              TRANS = 'T' or 't'   x := A'*x. */
 
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
+  /*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
+  /*  DIAG   - CHARACTER*1. */
+  /*           On entry, DIAG specifies whether or not A is unit */
+  /*           triangular as follows: */
 
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
+  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
 
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
+  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
+  /*                                  triangular. */
 
-/*           Unchanged on exit. */
+  /*           Unchanged on exit. */
 
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
+  /*  N      - INTEGER. */
+  /*           On entry, N specifies the order of the matrix A. */
+  /*           N must be at least zero. */
+  /*           Unchanged on exit. */
 
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
+  /*  K      - INTEGER. */
+  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
+  /*           super-diagonals of the matrix A. */
+  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
+  /*           sub-diagonals of the matrix A. */
+  /*           K must satisfy  0 .le. K. */
+  /*           Unchanged on exit. */
 
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
+  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the upper triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row */
+  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
+  /*           position 2 in row k, and so on. The top left k by k triangle */
+  /*           of the array A is not referenced. */
+  /*           The following program segment will transfer an upper */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = K + 1 - J */
+  /*                    DO 10, I = MAX( 1, J - K ), J */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
+  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
+  /*           by n part of the array A must contain the lower triangular */
+  /*           band part of the matrix of coefficients, supplied column by */
+  /*           column, with the leading diagonal of the matrix in row 1 of */
+  /*           the array, the first sub-diagonal starting at position 1 in */
+  /*           row 2, and so on. The bottom right k by k triangle of the */
+  /*           array A is not referenced. */
+  /*           The following program segment will transfer a lower */
+  /*           triangular band matrix from conventional full matrix storage */
+  /*           to band storage: */
 
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
+  /*                 DO 20, J = 1, N */
+  /*                    M = 1 - J */
+  /*                    DO 10, I = J, MIN( N, J + K ) */
+  /*                       A( M + I, J ) = matrix( I, J ) */
+  /*              10    CONTINUE */
+  /*              20 CONTINUE */
 
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
+  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
+  /*           corresponding to the diagonal elements of the matrix are not */
+  /*           referenced, but are assumed to be unity. */
+  /*           Unchanged on exit. */
 
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
+  /*  LDA    - INTEGER. */
+  /*           On entry, LDA specifies the first dimension of A as declared */
+  /*           in the calling (sub) program. LDA must be at least */
+  /*           ( k + 1 ). */
+  /*           Unchanged on exit. */
 
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           transformed vector x. */
+  /*  X      - COMPLEX*16       array of dimension at least */
+  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
+  /*           Before entry, the incremented array X must contain the n */
+  /*           element vector x. On exit, X is overwritten with the */
+  /*           transformed vector x. */
 
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
+  /*  INCX   - INTEGER. */
+  /*           On entry, INCX specifies the increment for the elements of */
+  /*           X. INCX must not be zero. */
+  /*           Unchanged on exit. */
 
-/*  Further Details */
-/*  =============== */
+  /*  Further Details */
+  /*  =============== */
 
-/*  Level 2 Blas routine. */
+  /*  Level 2 Blas routine. */
 
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
+  /*  -- Written on 22-October-1986. */
+  /*     Jack Dongarra, Argonne National Lab. */
+  /*     Jeremy Du Croz, Nag Central Office. */
+  /*     Sven Hammarling, Nag Central Office. */
+  /*     Richard Hanson, Sandia National Labs. */
 
-/*  ===================================================================== */
+  /*  ===================================================================== */
 
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
+  /*     .. Parameters .. */
+  /*     .. */
+  /*     .. Local Scalars .. */
+  /*     .. */
+  /*     .. External Functions .. */
+  /*     .. */
+  /*     .. External Subroutines .. */
+  /*     .. */
+  /*     .. Intrinsic Functions .. */
+  /*     .. */
 
-/*     Test the input parameters. */
+  /*     Test the input parameters. */
 
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
+  /* Parameter adjustments */
+  a_dim1 = *lda;
+  a_offset = 1 + a_dim1;
+  a -= a_offset;
+  --x;
 
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-	    ftnlen)1, (ftnlen)1)) {
-	info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-	    "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-	    ftnlen)1)) {
-	info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-	    "N", (ftnlen)1, (ftnlen)1)) {
-	info = 3;
-    } else if (*n < 0) {
-	info = 4;
-    } else if (*k < 0) {
-	info = 5;
-    } else if (*lda < *k + 1) {
-	info = 7;
-    } else if (*incx == 0) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("ZTBMV ", &info, (ftnlen)6);
-	return 0;
-    }
+  /* Function Body */
+  info = 0;
+  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
+    info = 1;
+  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
+    info = 2;
+  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
+    info = 3;
+  } else if (*n < 0) {
+    info = 4;
+  } else if (*k < 0) {
+    info = 5;
+  } else if (*lda < *k + 1) {
+    info = 7;
+  } else if (*incx == 0) {
+    info = 9;
+  }
+  if (info != 0) {
+    xerbla_("ZTBMV ", &info);
+    return;
+  }
 
-/*     Quick return if possible. */
+  /*     Quick return if possible. */
 
-    if (*n == 0) {
-	return 0;
-    }
+  if (*n == 0) {
+    return;
+  }
 
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
+  noconj = lsame_(trans, "T");
+  nounit = lsame_(diag, "N");
 
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
+  /*     Set up the start point in X if the increment is not unity. This */
+  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
 
-    if (*incx <= 0) {
-	kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-	kx = 1;
-    }
+  if (*incx <= 0) {
+    kx = 1 - (*n - 1) * *incx;
+  } else if (*incx != 1) {
+    kx = 1;
+  }
 
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
+  /*     Start the operations. In this version the elements of A are */
+  /*     accessed sequentially with one pass through A. */
 
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
+  if (lsame_(trans, "N")) {
+    /*         Form  x := A*x. */
 
-/*         Form  x := A*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    i__2 = j;
-		    if (x[i__2].r != 0. || x[i__2].i != 0.) {
-			i__2 = j;
-			temp.r = x[i__2].r, temp.i = x[i__2].i;
-			l = kplus1 - j;
-/* Computing MAX */
-			i__2 = 1, i__3 = j - *k;
-			i__4 = j - 1;
-			for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-			    i__2 = i__;
-			    i__3 = i__;
-			    i__5 = l + i__ + j * a_dim1;
-			    z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-				    z__2.i = temp.r * a[i__5].i + temp.i * a[
-				    i__5].r;
-			    z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-				    z__2.i;
-			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L10: */
-			}
-			if (nounit) {
-			    i__4 = j;
-			    i__2 = j;
-			    i__3 = kplus1 + j * a_dim1;
-			    z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-				    i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-				    x[i__2].i * a[i__3].r;
-			    x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-			}
-		    }
-/* L20: */
-		}
-	    } else {
-		jx = kx;
-		i__1 = *n;
-		for (j = 1; j <= i__1; ++j) {
-		    i__4 = jx;
-		    if (x[i__4].r != 0. || x[i__4].i != 0.) {
-			i__4 = jx;
-			temp.r = x[i__4].r, temp.i = x[i__4].i;
-			ix = kx;
-			l = kplus1 - j;
-/* Computing MAX */
-			i__4 = 1, i__2 = j - *k;
-			i__3 = j - 1;
-			for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-			    i__4 = ix;
-			    i__2 = ix;
-			    i__5 = l + i__ + j * a_dim1;
-			    z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-				    z__2.i = temp.r * a[i__5].i + temp.i * a[
-				    i__5].r;
-			    z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + 
-				    z__2.i;
-			    x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-			    ix += *incx;
-/* L30: */
-			}
-			if (nounit) {
-			    i__3 = jx;
-			    i__4 = jx;
-			    i__2 = kplus1 + j * a_dim1;
-			    z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
-				    i__2].i, z__1.i = x[i__4].r * a[i__2].i + 
-				    x[i__4].i * a[i__2].r;
-			    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-			}
-		    }
-		    jx += *incx;
-		    if (j > *k) {
-			kx += *incx;
-		    }
-/* L40: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    i__1 = j;
-		    if (x[i__1].r != 0. || x[i__1].i != 0.) {
-			i__1 = j;
-			temp.r = x[i__1].r, temp.i = x[i__1].i;
-			l = 1 - j;
-/* Computing MIN */
-			i__1 = *n, i__3 = j + *k;
-			i__4 = j + 1;
-			for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-			    i__1 = i__;
-			    i__3 = i__;
-			    i__2 = l + i__ + j * a_dim1;
-			    z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-				    z__2.i = temp.r * a[i__2].i + temp.i * a[
-				    i__2].r;
-			    z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-				    z__2.i;
-			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L50: */
-			}
-			if (nounit) {
-			    i__4 = j;
-			    i__1 = j;
-			    i__3 = j * a_dim1 + 1;
-			    z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
-				    i__3].i, z__1.i = x[i__1].r * a[i__3].i + 
-				    x[i__1].i * a[i__3].r;
-			    x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-			}
-		    }
-/* L60: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    i__4 = jx;
-		    if (x[i__4].r != 0. || x[i__4].i != 0.) {
-			i__4 = jx;
-			temp.r = x[i__4].r, temp.i = x[i__4].i;
-			ix = kx;
-			l = 1 - j;
-/* Computing MIN */
-			i__4 = *n, i__1 = j + *k;
-			i__3 = j + 1;
-			for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-			    i__4 = ix;
-			    i__1 = ix;
-			    i__2 = l + i__ + j * a_dim1;
-			    z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-				    z__2.i = temp.r * a[i__2].i + temp.i * a[
-				    i__2].r;
-			    z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + 
-				    z__2.i;
-			    x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-			    ix -= *incx;
-/* L70: */
-			}
-			if (nounit) {
-			    i__3 = jx;
-			    i__4 = jx;
-			    i__1 = j * a_dim1 + 1;
-			    z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
-				    i__1].i, z__1.i = x[i__4].r * a[i__1].i + 
-				    x[i__4].i * a[i__1].r;
-			    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-			}
-		    }
-		    jx -= *incx;
-		    if (*n - j >= *k) {
-			kx -= *incx;
-		    }
-/* L80: */
-		}
-	    }
-	}
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          i__2 = j;
+          if (x[i__2].r != 0. || x[i__2].i != 0.) {
+            i__2 = j;
+            temp.r = x[i__2].r, temp.i = x[i__2].i;
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__2 = 1, i__3 = j - *k;
+            i__4 = j - 1;
+            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
+              i__2 = i__;
+              i__3 = i__;
+              i__5 = l + i__ + j * a_dim1;
+              z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
+              z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i;
+              x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+              /* L10: */
+            }
+            if (nounit) {
+              i__4 = j;
+              i__2 = j;
+              i__3 = kplus1 + j * a_dim1;
+              z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i,
+              z__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r;
+              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+            }
+          }
+          /* L20: */
+        }
+      } else {
+        jx = kx;
+        i__1 = *n;
+        for (j = 1; j <= i__1; ++j) {
+          i__4 = jx;
+          if (x[i__4].r != 0. || x[i__4].i != 0.) {
+            i__4 = jx;
+            temp.r = x[i__4].r, temp.i = x[i__4].i;
+            ix = kx;
+            l = kplus1 - j;
+            /* Computing MAX */
+            i__4 = 1, i__2 = j - *k;
+            i__3 = j - 1;
+            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
+              i__4 = ix;
+              i__2 = ix;
+              i__5 = l + i__ + j * a_dim1;
+              z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
+              z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + z__2.i;
+              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+              ix += *incx;
+              /* L30: */
+            }
+            if (nounit) {
+              i__3 = jx;
+              i__4 = jx;
+              i__2 = kplus1 + j * a_dim1;
+              z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i,
+              z__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r;
+              x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+            }
+          }
+          jx += *incx;
+          if (j > *k) {
+            kx += *incx;
+          }
+          /* L40: */
+        }
+      }
     } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-	if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-	    kplus1 = *k + 1;
-	    if (*incx == 1) {
-		for (j = *n; j >= 1; --j) {
-		    i__3 = j;
-		    temp.r = x[i__3].r, temp.i = x[i__3].i;
-		    l = kplus1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__3 = kplus1 + j * a_dim1;
-			    z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-				    z__1.i = temp.r * a[i__3].i + temp.i * a[
-				    i__3].r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    i__4 = l + i__ + j * a_dim1;
-			    i__1 = i__;
-			    z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-				    i__1].i, z__2.i = a[i__4].r * x[i__1].i + 
-				    a[i__4].i * x[i__1].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-			}
-		    } else {
-			if (nounit) {
-			    d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-			    z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-				    z__1.i = temp.r * z__2.i + temp.i * 
-				    z__2.r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-			    i__4 = i__;
-			    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-				    z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-				    i__4].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-			}
-		    }
-		    i__3 = j;
-		    x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-		}
-	    } else {
-		kx += (*n - 1) * *incx;
-		jx = kx;
-		for (j = *n; j >= 1; --j) {
-		    i__3 = jx;
-		    temp.r = x[i__3].r, temp.i = x[i__3].i;
-		    kx -= *incx;
-		    ix = kx;
-		    l = kplus1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__3 = kplus1 + j * a_dim1;
-			    z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-				    z__1.i = temp.r * a[i__3].i + temp.i * a[
-				    i__3].r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    i__4 = l + i__ + j * a_dim1;
-			    i__1 = ix;
-			    z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-				    i__1].i, z__2.i = a[i__4].r * x[i__1].i + 
-				    a[i__4].i * x[i__1].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			    ix -= *incx;
-/* L120: */
-			}
-		    } else {
-			if (nounit) {
-			    d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-			    z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-				    z__1.i = temp.r * z__2.i + temp.i * 
-				    z__2.r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MAX */
-			i__4 = 1, i__1 = j - *k;
-			i__3 = max(i__4,i__1);
-			for (i__ = j - 1; i__ >= i__3; --i__) {
-			    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-			    i__4 = ix;
-			    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-				    z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-				    i__4].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			    ix -= *incx;
-/* L130: */
-			}
-		    }
-		    i__3 = jx;
-		    x[i__3].r = temp.r, x[i__3].i = temp.i;
-		    jx -= *incx;
-/* L140: */
-		}
-	    }
-	} else {
-	    if (*incx == 1) {
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    i__4 = j;
-		    temp.r = x[i__4].r, temp.i = x[i__4].i;
-		    l = 1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__4 = j * a_dim1 + 1;
-			    z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-				    z__1.i = temp.r * a[i__4].i + temp.i * a[
-				    i__4].r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    i__1 = l + i__ + j * a_dim1;
-			    i__2 = i__;
-			    z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-				    i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-				    a[i__1].i * x[i__2].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-			}
-		    } else {
-			if (nounit) {
-			    d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-			    z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-				    z__1.i = temp.r * z__2.i + temp.i * 
-				    z__2.r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-			    i__1 = i__;
-			    z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-				    z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-				    i__1].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-			}
-		    }
-		    i__4 = j;
-		    x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-		}
-	    } else {
-		jx = kx;
-		i__3 = *n;
-		for (j = 1; j <= i__3; ++j) {
-		    i__4 = jx;
-		    temp.r = x[i__4].r, temp.i = x[i__4].i;
-		    kx += *incx;
-		    ix = kx;
-		    l = 1 - j;
-		    if (noconj) {
-			if (nounit) {
-			    i__4 = j * a_dim1 + 1;
-			    z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-				    z__1.i = temp.r * a[i__4].i + temp.i * a[
-				    i__4].r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    i__1 = l + i__ + j * a_dim1;
-			    i__2 = ix;
-			    z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-				    i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-				    a[i__1].i * x[i__2].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			    ix += *incx;
-/* L180: */
-			}
-		    } else {
-			if (nounit) {
-			    d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-			    z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-				    z__1.i = temp.r * z__2.i + temp.i * 
-				    z__2.r;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			}
-/* Computing MIN */
-			i__1 = *n, i__2 = j + *k;
-			i__4 = min(i__1,i__2);
-			for (i__ = j + 1; i__ <= i__4; ++i__) {
-			    d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-			    i__1 = ix;
-			    z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-				    z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-				    i__1].r;
-			    z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-				    z__2.i;
-			    temp.r = z__1.r, temp.i = z__1.i;
-			    ix += *incx;
-/* L190: */
-			}
-		    }
-		    i__4 = jx;
-		    x[i__4].r = temp.r, x[i__4].i = temp.i;
-		    jx += *incx;
-/* L200: */
-		}
-	    }
-	}
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          i__1 = j;
+          if (x[i__1].r != 0. || x[i__1].i != 0.) {
+            i__1 = j;
+            temp.r = x[i__1].r, temp.i = x[i__1].i;
+            l = 1 - j;
+            /* Computing MIN */
+            i__1 = *n, i__3 = j + *k;
+            i__4 = j + 1;
+            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
+              i__1 = i__;
+              i__3 = i__;
+              i__2 = l + i__ + j * a_dim1;
+              z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
+              z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i;
+              x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+              /* L50: */
+            }
+            if (nounit) {
+              i__4 = j;
+              i__1 = j;
+              i__3 = j * a_dim1 + 1;
+              z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i,
+              z__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r;
+              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+            }
+          }
+          /* L60: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          i__4 = jx;
+          if (x[i__4].r != 0. || x[i__4].i != 0.) {
+            i__4 = jx;
+            temp.r = x[i__4].r, temp.i = x[i__4].i;
+            ix = kx;
+            l = 1 - j;
+            /* Computing MIN */
+            i__4 = *n, i__1 = j + *k;
+            i__3 = j + 1;
+            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
+              i__4 = ix;
+              i__1 = ix;
+              i__2 = l + i__ + j * a_dim1;
+              z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
+              z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + z__2.i;
+              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
+              ix -= *incx;
+              /* L70: */
+            }
+            if (nounit) {
+              i__3 = jx;
+              i__4 = jx;
+              i__1 = j * a_dim1 + 1;
+              z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i,
+              z__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r;
+              x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+            }
+          }
+          jx -= *incx;
+          if (*n - j >= *k) {
+            kx -= *incx;
+          }
+          /* L80: */
+        }
+      }
     }
+  } else {
+    /*        Form  x := A'*x  or  x := conjg( A' )*x. */
 
-    return 0;
+    if (lsame_(uplo, "U")) {
+      kplus1 = *k + 1;
+      if (*incx == 1) {
+        for (j = *n; j >= 1; --j) {
+          i__3 = j;
+          temp.r = x[i__3].r, temp.i = x[i__3].i;
+          l = kplus1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__3 = kplus1 + j * a_dim1;
+              z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              i__4 = l + i__ + j * a_dim1;
+              i__1 = i__;
+              z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
+              z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              /* L90: */
+            }
+          } else {
+            if (nounit) {
+              d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
+              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+              i__4 = i__;
+              z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              /* L100: */
+            }
+          }
+          i__3 = j;
+          x[i__3].r = temp.r, x[i__3].i = temp.i;
+          /* L110: */
+        }
+      } else {
+        kx += (*n - 1) * *incx;
+        jx = kx;
+        for (j = *n; j >= 1; --j) {
+          i__3 = jx;
+          temp.r = x[i__3].r, temp.i = x[i__3].i;
+          kx -= *incx;
+          ix = kx;
+          l = kplus1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__3 = kplus1 + j * a_dim1;
+              z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              i__4 = l + i__ + j * a_dim1;
+              i__1 = ix;
+              z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
+              z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              ix -= *incx;
+              /* L120: */
+            }
+          } else {
+            if (nounit) {
+              d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
+              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MAX */
+            i__4 = 1, i__1 = j - *k;
+            i__3 = max(i__4, i__1);
+            for (i__ = j - 1; i__ >= i__3; --i__) {
+              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+              i__4 = ix;
+              z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              ix -= *incx;
+              /* L130: */
+            }
+          }
+          i__3 = jx;
+          x[i__3].r = temp.r, x[i__3].i = temp.i;
+          jx -= *incx;
+          /* L140: */
+        }
+      }
+    } else {
+      if (*incx == 1) {
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          i__4 = j;
+          temp.r = x[i__4].r, temp.i = x[i__4].i;
+          l = 1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__4 = j * a_dim1 + 1;
+              z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              i__1 = l + i__ + j * a_dim1;
+              i__2 = i__;
+              z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
+              z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              /* L150: */
+            }
+          } else {
+            if (nounit) {
+              d_cnjg(&z__2, &a[j * a_dim1 + 1]);
+              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+              i__1 = i__;
+              z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              /* L160: */
+            }
+          }
+          i__4 = j;
+          x[i__4].r = temp.r, x[i__4].i = temp.i;
+          /* L170: */
+        }
+      } else {
+        jx = kx;
+        i__3 = *n;
+        for (j = 1; j <= i__3; ++j) {
+          i__4 = jx;
+          temp.r = x[i__4].r, temp.i = x[i__4].i;
+          kx += *incx;
+          ix = kx;
+          l = 1 - j;
+          if (noconj) {
+            if (nounit) {
+              i__4 = j * a_dim1 + 1;
+              z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              i__1 = l + i__ + j * a_dim1;
+              i__2 = ix;
+              z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
+              z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              ix += *incx;
+              /* L180: */
+            }
+          } else {
+            if (nounit) {
+              d_cnjg(&z__2, &a[j * a_dim1 + 1]);
+              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
+              temp.r = z__1.r, temp.i = z__1.i;
+            }
+            /* Computing MIN */
+            i__1 = *n, i__2 = j + *k;
+            i__4 = min(i__1, i__2);
+            for (i__ = j + 1; i__ <= i__4; ++i__) {
+              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
+              i__1 = ix;
+              z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r;
+              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+              temp.r = z__1.r, temp.i = z__1.i;
+              ix += *incx;
+              /* L190: */
+            }
+          }
+          i__4 = jx;
+          x[i__4].r = temp.r, x[i__4].i = temp.i;
+          jx += *incx;
+          /* L200: */
+        }
+      }
+    }
+  }
 
-/*     End of ZTBMV . */
+  /*     End of ZTBMV . */
 
 } /* ztbmv_ */
-

diff --git a/blas/level1_cplx_impl.h b/blas/level1_cplx_impl.h
index 552b384..be88b92 100644
--- a/blas/level1_cplx_impl.h
+++ b/blas/level1_cplx_impl.h

@@ -24,7 +24,7 @@
 
 // computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
 // res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n
-RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC(asum))(int *n, RealScalar *px, int *incx) {
+extern "C" RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC_NAME(asum))(int *n, RealScalar *px, int *incx) {
   //   std::cerr << "__asum " << *n << " " << *incx << "\n";
   Complex *x = reinterpret_cast<Complex *>(px);
 
@@ -36,7 +36,7 @@
     return make_vector(x, *n, std::abs(*incx)).unaryExpr<scalar_norm1_op>().sum();
 }
 
-int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amax))(int *n, RealScalar *px, int *incx) {
+extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amax))(int *n, RealScalar *px, int *incx) {
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
@@ -48,7 +48,7 @@
   return int(ret) + 1;
 }
 
-int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amin))(int *n, RealScalar *px, int *incx) {
+extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amin))(int *n, RealScalar *px, int *incx) {
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
@@ -61,13 +61,13 @@
 }
 
 // computes a dot product of a conjugated vector with another vector.
-int EIGEN_BLAS_FUNC(dotcw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) {
+EIGEN_BLAS_FUNC(dotcw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) {
   //   std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n";
   Scalar *res = reinterpret_cast<Scalar *>(pres);
 
   if (*n <= 0) {
     *res = Scalar(0);
-    return 0;
+    return;
   }
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -83,16 +83,15 @@
     *res = (make_vector(x, *n, *incx).dot(make_vector(y, *n, -*incy).reverse()));
   else if (*incx < 0 && *incy < 0)
     *res = (make_vector(x, *n, -*incx).reverse().dot(make_vector(y, *n, -*incy).reverse()));
-  return 0;
 }
 
 // computes a vector-vector dot product without complex conjugation.
-int EIGEN_BLAS_FUNC(dotuw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) {
+EIGEN_BLAS_FUNC(dotuw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) {
   Scalar *res = reinterpret_cast<Scalar *>(pres);
 
   if (*n <= 0) {
     *res = Scalar(0);
-    return 0;
+    return;
   }
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -108,10 +107,9 @@
     *res = (make_vector(x, *n, *incx).cwiseProduct(make_vector(y, *n, -*incy).reverse())).sum();
   else if (*incx < 0 && *incy < 0)
     *res = (make_vector(x, *n, -*incx).reverse().cwiseProduct(make_vector(y, *n, -*incy).reverse())).sum();
-  return 0;
 }
 
-RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC(nrm2))(int *n, RealScalar *px, int *incx) {
+extern "C" RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC_NAME(nrm2))(int *n, RealScalar *px, int *incx) {
   //   std::cerr << "__nrm2 " << *n << " " << *incx << "\n";
   if (*n <= 0) return 0;
 
@@ -122,9 +120,9 @@
   return make_vector(x, *n, *incx).stableNorm();
 }
 
-int EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, rot))(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy,
-                                                        RealScalar *pc, RealScalar *ps) {
-  if (*n <= 0) return 0;
+EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, rot))
+(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) {
+  if (*n <= 0) return;
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
@@ -144,12 +142,10 @@
     internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s));
   else
     internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s));
-
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, scal))(int *n, RealScalar *palpha, RealScalar *px, int *incx) {
-  if (*n <= 0) return 0;
+EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, scal))(int *n, RealScalar *palpha, RealScalar *px, int *incx) {
+  if (*n <= 0) return;
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
   RealScalar alpha = *palpha;
@@ -160,6 +156,4 @@
     make_vector(x, *n) *= alpha;
   else
     make_vector(x, *n, std::abs(*incx)) *= alpha;
-
-  return 0;
 }

diff --git a/blas/level1_impl.h b/blas/level1_impl.h
index f1ff3cc..2422d10 100644
--- a/blas/level1_impl.h
+++ b/blas/level1_impl.h

@@ -9,13 +9,13 @@
 
 #include "common.h"
 
-int EIGEN_BLAS_FUNC(axpy)(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py,
-                          const int *incy) {
+EIGEN_BLAS_FUNC(axpy)
+(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py, const int *incy) {
   const Scalar *x = reinterpret_cast<const Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
   Scalar alpha = *reinterpret_cast<const Scalar *>(palpha);
 
-  if (*n <= 0) return 0;
+  if (*n <= 0) return;
 
   if (*incx == 1 && *incy == 1)
     make_vector(y, *n) += alpha * make_vector(x, *n);
@@ -27,12 +27,10 @@
     make_vector(y, *n, *incy) += alpha * make_vector(x, *n, -*incx).reverse();
   else if (*incx < 0 && *incy < 0)
     make_vector(y, *n, -*incy).reverse() += alpha * make_vector(x, *n, -*incx).reverse();
-
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) {
-  if (*n <= 0) return 0;
+EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) {
+  if (*n <= 0) return;
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
@@ -49,11 +47,9 @@
       y += *incy;
     }
   }
-
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) {
+EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) {
   using std::abs;
   using std::sqrt;
 
@@ -104,12 +100,10 @@
   //   r.makeGivens(a,b);
   //   *c = r.c();
   //   *s = r.s();
-
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) {
-  if (*n <= 0) return 0;
+EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) {
+  if (*n <= 0) return;
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar alpha = *reinterpret_cast<Scalar *>(palpha);
@@ -118,12 +112,10 @@
     make_vector(x, *n) *= alpha;
   else
     make_vector(x, *n, std::abs(*incx)) *= alpha;
-
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) {
-  if (*n <= 0) return 0;
+EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) {
+  if (*n <= 0) return;
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
@@ -138,6 +130,4 @@
     make_vector(y, *n, *incy).swap(make_vector(x, *n, -*incx).reverse());
   else if (*incx < 0 && *incy < 0)
     make_vector(y, *n, -*incy).reverse().swap(make_vector(x, *n, -*incx).reverse());
-
-  return 1;
 }

diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h
index d5cb573..cd9c189 100644
--- a/blas/level1_real_impl.h
+++ b/blas/level1_real_impl.h

@@ -11,7 +11,7 @@
 
 // computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum
 // res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n
-RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) {
+extern "C" RealScalar EIGEN_BLAS_FUNC_NAME(asum)(int *n, Scalar *px, int *incx) {
   //   std::cerr << "_asum " << *n << " " << *incx << "\n";
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
@@ -24,7 +24,7 @@
     return make_vector(x, *n, std::abs(*incx)).cwiseAbs().sum();
 }
 
-int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amax))(int *n, RealScalar *px, int *incx) {
+extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amax))(int *n, Scalar *px, int *incx) {
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
@@ -36,7 +36,7 @@
   return int(ret) + 1;
 }
 
-int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amin))(int *n, RealScalar *px, int *incx) {
+extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amin))(int *n, Scalar *px, int *incx) {
   if (*n <= 0) return 0;
   Scalar *x = reinterpret_cast<Scalar *>(px);
 
@@ -49,7 +49,7 @@
 }
 
 // computes a vector-vector dot product.
-Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) {
+extern "C" Scalar EIGEN_BLAS_FUNC_NAME(dot)(int *n, Scalar *px, int *incx, Scalar *py, int *incy) {
   //   std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n";
 
   if (*n <= 0) return 0;
@@ -73,7 +73,7 @@
 
 // computes the Euclidean norm of a vector.
 // FIXME
-Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) {
+extern "C" Scalar EIGEN_BLAS_FUNC_NAME(nrm2)(int *n, Scalar *px, int *incx) {
   //   std::cerr << "_nrm2 " << *n << " " << *incx << "\n";
   if (*n <= 0) return 0;
 
@@ -85,9 +85,9 @@
     return make_vector(x, *n, std::abs(*incx)).stableNorm();
 }
 
-int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) {
+EIGEN_BLAS_FUNC(rot)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pc, Scalar *ps) {
   //   std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n";
-  if (*n <= 0) return 0;
+  if (*n <= 0) return;
 
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
@@ -106,13 +106,11 @@
     internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s));
   else
     internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s));
-
-  return 0;
 }
 
 /*
 // performs rotation of points in the modified plane.
-int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param)
+EIGEN_BLAS_FUNC(rotm)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *param)
 {
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* y = reinterpret_cast<Scalar*>(py);
@@ -123,7 +121,7 @@
 }
 
 // computes the modified parameters for a Givens rotation.
-int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
+EIGEN_BLAS_FUNC(rotmg)(Scalar *d1, Scalar *d2, Scalar *x1, Scalar *x2, Scalar *param)
 {
   // TODO
 

diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h
index 8c56a6f..f04dda1 100644
--- a/blas/level2_cplx_impl.h
+++ b/blas/level2_cplx_impl.h

@@ -16,9 +16,9 @@
  *  where alpha and beta are scalars, x and y are n element vectors and
  *  A is an n by n hermitian matrix.
  */
-int EIGEN_BLAS_FUNC(hemv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa,
-                          const int *lda, const RealScalar *px, const int *incx, const RealScalar *pbeta,
-                          RealScalar *py, const int *incy) {
+EIGEN_BLAS_FUNC(hemv)
+(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *px,
+ const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) {
   typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -45,9 +45,9 @@
     info = 7;
   else if (*incy == 0)
     info = 10;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMV ", &info);
 
-  if (*n == 0) return 1;
+  if (*n == 0) return;
 
   const Scalar *actual_x = get_compact_vector(x, *n, *incx);
   Scalar *actual_y = get_compact_vector(y, *n, *incy);
@@ -61,15 +61,13 @@
 
   if (alpha != Scalar(0)) {
     int code = UPLO(*uplo);
-    if (code >= 2 || func[code] == 0) return 0;
+    if (code >= 2 || func[code] == 0) return;
 
     func[code](*n, a, *lda, actual_x, actual_y, alpha);
   }
 
   if (actual_x != x) delete[] actual_x;
   if (actual_y != y) delete[] copy_back(actual_y, y, *n, *incy);
-
-  return 1;
 }
 
 /**  ZHBMV  performs the matrix-vector  operation
@@ -79,7 +77,7 @@
  *  where alpha and beta are scalars, x and y are n element vectors and
  *  A is an n by n hermitian band matrix, with k super-diagonals.
  */
-// int EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
+// EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
 //                           RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
 // {
 //   return 1;
@@ -92,7 +90,7 @@
  *  where alpha and beta are scalars, x and y are n element vectors and
  *  A is an n by n hermitian matrix, supplied in packed form.
  */
-// int EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar
+// EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar
 // *beta, RealScalar *y, int *incy)
 // {
 //   return 1;
@@ -105,7 +103,7 @@
  *  where alpha is a real scalar, x is an n element vector and A is an
  *  n by n hermitian matrix, supplied in packed form.
  */
-int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) {
+EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) {
   typedef void (*functype)(int, Scalar *, const Scalar *, RealScalar);
   static const functype func[2] = {
       // array index: UP
@@ -125,20 +123,18 @@
     info = 2;
   else if (*incx == 0)
     info = 5;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR  ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR  ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *n, *incx);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, ap, x_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
-
-  return 1;
 }
 
 /**  ZHPR2  performs the hermitian rank 2 operation
@@ -148,8 +144,8 @@
  *  where alpha is a scalar, x and y are n element vectors and A is an
  *  n by n hermitian matrix, supplied in packed form.
  */
-int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy,
-                          RealScalar *pap) {
+EIGEN_BLAS_FUNC(hpr2)
+(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) {
   typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -172,22 +168,20 @@
     info = 5;
   else if (*incy == 0)
     info = 7;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR2 ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR2 ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *n, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, ap, x_cpy, y_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
-
-  return 1;
 }
 
 /**  ZHER   performs the hermitian rank 1 operation
@@ -197,7 +191,7 @@
  *  where alpha is a real scalar, x is an n element vector and A is an
  *  n by n hermitian matrix.
  */
-int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) {
+EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) {
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &);
   static const functype func[2] = {
       // array index: UP
@@ -219,22 +213,20 @@
     info = 5;
   else if (*lda < std::max(1, *n))
     info = 7;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HER  ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HER  ", &info);
 
-  if (alpha == RealScalar(0)) return 1;
+  if (alpha == RealScalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *n, *incx);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, a, *lda, x_cpy, x_cpy, alpha);
 
   matrix(a, *n, *n, *lda).diagonal().imag().setZero();
 
   if (x_cpy != x) delete[] x_cpy;
-
-  return 1;
 }
 
 /**  ZHER2  performs the hermitian rank 2 operation
@@ -244,8 +236,9 @@
  *  where alpha is a scalar, x and y are n element vectors and A is an n
  *  by n hermitian matrix.
  */
-int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy,
-                          RealScalar *pa, int *lda) {
+EIGEN_BLAS_FUNC(her2)
+(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa,
+ int *lda) {
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -270,15 +263,15 @@
     info = 7;
   else if (*lda < std::max(1, *n))
     info = 9;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2 ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2 ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *n, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, a, *lda, x_cpy, y_cpy, alpha);
 
@@ -286,8 +279,6 @@
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
-
-  return 1;
 }
 
 /**  ZGERU  performs the rank 1 operation
@@ -297,8 +288,8 @@
  *  where alpha is a scalar, x is an m element vector, y is an n element
  *  vector and A is an m by n matrix.
  */
-int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy,
-                          RealScalar *pa, int *lda) {
+EIGEN_BLAS_FUNC(geru)
+(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) {
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
   Scalar *a = reinterpret_cast<Scalar *>(pa);
@@ -315,9 +306,9 @@
     info = 7;
   else if (*lda < std::max(1, *m))
     info = 9;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "GERU ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "GERU ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
@@ -326,8 +317,6 @@
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
-
-  return 1;
 }
 
 /**  ZGERC  performs the rank 1 operation
@@ -337,8 +326,8 @@
  *  where alpha is a scalar, x is an m element vector, y is an n element
  *  vector and A is an m by n matrix.
  */
-int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy,
-                          RealScalar *pa, int *lda) {
+EIGEN_BLAS_FUNC(gerc)
+(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) {
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
   Scalar *a = reinterpret_cast<Scalar *>(pa);
@@ -355,9 +344,9 @@
     info = 7;
   else if (*lda < std::max(1, *m))
     info = 9;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "GERC ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "GERC ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
@@ -366,6 +355,4 @@
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
-
-  return 1;
 }

diff --git a/blas/level2_impl.h b/blas/level2_impl.h
index 0138a9c..5721ee6 100644
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h

@@ -22,9 +22,9 @@
   }
 };
 
-int EIGEN_BLAS_FUNC(gemv)(const char *opa, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa,
-                          const int *lda, const RealScalar *pb, const int *incb, const RealScalar *pbeta,
-                          RealScalar *pc, const int *incc) {
+EIGEN_BLAS_FUNC(gemv)
+(const char *opa, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda,
+ const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
   static const functype func[4] = {// array index: NOTR
                                    (general_matrix_vector_product_wrapper<int, Scalar, ColMajor, false, false>::run),
@@ -53,9 +53,9 @@
     info = 8;
   else if (*incc == 0)
     info = 11;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMV ", &info);
 
-  if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return 0;
+  if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return;
 
   int actual_m = *m;
   int actual_n = *n;
@@ -72,18 +72,17 @@
       make_vector(actual_c, actual_m) *= beta;
   }
 
-  if (code >= 4 || func[code] == 0) return 0;
+  if (code >= 4 || func[code] == 0) return;
 
   func[code](actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha);
 
   if (actual_b != b) delete[] actual_b;
   if (actual_c != c) delete[] copy_back(actual_c, c, actual_m, *incc);
-
-  return 1;
 }
 
-int EIGEN_BLAS_FUNC(trsv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa,
-                          const int *lda, RealScalar *pb, const int *incb) {
+EIGEN_BLAS_FUNC(trsv)
+(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda,
+ RealScalar *pb, const int *incb) {
   typedef void (*functype)(int, const Scalar *, int, Scalar *);
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
@@ -127,7 +126,7 @@
     info = 6;
   else if (*incb == 0)
     info = 8;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSV ", &info);
 
   Scalar *actual_b = get_compact_vector(b, *n, *incb);
 
@@ -135,12 +134,11 @@
   func[code](*n, a, *lda, actual_b);
 
   if (actual_b != b) delete[] copy_back(actual_b, b, *n, *incb);
-
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(trmv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa,
-                          const int *lda, RealScalar *pb, const int *incb) {
+EIGEN_BLAS_FUNC(trmv)
+(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda,
+ RealScalar *pb, const int *incb) {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar &);
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
@@ -186,23 +184,21 @@
     info = 6;
   else if (*incb == 0)
     info = 8;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMV ", &info);
 
-  if (*n == 0) return 1;
+  if (*n == 0) return;
 
   Scalar *actual_b = get_compact_vector(b, *n, *incb);
   Matrix<Scalar, Dynamic, 1> res(*n);
   res.setZero();
 
   int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
-  if (code >= 16 || func[code] == 0) return 0;
+  if (code >= 16 || func[code] == 0) return;
 
   func[code](*n, *n, a, *lda, actual_b, 1, res.data(), 1, Scalar(1));
 
   copy_back(res.data(), b, *n, *incb);
   if (actual_b != b) delete[] actual_b;
-
-  return 1;
 }
 
 /**  GBMV  performs one of the matrix-vector operations
@@ -212,8 +208,9 @@
  *  where alpha and beta are scalars, x and y are vectors and A is an
  *  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
  */
-int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda,
-                          RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) {
+EIGEN_BLAS_FUNC(gbmv)
+(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx,
+ RealScalar *pbeta, RealScalar *py, int *incy) {
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   const Scalar *x = reinterpret_cast<const Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
@@ -238,9 +235,9 @@
     info = 10;
   else if (*incy == 0)
     info = 13;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "GBMV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "GBMV ", &info);
 
-  if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return 0;
+  if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return;
 
   int actual_m = *m;
   int actual_n = *n;
@@ -276,8 +273,6 @@
 
   if (actual_x != x) delete[] actual_x;
   if (actual_y != y) delete[] copy_back(actual_y, y, actual_m, *incy);
-
-  return 0;
 }
 
 #if 0
@@ -288,7 +283,7 @@
   *  where x is an n element vector and  A is an n by n unit, or non-unit,
   *  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
   */
-int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
+EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
 {
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* x = reinterpret_cast<Scalar*>(px);
@@ -305,8 +300,7 @@
   if(info)
     return xerbla_(SCALAR_SUFFIX_UP"TBMV ",&info,6);
 
-  if(*n==0)
-    return 0;
+  if(*n==0) return;
 
   int actual_n = *n;
 
@@ -334,8 +328,6 @@
 
   if(actual_x!=x) delete[] actual_x;
   if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy);
-
-  return 0;
 }
 #endif
 
@@ -350,8 +342,8 @@
  *  No test for singularity or near-singularity is included in this
  *  routine. Such tests must be performed before calling this routine.
  */
-int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px,
-                          int *incx) {
+EIGEN_BLAS_FUNC(tbsv)
+(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) {
   typedef void (*functype)(int, int, const Scalar *, int, Scalar *);
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
@@ -403,22 +395,20 @@
     info = 7;
   else if (*incx == 0)
     info = 9;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TBSV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TBSV ", &info);
 
-  if (*n == 0 || (*k == 0 && DIAG(*diag) == UNIT)) return 0;
+  if (*n == 0 || (*k == 0 && DIAG(*diag) == UNIT)) return;
 
   int actual_n = *n;
 
   Scalar *actual_x = get_compact_vector(x, actual_n, *incx);
 
   int code = OP(*op) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
-  if (code >= 16 || func[code] == 0) return 0;
+  if (code >= 16 || func[code] == 0) return;
 
   func[code](*n, *k, a, *lda, actual_x);
 
   if (actual_x != x) delete[] copy_back(actual_x, x, actual_n, *incx);
-
-  return 0;
 }
 
 /**  DTPMV  performs one of the matrix-vector operations
@@ -428,7 +418,7 @@
  *  where x is an n element vector and  A is an n by n unit, or non-unit,
  *  upper or lower triangular matrix, supplied in packed form.
  */
-int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
+EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
   typedef void (*functype)(int, const Scalar *, const Scalar *, Scalar *, Scalar);
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
@@ -480,23 +470,21 @@
     info = 4;
   else if (*incx == 0)
     info = 7;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TPMV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TPMV ", &info);
 
-  if (*n == 0) return 1;
+  if (*n == 0) return;
 
   Scalar *actual_x = get_compact_vector(x, *n, *incx);
   Matrix<Scalar, Dynamic, 1> res(*n);
   res.setZero();
 
   int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
-  if (code >= 16 || func[code] == 0) return 0;
+  if (code >= 16 || func[code] == 0) return;
 
   func[code](*n, ap, actual_x, res.data(), Scalar(1));
 
   copy_back(res.data(), x, *n, *incx);
   if (actual_x != x) delete[] actual_x;
-
-  return 1;
 }
 
 /**  DTPSV  solves one of the systems of equations
@@ -509,7 +497,7 @@
  *  No test for singularity or near-singularity is included in this
  *  routine. Such tests must be performed before calling this routine.
  */
-int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
+EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) {
   typedef void (*functype)(int, const Scalar *, Scalar *);
   static const functype func[16] = {
       // array index: NOTR  | (UP << 2) | (NUNIT << 3)
@@ -557,7 +545,7 @@
     info = 4;
   else if (*incx == 0)
     info = 7;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TPSV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TPSV ", &info);
 
   Scalar *actual_x = get_compact_vector(x, *n, *incx);
 
@@ -565,6 +553,4 @@
   func[code](*n, ap, actual_x);
 
   if (actual_x != x) delete[] copy_back(actual_x, x, *n, *incx);
-
-  return 1;
 }

diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
index 29f5a56..5653767 100644
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h

@@ -10,9 +10,9 @@
 #include "common.h"
 
 // y = alpha*A*x + beta*y
-int EIGEN_BLAS_FUNC(symv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa,
-                          const int *lda, const RealScalar *px, const int *incx, const RealScalar *pbeta,
-                          RealScalar *py, const int *incy) {
+EIGEN_BLAS_FUNC(symv)
+(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *px,
+ const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) {
   typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -39,9 +39,9 @@
     info = 7;
   else if (*incy == 0)
     info = 10;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMV ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMV ", &info);
 
-  if (*n == 0) return 0;
+  if (*n == 0) return;
 
   const Scalar *actual_x = get_compact_vector(x, *n, *incx);
   Scalar *actual_y = get_compact_vector(y, *n, *incy);
@@ -54,19 +54,18 @@
   }
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, a, *lda, actual_x, actual_y, alpha);
 
   if (actual_x != x) delete[] actual_x;
   if (actual_y != y) delete[] copy_back(actual_y, y, *n, *incy);
-
-  return 1;
 }
 
 // C := alpha*x*x' + C
-int EIGEN_BLAS_FUNC(syr)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px,
-                         const int *incx, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(syr)
+(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc,
+ const int *ldc) {
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &);
   static const functype func[2] = {
       // array index: UP
@@ -88,26 +87,25 @@
     info = 5;
   else if (*ldc < std::max(1, *n))
     info = 7;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR  ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR  ", &info);
 
-  if (*n == 0 || alpha == Scalar(0)) return 1;
+  if (*n == 0 || alpha == Scalar(0)) return;
 
   // if the increment is not 1, let's copy it to a temporary vector to enable vectorization
   const Scalar *x_cpy = get_compact_vector(x, *n, *incx);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, c, *ldc, x_cpy, x_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
-
-  return 1;
 }
 
 // C := alpha*x*y' + alpha*y*x' + C
-int EIGEN_BLAS_FUNC(syr2)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px,
-                          const int *incx, const RealScalar *py, const int *incy, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(syr2)
+(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, const RealScalar *py,
+ const int *incy, RealScalar *pc, const int *ldc) {
   typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -132,15 +130,15 @@
     info = 7;
   else if (*ldc < std::max(1, *n))
     info = 9;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2 ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2 ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   const Scalar *x_cpy = get_compact_vector(x, *n, *incx);
   const Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, c, *ldc, x_cpy, y_cpy, alpha);
 
@@ -152,7 +150,6 @@
   //     return 0;
 
   //   func[code](*n, a, *inca, b, *incb, c, *ldc, alpha);
-  return 1;
 }
 
 /**  DSBMV  performs the matrix-vector  operation
@@ -162,7 +159,7 @@
  *  where alpha and beta are scalars, x and y are n element vectors and
  *  A is an n by n symmetric band matrix, with k super-diagonals.
  */
-// int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
+// EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
 //                            RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
 // {
 //   return 1;
@@ -176,7 +173,7 @@
  *  A is an n by n symmetric matrix, supplied in packed form.
  *
  */
-// int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar
+// EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar
 // *beta, RealScalar *y, int *incy)
 // {
 //   return 1;
@@ -189,7 +186,7 @@
  *  where alpha is a real scalar, x is an n element vector and A is an
  *  n by n symmetric matrix, supplied in packed form.
  */
-int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) {
+EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) {
   typedef void (*functype)(int, Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -209,20 +206,18 @@
     info = 2;
   else if (*incx == 0)
     info = 5;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR  ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR  ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *n, *incx);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, ap, x_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
-
-  return 1;
 }
 
 /**  DSPR2  performs the symmetric rank 2 operation
@@ -232,8 +227,8 @@
  *  where alpha is a scalar, x and y are n element vectors and A is an
  *  n by n symmetric matrix, supplied in packed form.
  */
-int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy,
-                          RealScalar *pap) {
+EIGEN_BLAS_FUNC(spr2)
+(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) {
   typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar);
   static const functype func[2] = {
       // array index: UP
@@ -256,22 +251,20 @@
     info = 5;
   else if (*incy == 0)
     info = 7;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR2 ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR2 ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *n, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
 
   int code = UPLO(*uplo);
-  if (code >= 2 || func[code] == 0) return 0;
+  if (code >= 2 || func[code] == 0) return;
 
   func[code](*n, ap, x_cpy, y_cpy, alpha);
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
-
-  return 1;
 }
 
 /**  DGER   performs the rank 1 operation
@@ -281,8 +274,8 @@
  *  where alpha is a scalar, x is an m element vector, y is an n element
  *  vector and A is an m by n matrix.
  */
-int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa,
-                         int *lda) {
+EIGEN_BLAS_FUNC(ger)
+(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, int *lda) {
   Scalar *x = reinterpret_cast<Scalar *>(px);
   Scalar *y = reinterpret_cast<Scalar *>(py);
   Scalar *a = reinterpret_cast<Scalar *>(pa);
@@ -299,9 +292,9 @@
     info = 7;
   else if (*lda < std::max(1, *m))
     info = 9;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "GER  ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "GER  ", &info);
 
-  if (alpha == Scalar(0)) return 1;
+  if (alpha == Scalar(0)) return;
 
   Scalar *x_cpy = get_compact_vector(x, *m, *incx);
   Scalar *y_cpy = get_compact_vector(y, *n, *incy);
@@ -310,6 +303,4 @@
 
   if (x_cpy != x) delete[] x_cpy;
   if (y_cpy != y) delete[] y_cpy;
-
-  return 1;
 }

diff --git a/blas/level3_impl.h b/blas/level3_impl.h
index 5af5027..a6ddf26 100644
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h

@@ -9,13 +9,14 @@
 #include <iostream>
 #include "common.h"
 
-int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const int *n, const int *k,
-                          const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pb,
-                          const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(gemm)
+(const char *opa, const char *opb, const int *m, const int *n, const int *k, const RealScalar *palpha,
+ const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc,
+ const int *ldc) {
   //   std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " <<
   //   *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex,
-                           Scalar *, DenseIndex, DenseIndex, Scalar, internal::level3_blocking<Scalar, Scalar> &,
+                           Scalar *, DenseIndex, DenseIndex, Scalar, Eigen::internal::level3_blocking<Scalar, Scalar> &,
                            Eigen::internal::GemmParallelInfo<DenseIndex> *);
   static const functype func[12] = {
       // array index: NOTR  | (NOTR << 2)
@@ -72,9 +73,9 @@
     info = 10;
   else if (*ldc < std::max(1, *m))
     info = 13;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMM ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMM ", &info);
 
-  if (*m == 0 || *n == 0) return 0;
+  if (*m == 0 || *n == 0) return;
 
   if (beta != Scalar(1)) {
     if (beta == Scalar(0))
@@ -83,22 +84,21 @@
       matrix(c, *m, *n, *ldc) *= beta;
   }
 
-  if (*k == 0) return 0;
+  if (*k == 0) return;
 
   internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, *k, 1, true);
 
   int code = OP(*opa) | (OP(*opb) << 2);
   func[code](*m, *n, *k, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking, 0);
-  return 0;
 }
 
-int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m,
-                          const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb,
-                          const int *ldb) {
+EIGEN_BLAS_FUNC(trsm)
+(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n,
+ const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) {
   //   std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " "
   //   << *palpha << " " << *lda << " " << *ldb<< "\n";
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex,
-                           internal::level3_blocking<Scalar, Scalar> &);
+                           Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[32] = {
       // array index: NOTR  | (LEFT  << 2) | (UP << 3) | (NUNIT << 4)
       (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, ColMajor, ColMajor, 1>::run),
@@ -190,9 +190,9 @@
     info = 9;
   else if (*ldb < std::max(1, *m))
     info = 11;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSM ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSM ", &info);
 
-  if (*m == 0 || *n == 0) return 0;
+  if (*m == 0 || *n == 0) return;
 
   int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
 
@@ -207,15 +207,13 @@
   }
 
   if (alpha != Scalar(1)) matrix(b, *m, *n, *ldb) *= alpha;
-
-  return 0;
 }
 
 // b = alpha*op(a)*b  for side = 'L'or'l'
 // b = alpha*b*op(a)  for side = 'R'or'r'
-int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m,
-                          const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb,
-                          const int *ldb) {
+EIGEN_BLAS_FUNC(trmm)
+(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n,
+ const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) {
   //   std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " "
   //   << *lda << " " << *ldb << " " << *palpha << "\n";
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex,
@@ -324,11 +322,11 @@
     info = 9;
   else if (*ldb < std::max(1, *m))
     info = 11;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMM ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMM ", &info);
 
   int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
 
-  if (*m == 0 || *n == 0) return 1;
+  if (*m == 0 || *n == 0) return;
 
   // FIXME find a way to avoid this copy
   Matrix<Scalar, Dynamic, Dynamic, ColMajor> tmp = matrix(b, *m, *n, *ldb);
@@ -343,14 +341,13 @@
                                                                                                    false);
     func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking);
   }
-  return 1;
 }
 
 // c = alpha*a*b + beta*c  for side = 'L'or'l'
 // c = alpha*b*a + beta*c  for side = 'R'or'r
-int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha,
-                          const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb,
-                          const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(symm)
+(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa,
+ const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb
   //   << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n";
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
@@ -374,7 +371,7 @@
     info = 9;
   else if (*ldc < std::max(1, *m))
     info = 12;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMM ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMM ", &info);
 
   if (beta != Scalar(1)) {
     if (beta == Scalar(0))
@@ -383,9 +380,7 @@
       matrix(c, *m, *n, *ldc) *= beta;
   }
 
-  if (*m == 0 || *n == 0) {
-    return 1;
-  }
+  if (*m == 0 || *n == 0) return;
 
   int size = (SIDE(*side) == LEFT) ? (*m) : (*n);
 #if ISCOMPLEX
@@ -413,7 +408,7 @@
       internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor,
                                            1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
     else
-      return 0;
+      return;
   else if (SIDE(*side) == RIGHT)
     if (UPLO(*uplo) == UP)
       internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, RowMajor, true, false, ColMajor,
@@ -422,19 +417,17 @@
       internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor,
                                            1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
     else
-      return 0;
+      return;
   else
-    return 0;
+    return;
 #endif
-
-  return 0;
 }
 
 // c = alpha*a*a' + beta*c  for op = 'N'or'n'
 // c = alpha*a'*a + beta*c  for op = 'T'or't','C'or'c'
-int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha,
-                          const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc,
-                          const int *ldc) {
+EIGEN_BLAS_FUNC(syrk)
+(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa,
+ const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " "
   //   << *pbeta << " " << *ldc << "\n";
 #if !ISCOMPLEX
@@ -481,7 +474,7 @@
     info = 7;
   else if (*ldc < std::max(1, *n))
     info = 10;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYRK ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYRK ", &info);
 
   if (beta != Scalar(1)) {
     if (UPLO(*uplo) == UP)
@@ -495,7 +488,7 @@
       matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
   }
 
-  if (*n == 0 || *k == 0) return 0;
+  if (*n == 0 || *k == 0) return;
 
 #if ISCOMPLEX
   // FIXME add support for symmetric complex matrix
@@ -520,15 +513,13 @@
   int code = OP(*op) | (UPLO(*uplo) << 2);
   func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
 #endif
-
-  return 0;
 }
 
 // c = alpha*a*b' + alpha*b*a' + beta*c  for op = 'N'or'n'
 // c = alpha*a'*b + alpha*b'*a + beta*c  for op = 'T'or't'
-int EIGEN_BLAS_FUNC(syr2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha,
-                           const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb,
-                           const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(syr2k)
+(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa,
+ const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   const Scalar *b = reinterpret_cast<const Scalar *>(pb);
   Scalar *c = reinterpret_cast<Scalar *>(pc);
@@ -553,7 +544,7 @@
     info = 9;
   else if (*ldc < std::max(1, *n))
     info = 12;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2K", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2K", &info);
 
   if (beta != Scalar(1)) {
     if (UPLO(*uplo) == UP)
@@ -567,7 +558,7 @@
       matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta;
   }
 
-  if (*k == 0) return 1;
+  if (*k == 0) return;
 
   if (OP(*op) == NOTR) {
     if (UPLO(*uplo) == UP) {
@@ -588,17 +579,15 @@
           alpha * matrix(a, *k, *n, *lda).transpose() * matrix(b, *k, *n, *ldb) +
           alpha * matrix(b, *k, *n, *ldb).transpose() * matrix(a, *k, *n, *lda);
   }
-
-  return 0;
 }
 
 #if ISCOMPLEX
 
 // c = alpha*a*b + beta*c  for side = 'L'or'l'
 // c = alpha*b*a + beta*c  for side = 'R'or'r
-int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha,
-                          const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb,
-                          const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(hemm)
+(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa,
+ const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   const Scalar *b = reinterpret_cast<const Scalar *>(pb);
   Scalar *c = reinterpret_cast<Scalar *>(pc);
@@ -623,16 +612,14 @@
     info = 9;
   else if (*ldc < std::max(1, *m))
     info = 12;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMM ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMM ", &info);
 
   if (beta == Scalar(0))
     matrix(c, *m, *n, *ldc).setZero();
   else if (beta != Scalar(1))
     matrix(c, *m, *n, *ldc) *= beta;
 
-  if (*m == 0 || *n == 0) {
-    return 1;
-  }
+  if (*m == 0 || *n == 0) return;
 
   int size = (SIDE(*side) == LEFT) ? (*m) : (*n);
   internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1, false);
@@ -645,7 +632,7 @@
       internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor,
                                            1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
     else
-      return 0;
+      return;
   } else if (SIDE(*side) == RIGHT) {
     if (UPLO(*uplo) == UP)
       matrix(c, *m, *n, *ldc) +=
@@ -658,24 +645,22 @@
       internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor,
                                            1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
     else
-      return 0;
+      return;
   } else {
-    return 0;
+    return;
   }
-
-  return 0;
 }
 
 // c = alpha*a*conj(a') + beta*c  for op = 'N'or'n'
 // c = alpha*conj(a')*a + beta*c  for op  = 'C'or'c'
-int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha,
-                          const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc,
-                          const int *ldc) {
+EIGEN_BLAS_FUNC(herk)
+(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa,
+ const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   //   std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " "
   //   << *pbeta << " " << *ldc << "\n";
 
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *,
-                           DenseIndex, DenseIndex, const Scalar &, internal::level3_blocking<Scalar, Scalar> &);
+                           DenseIndex, DenseIndex, const Scalar &, Eigen::internal::level3_blocking<Scalar, Scalar> &);
   static const functype func[8] = {
       // array index: NOTR  | (UP << 2)
       (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj,
@@ -715,7 +700,7 @@
     info = 7;
   else if (*ldc < std::max(1, *n))
     info = 10;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HERK ", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HERK ", &info);
 
   int code = OP(*op) | (UPLO(*uplo) << 2);
 
@@ -741,14 +726,13 @@
     func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking);
     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
   }
-  return 0;
 }
 
 // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c,  for op = 'N'or'n'
 // c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c,  for op = 'C'or'c'
-int EIGEN_BLAS_FUNC(her2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha,
-                           const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb,
-                           const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
+EIGEN_BLAS_FUNC(her2k)
+(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa,
+ const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) {
   const Scalar *a = reinterpret_cast<const Scalar *>(pa);
   const Scalar *b = reinterpret_cast<const Scalar *>(pb);
   Scalar *c = reinterpret_cast<Scalar *>(pc);
@@ -773,7 +757,7 @@
     info = 9;
   else if (*ldc < std::max(1, *n))
     info = 12;
-  if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2K", &info, 6);
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2K", &info);
 
   if (beta != RealScalar(1)) {
     if (UPLO(*uplo) == UP)
@@ -793,7 +777,7 @@
   } else if (*k > 0 && alpha != Scalar(0))
     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
 
-  if (*k == 0) return 1;
+  if (*k == 0) return;
 
   if (OP(*op) == NOTR) {
     if (UPLO(*uplo) == UP) {
@@ -814,8 +798,6 @@
           alpha * matrix(a, *k, *n, *lda).adjoint() * matrix(b, *k, *n, *ldb) +
           numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda);
   }
-
-  return 1;
 }
 
 #endif  // ISCOMPLEX

diff --git a/blas/single.cpp b/blas/single.cpp
index 9afea35..fefb695 100644
--- a/blas/single.cpp
+++ b/blas/single.cpp

@@ -18,6 +18,6 @@
 #include "level2_real_impl.h"
 #include "level3_impl.h"
 
-float EIGEN_BLAS_FUNC(dsdot)(int* n, float* alpha, float* x, int* incx, float* y, int* incy) {
+float EIGEN_BLAS_FUNC_NAME(dsdot)(int* n, float* alpha, float* x, int* incx, float* y, int* incy) {
   return double(*alpha) + BLASFUNC(dsdot)(n, x, incx, y, incy);
 }

diff --git a/blas/xerbla.cpp b/blas/xerbla.cpp
index 698e89f..0b59c4a 100644
--- a/blas/xerbla.cpp
+++ b/blas/xerbla.cpp

@@ -11,10 +11,7 @@
 extern "C" {
 #endif
 
-EIGEN_WEAK_LINKING int xerbla_(const char *msg, int *info, int) {
-  printf("Eigen BLAS ERROR #%i: %s\n", *info, msg);
-  return 0;
-}
+EIGEN_WEAK_LINKING void xerbla_(const char *msg, int *info) { printf("Eigen BLAS ERROR #%i: %s\n", *info, msg); }
 
 #ifdef __cplusplus
 }

diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
index 22cf346..da89857 100644
--- a/debug/msvc/eigen.natvis
+++ b/debug/msvc/eigen.natvis

@@ -1,235 +1,235 @@
-<?xml version="1.0" encoding="utf-8"?>
-
-<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
-
-  <!-- Fixed x Fixed Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
-      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : $T3</Size>
-          <ValuePointer>m_storage.m_data.array</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : $T3</Size>
-          <ValuePointer>m_storage.m_data.array</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- 2 x 2 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>
-      <DisplayString>[2, 2] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>
-  
-  <!-- 3 x 3 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>
-      <DisplayString>[3, 3] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>
-  
-  <!-- 4 x 4 Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>
-      <DisplayString>[4, 4] (fixed matrix)</DisplayString>
-      <Expand>
-        <Synthetic Name="[row 0]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 0]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 1]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 2]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 3]" Condition="Flags%2">
-          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
-        </Synthetic>
-        <Synthetic Name="[row 3]" Condition="!(Flags%2)">
-          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
-        </Synthetic>        
-      </Expand>
-  </Type>  
-  
-  <!-- Dynamic x Dynamic Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      
-      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Fixed x Dynamic Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic x Fixed Matrix -->
-  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
-      <Expand>
-        <ArrayItems Condition="Flags%2"> <!-- row major layout -->
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
-          <Direction>Backward</Direction>
-          <Rank>2</Rank>
-          <Size>$i==0 ? m_storage.m_rows : $T2</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic Column Vector -->
-  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
-      <Expand>
-        <Item Name="[size]">m_storage.m_cols</Item>
-        <ArrayItems>
-          <Size>m_storage.m_cols</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Dynamic Row Vector -->
-  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>
-      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
-      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
-      <Expand>
-        <Item Name="[size]">m_storage.m_rows</Item>
-        <ArrayItems>
-          <Size>m_storage.m_rows</Size>
-          <ValuePointer>m_storage.m_data</ValuePointer>
-        </ArrayItems>
-      </Expand>
-  </Type>
-  
-  <!-- Fixed Vector -->
-  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>
-      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-      </Expand>
-  </Type>
-  
-  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>
-      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-      </Expand>
-  </Type>
-  
-  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>
-      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-        <Item Name="[z]">m_storage.m_data.array[2]</Item>
-      </Expand>
-  </Type>
-  
-    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">
-      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>
-      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>
-      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
-      <Expand>
-        <Item Name="[x]">m_storage.m_data.array[0]</Item>
-        <Item Name="[y]">m_storage.m_data.array[1]</Item>
-        <Item Name="[z]">m_storage.m_data.array[2]</Item>
-        <Item Name="[w]">m_storage.m_data.array[3]</Item>
-      </Expand>
-  </Type>
-
-</AutoVisualizer>
+<?xml version="1.0" encoding="utf-8"?>

+

+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">

+

+  <!-- Fixed x Fixed Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

+      <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : $T3</Size>

+          <ValuePointer>m_storage.m_data.array</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : $T3</Size>

+          <ValuePointer>m_storage.m_data.array</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- 2 x 2 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>

+      <DisplayString>[2, 2] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>

+  

+  <!-- 3 x 3 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>

+      <DisplayString>[3, 3] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>

+  

+  <!-- 4 x 4 Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>

+      <DisplayString>[4, 4] (fixed matrix)</DisplayString>

+      <Expand>

+        <Synthetic Name="[row 0]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 0]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 1]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 2]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 3]" Condition="Flags%2">

+          <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>

+        </Synthetic>

+        <Synthetic Name="[row 3]" Condition="!(Flags%2)">

+          <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>

+        </Synthetic>        

+      </Expand>

+  </Type>  

+  

+  <!-- Dynamic x Dynamic Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">      

+      <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Fixed x Dynamic Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? $T2 : m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic x Fixed Matrix -->

+  <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>

+      <Expand>

+        <ArrayItems Condition="Flags%2"> <!-- row major layout -->

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+        <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->

+          <Direction>Backward</Direction>

+          <Rank>2</Rank>

+          <Size>$i==0 ? m_storage.m_rows : $T2</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic Column Vector -->

+  <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>

+      <Expand>

+        <Item Name="[size]">m_storage.m_cols</Item>

+        <ArrayItems>

+          <Size>m_storage.m_cols</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Dynamic Row Vector -->

+  <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>

+      <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>

+      <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>

+      <Expand>

+        <Item Name="[size]">m_storage.m_rows</Item>

+        <ArrayItems>

+          <Size>m_storage.m_rows</Size>

+          <ValuePointer>m_storage.m_data</ValuePointer>

+        </ArrayItems>

+      </Expand>

+  </Type>

+  

+  <!-- Fixed Vector -->

+  <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>

+      <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+      </Expand>

+  </Type>

+  

+  <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>

+      <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+      </Expand>

+  </Type>

+  

+  <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>

+      <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+        <Item Name="[z]">m_storage.m_data.array[2]</Item>

+      </Expand>

+  </Type>

+  

+    <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">

+      <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>

+      <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>

+      <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>

+      <Expand>

+        <Item Name="[x]">m_storage.m_data.array[0]</Item>

+        <Item Name="[y]">m_storage.m_data.array[1]</Item>

+        <Item Name="[z]">m_storage.m_data.array[2]</Item>

+        <Item Name="[w]">m_storage.m_data.array[3]</Item>

+      </Expand>

+  </Type>

+

+</AutoVisualizer>


diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat
index 273c10d..35ef580 100644
--- a/debug/msvc/eigen_autoexp_part.dat
+++ b/debug/msvc/eigen_autoexp_part.dat

@@ -1,295 +1,295 @@
-; ***************************************************************
-; * Eigen Visualizer
-; *
-; * Author: Hauke Heibel <hauke.heibel@gmail.com>
-; *
-; * Support the enhanced debugging of the following Eigen
-; * types (*: any, +:fixed dimension) :
-; *
-; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>
-; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>
-; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>
-; * - Eigen::Matrix<*,-1,-1,*,*,*>
-; * - Eigen::Matrix<*,+,-1,*,*,*>
-; * - Eigen::Matrix<*,-1,+,*,*,*>
-; * - Eigen::Matrix<*,+,+,*,*,*>
-; *
-; * Matrices are displayed properly independently of the memory
-; * alignment (RowMajor vs. ColMajor).
-; *
-; * This file is distributed WITHOUT ANY WARRANTY. Please ensure
-; * that your original autoexp.dat file is copied to a safe 
-; * place before proceeding with its modification.
-; ***************************************************************
-
-[Visualizer]
-
-; Fixed size 4-vectors
-Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2],
-         w : ($c.m_storage.m_data.array)[3]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        4,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 4),
-        ")"
-      )
-   )
-}
-
-; Fixed size 3-vectors
-Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1],
-         z : ($c.m_storage.m_data.array)[2]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        3,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 3),
-        ")"
-      )
-   )
-}
-
-; Fixed size 2-vectors
-Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0],
-         y : ($c.m_storage.m_data.array)[1]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        2,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 2),
-        ")"
-      )
-   )
-}
-
-; Fixed size 1-vectors
-Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{
-   children
-   (
-      #(
-        [internals]: [$c,!],
-         x : ($c.m_storage.m_data.array)[0]
-      )
-   )
-
-   preview
-   (
-      #(
-        "[",
-        1,
-        "](",
-        #array(expr: $e.m_storage.m_data.array[$i], size: 1),
-        ")"
-      )
-   )
-}
-
-; Dynamic matrices (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,-1,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.m_storage.m_cols,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.m_storage.m_cols,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols
-           ),
-         ")"
-      )
-   )
-}
-
-; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,-1,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.m_storage.m_rows,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data)[$i],
-                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.m_storage.m_rows,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data)[$i],g],
-            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
-
-; Fixed size matrix (ColMajor and RowMajor support)
-Eigen::Matrix<*,*,*,*,*,*>{
-  children
-   (
-      #(
-         [internals]: [$c,!],
-         rows: $c.RowsAtCompileTime,
-         cols: $c.ColsAtCompileTime,
-         ; Check for RowMajorBit
-         #if ($c.Flags & 0x1) (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         ) #else (
-             #array(
-                rank: 2,
-                base: 0,
-                expr: ($c.m_storage.m_data.array)[$i],
-                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime
-             )
-         )
-      )
-   )
-
-   preview
-   (
-     #(
-         "[",
-           $c.RowsAtCompileTime,
-         ",",
-           $c.ColsAtCompileTime,
-         "](",
-           #array(
-            expr :    [($c.m_storage.m_data.array)[$i],g],
-            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime
-           ),
-         ")"
-      )
-   )
-}
+; ***************************************************************

+; * Eigen Visualizer

+; *

+; * Author: Hauke Heibel <hauke.heibel@gmail.com>

+; *

+; * Support the enhanced debugging of the following Eigen

+; * types (*: any, +:fixed dimension) :

+; *

+; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*>

+; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*>

+; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*>

+; * - Eigen::Matrix<*,-1,-1,*,*,*>

+; * - Eigen::Matrix<*,+,-1,*,*,*>

+; * - Eigen::Matrix<*,-1,+,*,*,*>

+; * - Eigen::Matrix<*,+,+,*,*,*>

+; *

+; * Matrices are displayed properly independently of the memory

+; * alignment (RowMajor vs. ColMajor).

+; *

+; * This file is distributed WITHOUT ANY WARRANTY. Please ensure

+; * that your original autoexp.dat file is copied to a safe 

+; * place before proceeding with its modification.

+; ***************************************************************

+

+[Visualizer]

+

+; Fixed size 4-vectors

+Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1],

+         z : ($c.m_storage.m_data.array)[2],

+         w : ($c.m_storage.m_data.array)[3]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        4,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 4),

+        ")"

+      )

+   )

+}

+

+; Fixed size 3-vectors

+Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1],

+         z : ($c.m_storage.m_data.array)[2]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        3,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 3),

+        ")"

+      )

+   )

+}

+

+; Fixed size 2-vectors

+Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0],

+         y : ($c.m_storage.m_data.array)[1]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        2,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 2),

+        ")"

+      )

+   )

+}

+

+; Fixed size 1-vectors

+Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{

+   children

+   (

+      #(

+        [internals]: [$c,!],

+         x : ($c.m_storage.m_data.array)[0]

+      )

+   )

+

+   preview

+   (

+      #(

+        "[",

+        1,

+        "](",

+        #array(expr: $e.m_storage.m_data.array[$i], size: 1),

+        ")"

+      )

+   )

+}

+

+; Dynamic matrices (ColMajor and RowMajor support)

+Eigen::Matrix<*,-1,-1,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.m_storage.m_rows,

+         cols: $c.m_storage.m_cols,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.m_storage.m_rows,

+         ",",

+           $c.m_storage.m_cols,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.m_storage.m_rows*$c.m_storage.m_cols

+           ),

+         ")"

+      )

+   )

+}

+

+; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,*,-1,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.RowsAtCompileTime,

+         cols: $c.m_storage.m_cols,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.RowsAtCompileTime,

+         ",",

+           $c.m_storage.m_cols,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.RowsAtCompileTime*$c.m_storage.m_cols

+           ),

+         ")"

+      )

+   )

+}

+

+; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,-1,*,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.m_storage.m_rows,

+         cols: $c.ColsAtCompileTime,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], 

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data)[$i],

+                size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.m_storage.m_rows,

+         ",",

+           $c.ColsAtCompileTime,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data)[$i],g],

+            size :    $c.m_storage.m_rows*$c.ColsAtCompileTime

+           ),

+         ")"

+      )

+   )

+}

+

+; Fixed size matrix (ColMajor and RowMajor support)

+Eigen::Matrix<*,*,*,*,*,*>{

+  children

+   (

+      #(

+         [internals]: [$c,!],

+         rows: $c.RowsAtCompileTime,

+         cols: $c.ColsAtCompileTime,

+         ; Check for RowMajorBit

+         #if ($c.Flags & 0x1) (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], 

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

+             )

+         ) #else (

+             #array(

+                rank: 2,

+                base: 0,

+                expr: ($c.m_storage.m_data.array)[$i],

+                size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime

+             )

+         )

+      )

+   )

+

+   preview

+   (

+     #(

+         "[",

+           $c.RowsAtCompileTime,

+         ",",

+           $c.ColsAtCompileTime,

+         "](",

+           #array(

+            expr :    [($c.m_storage.m_data.array)[$i],g],

+            size :    $c.RowsAtCompileTime*$c.ColsAtCompileTime

+           ),

+         ")"

+      )

+   )

+}


diff --git a/lapack/cholesky.inc b/lapack/cholesky.inc
index ea3bc12..dea5bf6 100644
--- a/lapack/cholesky.inc
+++ b/lapack/cholesky.inc

@@ -11,62 +11,60 @@
 #include <Eigen/Cholesky>
 
 // POTRF computes the Cholesky factorization of a real symmetric positive definite matrix A.
-EIGEN_LAPACK_FUNC(potrf,(char* uplo, int *n, RealScalar *pa, int *lda, int *info))
-{
+EIGEN_LAPACK_FUNC(potrf)(char *uplo, int *n, RealScalar *pa, int *lda, int *info) {
   *info = 0;
-        if(UPLO(*uplo)==INVALID) *info = -1;
-  else  if(*n<0)                 *info = -2;
-  else  if(*lda<std::max(1,*n))  *info = -4;
-  if(*info!=0)
-  {
+  if (UPLO(*uplo) == INVALID)
+    *info = -1;
+  else if (*n < 0)
+    *info = -2;
+  else if (*lda < std::max(1, *n))
+    *info = -4;
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"POTRF", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "POTRF", &e);
   }
 
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  MatrixType A(a,*n,*n,*lda);
+  Scalar *a = reinterpret_cast<Scalar *>(pa);
+  MatrixType A(a, *n, *n, *lda);
   int ret;
-  if(UPLO(*uplo)==UP) ret = int(internal::llt_inplace<Scalar, Upper>::blocked(A));
-  else                ret = int(internal::llt_inplace<Scalar, Lower>::blocked(A));
+  if (UPLO(*uplo) == UP)
+    ret = int(internal::llt_inplace<Scalar, Upper>::blocked(A));
+  else
+    ret = int(internal::llt_inplace<Scalar, Lower>::blocked(A));
 
-  if(ret>=0)
-    *info = ret+1;
-  
-  return 0;
+  if (ret >= 0) *info = ret + 1;
 }
 
 // POTRS solves a system of linear equations A*X = B with a symmetric
 // positive definite matrix A using the Cholesky factorization
 // A = U**T*U or A = L*L**T computed by DPOTRF.
-EIGEN_LAPACK_FUNC(potrs,(char* uplo, int *n, int *nrhs, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, int *info))
-{
+EIGEN_LAPACK_FUNC(potrs)(char *uplo, int *n, int *nrhs, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, int *info) {
   *info = 0;
-        if(UPLO(*uplo)==INVALID) *info = -1;
-  else  if(*n<0)                 *info = -2;
-  else  if(*nrhs<0)              *info = -3;
-  else  if(*lda<std::max(1,*n))  *info = -5;
-  else  if(*ldb<std::max(1,*n))  *info = -7;
-  if(*info!=0)
-  {
+  if (UPLO(*uplo) == INVALID)
+    *info = -1;
+  else if (*n < 0)
+    *info = -2;
+  else if (*nrhs < 0)
+    *info = -3;
+  else if (*lda < std::max(1, *n))
+    *info = -5;
+  else if (*ldb < std::max(1, *n))
+    *info = -7;
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"POTRS", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "POTRS", &e);
   }
 
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar* b = reinterpret_cast<Scalar*>(pb);
-  MatrixType A(a,*n,*n,*lda);
-  MatrixType B(b,*n,*nrhs,*ldb);
+  Scalar *a = reinterpret_cast<Scalar *>(pa);
+  Scalar *b = reinterpret_cast<Scalar *>(pb);
+  MatrixType A(a, *n, *n, *lda);
+  MatrixType B(b, *n, *nrhs, *ldb);
 
-  if(UPLO(*uplo)==UP)
-  {
+  if (UPLO(*uplo) == UP) {
     A.triangularView<Upper>().adjoint().solveInPlace(B);
     A.triangularView<Upper>().solveInPlace(B);
-  }
-  else
-  {
+  } else {
     A.triangularView<Lower>().solveInPlace(B);
     A.triangularView<Lower>().adjoint().solveInPlace(B);
   }
-
-  return 0;
 }

diff --git a/lapack/dsecnd.cpp b/lapack/dsecnd.cpp
deleted file mode 100644
index b046955..0000000
--- a/lapack/dsecnd.cpp
+++ /dev/null

@@ -1,11 +0,0 @@
-#include <ctime>
-
-extern "C" {
-double dsecnd_();
-}
-
-// Elapsed CPU Time in seconds.
-double dsecnd_() {
-  return static_cast<double>(std::clock()) /
-         static_cast<double>(CLOCKS_PER_SEC);
-}

diff --git a/lapack/eigenvalues.inc b/lapack/eigenvalues.inc
index 921c515..6f168de 100644
--- a/lapack/eigenvalues.inc
+++ b/lapack/eigenvalues.inc

@@ -11,52 +11,51 @@
 #include <Eigen/Eigenvalues>
 
 // computes eigen values and vectors of a general N-by-N matrix A
-EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info))
-{
+EIGEN_LAPACK_FUNC(syev)
+(char* jobz, char* uplo, int* n, Scalar* a, int* lda, Scalar* w, Scalar* /*work*/, int* lwork, int* info) {
   // TODO exploit the work buffer
-  bool query_size = *lwork==-1;
-  
+  bool query_size = *lwork == -1;
+
   *info = 0;
-        if(*jobz!='N' && *jobz!='V')                    *info = -1;
-  else  if(UPLO(*uplo)==INVALID)                        *info = -2;
-  else  if(*n<0)                                        *info = -3;
-  else  if(*lda<std::max(1,*n))                         *info = -5;
-  else  if((!query_size) && *lwork<std::max(1,3**n-1))  *info = -8;
-    
-  if(*info!=0)
-  {
+  if (*jobz != 'N' && *jobz != 'V')
+    *info = -1;
+  else if (UPLO(*uplo) == INVALID)
+    *info = -2;
+  else if (*n < 0)
+    *info = -3;
+  else if (*lda < std::max(1, *n))
+    *info = -5;
+  else if ((!query_size) && *lwork < std::max(1, 3 * *n - 1))
+    *info = -8;
+
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"SYEV ", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "SYEV ", &e);
   }
-  
-  if(query_size)
-  {
+
+  if (query_size) {
     *lwork = 0;
-    return 0;
+    return;
   }
-  
-  if(*n==0)
-    return 0;
-  
-  PlainMatrixType mat(*n,*n);
-  if(UPLO(*uplo)==UP) mat = matrix(a,*n,*n,*lda).adjoint();
-  else                mat = matrix(a,*n,*n,*lda);
-  
-  bool computeVectors = *jobz=='V' || *jobz=='v';
-  SelfAdjointEigenSolver<PlainMatrixType> eig(mat,computeVectors?ComputeEigenvectors:EigenvaluesOnly);
-  
-  if(eig.info()==NoConvergence)
-  {
-    make_vector(w,*n).setZero();
-    if(computeVectors)
-      matrix(a,*n,*n,*lda).setIdentity();
+
+  if (*n == 0) return;
+
+  PlainMatrixType mat(*n, *n);
+  if (UPLO(*uplo) == UP)
+    mat = matrix(a, *n, *n, *lda).adjoint();
+  else
+    mat = matrix(a, *n, *n, *lda);
+
+  bool computeVectors = *jobz == 'V' || *jobz == 'v';
+  SelfAdjointEigenSolver<PlainMatrixType> eig(mat, computeVectors ? ComputeEigenvectors : EigenvaluesOnly);
+
+  if (eig.info() == NoConvergence) {
+    make_vector(w, *n).setZero();
+    if (computeVectors) matrix(a, *n, *n, *lda).setIdentity();
     //*info = 1;
-    return 0;
+    return;
   }
-  
-  make_vector(w,*n) = eig.eigenvalues();
-  if(computeVectors)
-    matrix(a,*n,*n,*lda) = eig.eigenvectors();
-  
-  return 0;
+
+  make_vector(w, *n) = eig.eigenvalues();
+  if (computeVectors) matrix(a, *n, *n, *lda) = eig.eigenvectors();
 }

diff --git a/lapack/lapack.h b/lapack/lapack.h
index 9bb222b..88fdb98 100644
--- a/lapack/lapack.h
+++ b/lapack/lapack.h

@@ -7,127 +7,127 @@
 extern "C" {
 #endif
 
-int BLASFUNC(csymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
-                    const float *, float *, const int *);
-int BLASFUNC(zsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
-int BLASFUNC(xsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *,
-                    const double *, double *, const int *);
+void BLASFUNC(csymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *,
+                     const float *, float *, const int *);
+void BLASFUNC(zsymv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
+void BLASFUNC(xsymv)(const char *, const int *, const double *, const double *, const int *, const double *,
+                     const int *, const double *, double *, const int *);
 
-int BLASFUNC(cspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
-int BLASFUNC(zspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
-int BLASFUNC(xspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
+void BLASFUNC(cspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
+void BLASFUNC(zspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
+void BLASFUNC(xspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
 
-int BLASFUNC(csyr)(char *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zsyr)(char *, int *, double *, double *, int *, double *, int *);
-int BLASFUNC(xsyr)(char *, int *, double *, double *, int *, double *, int *);
+void BLASFUNC(csyr)(char *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(zsyr)(char *, int *, double *, double *, int *, double *, int *);
+void BLASFUNC(xsyr)(char *, int *, double *, double *, int *, double *, int *);
 
-int BLASFUNC(cspr)(char *, int *, float *, float *, int *, float *);
-int BLASFUNC(zspr)(char *, int *, double *, double *, int *, double *);
-int BLASFUNC(xspr)(char *, int *, double *, double *, int *, double *);
+void BLASFUNC(cspr)(char *, int *, float *, float *, int *, float *);
+void BLASFUNC(zspr)(char *, int *, double *, double *, int *, double *);
+void BLASFUNC(xspr)(char *, int *, double *, double *, int *, double *);
 
-int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, double *, int *);
-int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, double *, int *);
+void BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, double *, int *);
+void BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, double *, int *);
 
-int BLASFUNC(sgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
-                    int *);
-int BLASFUNC(cgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
-                    int *);
+void BLASFUNC(sgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(dgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
+                     int *);
+void BLASFUNC(cgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(zgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
+                     int *);
 
-int BLASFUNC(sgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(dgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
-                    int *);
-int BLASFUNC(cgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
-int BLASFUNC(zgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
-                    int *);
+void BLASFUNC(sgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(dgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
+                     int *);
+void BLASFUNC(cgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
+void BLASFUNC(zgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
+                     int *);
 
-int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
+void BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
+void BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
 
-int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
-int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
-int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
+void BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
+void BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
+void BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
 
-int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
-int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
-int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
-int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
+void BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
+void BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
+void BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
+void BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
+void BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
+void BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
 
-int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+void BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+void BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
 
-int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
-int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
-int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+void BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+void BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
+void BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double *, int *, int *);
 
-int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
+void BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
+void BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
+void BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
+void BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
+void BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
+void BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
 
-int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
+void BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
+void BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
+void BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
+void BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
+void BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
+void BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
 
-int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
-int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
-int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
-int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
+void BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
+void BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
+void BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
+void BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
+void BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
+void BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
 
-int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
-int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
-int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
-int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
+void BLASFUNC(slauum)(char *, int *, float *, int *, int *);
+void BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
+void BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
+void BLASFUNC(clauum)(char *, int *, float *, int *, int *);
+void BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
+void BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
 
-int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
+void BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
+void BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
 
-int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
-int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
-int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
+void BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
+void BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
+void BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
 
-int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
-int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
-int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
-int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
+void BLASFUNC(spotri)(char *, int *, float *, int *, int *);
+void BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
+void BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
+void BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
+void BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
+void BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
 
 #ifdef __cplusplus
 }

diff --git a/lapack/lapack_common.h b/lapack/lapack_common.h
index 37d1f14..726fabf 100644
--- a/lapack/lapack_common.h
+++ b/lapack/lapack_common.h

@@ -13,19 +13,8 @@
 #include "../blas/common.h"
 #include "lapack.h"
 
-#define EIGEN_LAPACK_FUNC(FUNC, ARGLIST) \
-  extern "C" {                           \
-  int EIGEN_BLAS_FUNC(FUNC) ARGLIST;     \
-  }                                      \
-  int EIGEN_BLAS_FUNC(FUNC)              \
-  ARGLIST
+#define EIGEN_LAPACK_FUNC(FUNC) EIGEN_BLAS_FUNC(FUNC)
 
 typedef Eigen::Map<Eigen::Transpositions<Eigen::Dynamic, Eigen::Dynamic, int> > PivotsType;
 
-#if ISCOMPLEX
-#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X,
-#else
-#define EIGEN_LAPACK_ARG_IF_COMPLEX(X)
-#endif
-
 #endif  // EIGEN_LAPACK_COMMON_H

diff --git a/lapack/lu.inc b/lapack/lu.inc
index 90cebe0..d30c8ce 100644
--- a/lapack/lu.inc
+++ b/lapack/lu.inc

@@ -11,79 +11,70 @@
 #include <Eigen/LU>
 
 // computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges
-EIGEN_LAPACK_FUNC(getrf,(int *m, int *n, RealScalar *pa, int *lda, int *ipiv, int *info))
-{
+EIGEN_LAPACK_FUNC(getrf)(int *m, int *n, RealScalar *pa, int *lda, int *ipiv, int *info) {
   *info = 0;
-        if(*m<0)                  *info = -1;
-  else  if(*n<0)                  *info = -2;
-  else  if(*lda<std::max(1,*m))   *info = -4;
-  if(*info!=0)
-  {
+  if (*m < 0)
+    *info = -1;
+  else if (*n < 0)
+    *info = -2;
+  else if (*lda < std::max(1, *m))
+    *info = -4;
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"GETRF", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "GETRF", &e);
   }
 
-  if(*m==0 || *n==0)
-    return 0;
+  if (*m == 0 || *n == 0) return;
 
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
+  Scalar *a = reinterpret_cast<Scalar *>(pa);
   int nb_transpositions;
-  int ret = int(Eigen::internal::partial_lu_impl<Scalar,ColMajor,int>
-                     ::blocked_lu(*m, *n, a, *lda, ipiv, nb_transpositions));
+  int ret = int(Eigen::internal::partial_lu_impl<Scalar, Eigen::ColMajor, int>::blocked_lu(*m, *n, a, *lda, ipiv,
+                                                                                           nb_transpositions));
 
-  for(int i=0; i<std::min(*m,*n); ++i)
-    ipiv[i]++;
+  for (int i = 0; i < std::min(*m, *n); ++i) ipiv[i]++;
 
-  if(ret>=0)
-    *info = ret+1;
-
-  return 0;
+  if (ret >= 0) *info = ret + 1;
 }
 
-//GETRS solves a system of linear equations
-//    A * X = B  or  A' * X = B
-//  with a general N-by-N matrix A using the LU factorization computed  by GETRF
-EIGEN_LAPACK_FUNC(getrs,(char *trans, int *n, int *nrhs, RealScalar *pa, int *lda, int *ipiv, RealScalar *pb, int *ldb, int *info))
-{
+// GETRS solves a system of linear equations
+//     A * X = B  or  A' * X = B
+//   with a general N-by-N matrix A using the LU factorization computed  by GETRF
+EIGEN_LAPACK_FUNC(getrs)
+(char *trans, int *n, int *nrhs, RealScalar *pa, int *lda, int *ipiv, RealScalar *pb, int *ldb, int *info) {
   *info = 0;
-        if(OP(*trans)==INVALID)  *info = -1;
-  else  if(*n<0)                 *info = -2;
-  else  if(*nrhs<0)              *info = -3;
-  else  if(*lda<std::max(1,*n))  *info = -5;
-  else  if(*ldb<std::max(1,*n))  *info = -8;
-  if(*info!=0)
-  {
+  if (OP(*trans) == INVALID)
+    *info = -1;
+  else if (*n < 0)
+    *info = -2;
+  else if (*nrhs < 0)
+    *info = -3;
+  else if (*lda < std::max(1, *n))
+    *info = -5;
+  else if (*ldb < std::max(1, *n))
+    *info = -8;
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"GETRS", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "GETRS", &e);
   }
 
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar* b = reinterpret_cast<Scalar*>(pb);
-  MatrixType lu(a,*n,*n,*lda);
-  MatrixType B(b,*n,*nrhs,*ldb);
+  Scalar *a = reinterpret_cast<Scalar *>(pa);
+  Scalar *b = reinterpret_cast<Scalar *>(pb);
+  MatrixType lu(a, *n, *n, *lda);
+  MatrixType B(b, *n, *nrhs, *ldb);
 
-  for(int i=0; i<*n; ++i)
-    ipiv[i]--;
-  if(OP(*trans)==NOTR)
-  {
-    B = PivotsType(ipiv,*n) * B;
+  for (int i = 0; i < *n; ++i) ipiv[i]--;
+  if (OP(*trans) == NOTR) {
+    B = PivotsType(ipiv, *n) * B;
     lu.triangularView<UnitLower>().solveInPlace(B);
     lu.triangularView<Upper>().solveInPlace(B);
-  }
-  else if(OP(*trans)==TR)
-  {
+  } else if (OP(*trans) == TR) {
     lu.triangularView<Upper>().transpose().solveInPlace(B);
     lu.triangularView<UnitLower>().transpose().solveInPlace(B);
-    B = PivotsType(ipiv,*n).transpose() * B;
-  }
-  else if(OP(*trans)==ADJ)
-  {
+    B = PivotsType(ipiv, *n).transpose() * B;
+  } else if (OP(*trans) == ADJ) {
     lu.triangularView<Upper>().adjoint().solveInPlace(B);
     lu.triangularView<UnitLower>().adjoint().solveInPlace(B);
-    B = PivotsType(ipiv,*n).transpose() * B;
+    B = PivotsType(ipiv, *n).transpose() * B;
   }
-  for(int i=0; i<*n; ++i)
-    ipiv[i]++;
-
-  return 0;
+  for (int i = 0; i < *n; ++i) ipiv[i]++;
 }

diff --git a/lapack/second.cpp b/lapack/second.cpp
deleted file mode 100644
index faa6187..0000000
--- a/lapack/second.cpp
+++ /dev/null

@@ -1,11 +0,0 @@
-#include <ctime>
-
-extern "C" {
-double second_();
-}
-
-// Elapsed CPU Time in seconds.
-double second_() {
-  return static_cast<double>(std::clock()) /
-         static_cast<double>(CLOCKS_PER_SEC);
-}
\ No newline at end of file

diff --git a/lapack/svd.inc b/lapack/svd.inc
index 83544cf..8e45310 100644
--- a/lapack/svd.inc
+++ b/lapack/svd.inc

@@ -10,129 +10,141 @@
 #include "lapack_common.h"
 #include <Eigen/SVD>
 
+#if ISCOMPLEX
+#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X,
+#else
+#define EIGEN_LAPACK_ARG_IF_COMPLEX(X)
+#endif
+
 // computes the singular values/vectors a general M-by-N matrix A using divide-and-conquer
-EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
-                         EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info))
-{
+EIGEN_LAPACK_FUNC(gesdd)
+(char *jobz, int *m, int *n, Scalar *a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt,
+ Scalar * /*work*/, int *lwork, EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar * /*rwork*/) int * /*iwork*/, int *info) {
   // TODO exploit the work buffer
-  bool query_size = *lwork==-1;
-  int diag_size = (std::min)(*m,*n);
-  
+  bool query_size = *lwork == -1;
+  int diag_size = (std::min)(*m, *n);
+
   *info = 0;
-        if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N')  *info = -1;
-  else  if(*m<0)                                                  *info = -2;
-  else  if(*n<0)                                                  *info = -3;
-  else  if(*lda<std::max(1,*m))                                   *info = -5;
-  else  if(*lda<std::max(1,*m))                                   *info = -8;
-  else  if(*ldu <1 || (*jobz=='A' && *ldu <*m)
-                   || (*jobz=='O' && *m<*n && *ldu<*m))           *info = -8;
-  else  if(*ldvt<1 || (*jobz=='A' && *ldvt<*n)
-                   || (*jobz=='S' && *ldvt<diag_size)
-                   || (*jobz=='O' && *m>=*n && *ldvt<*n))         *info = -10;
-  
-  if(*info!=0)
-  {
+  if (*jobz != 'A' && *jobz != 'S' && *jobz != 'O' && *jobz != 'N')
+    *info = -1;
+  else if (*m < 0)
+    *info = -2;
+  else if (*n < 0)
+    *info = -3;
+  else if (*lda < std::max(1, *m))
+    *info = -5;
+  else if (*lda < std::max(1, *m))
+    *info = -8;
+  else if (*ldu < 1 || (*jobz == 'A' && *ldu < *m) || (*jobz == 'O' && *m < *n && *ldu < *m))
+    *info = -8;
+  else if (*ldvt < 1 || (*jobz == 'A' && *ldvt < *n) || (*jobz == 'S' && *ldvt < diag_size) ||
+           (*jobz == 'O' && *m >= *n && *ldvt < *n))
+    *info = -10;
+
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"GESDD ", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "GESDD ", &e);
   }
-  
-  if(query_size)
-  {
+
+  if (query_size) {
     *lwork = 0;
-    return 0;
+    return;
   }
-  
-  if(*n==0 || *m==0)
-    return 0;
-  
-  PlainMatrixType mat(*m,*n);
-  mat = matrix(a,*m,*n,*lda);
-  
-  int option = *jobz=='A' ? ComputeFullU|ComputeFullV
-             : *jobz=='S' ? ComputeThinU|ComputeThinV
-             : *jobz=='O' ? ComputeThinU|ComputeThinV
-             : 0;
 
-  BDCSVD<PlainMatrixType> svd(mat,option);
-  
-  make_vector(s,diag_size) = svd.singularValues().head(diag_size);
+  if (*n == 0 || *m == 0) return;
 
-  if(*jobz=='A')
-  {
-    matrix(u,*m,*m,*ldu)   = svd.matrixU();
-    matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint();
+  PlainMatrixType mat(*m, *n);
+  mat = matrix(a, *m, *n, *lda);
+
+  int option = *jobz == 'A'   ? ComputeFullU | ComputeFullV
+               : *jobz == 'S' ? ComputeThinU | ComputeThinV
+               : *jobz == 'O' ? ComputeThinU | ComputeThinV
+                              : 0;
+
+  BDCSVD<PlainMatrixType> svd(mat, option);
+
+  make_vector(s, diag_size) = svd.singularValues().head(diag_size);
+
+  if (*jobz == 'A') {
+    matrix(u, *m, *m, *ldu) = svd.matrixU();
+    matrix(vt, *n, *n, *ldvt) = svd.matrixV().adjoint();
+  } else if (*jobz == 'S') {
+    matrix(u, *m, diag_size, *ldu) = svd.matrixU();
+    matrix(vt, diag_size, *n, *ldvt) = svd.matrixV().adjoint();
+  } else if (*jobz == 'O' && *m >= *n) {
+    matrix(a, *m, *n, *lda) = svd.matrixU();
+    matrix(vt, *n, *n, *ldvt) = svd.matrixV().adjoint();
+  } else if (*jobz == 'O') {
+    matrix(u, *m, *m, *ldu) = svd.matrixU();
+    matrix(a, diag_size, *n, *lda) = svd.matrixV().adjoint();
   }
-  else if(*jobz=='S')
-  {
-    matrix(u,*m,diag_size,*ldu)   = svd.matrixU();
-    matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint();
-  }
-  else if(*jobz=='O' && *m>=*n)
-  {
-    matrix(a,*m,*n,*lda)   = svd.matrixU();
-    matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint();
-  }
-  else if(*jobz=='O')
-  {
-    matrix(u,*m,*m,*ldu)        = svd.matrixU();
-    matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint();
-  }
-    
-  return 0;
 }
 
 // computes the singular values/vectors a general M-by-N matrix A using two sided jacobi algorithm
-EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
-                         EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info))
-{
+EIGEN_LAPACK_FUNC(gesvd)
+(char *jobu, char *jobv, int *m, int *n, Scalar *a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt,
+ Scalar * /*work*/, int *lwork, EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar * /*rwork*/) int *info) {
   // TODO exploit the work buffer
-  bool query_size = *lwork==-1;
-  int diag_size = (std::min)(*m,*n);
-  
+  bool query_size = *lwork == -1;
+  int diag_size = (std::min)(*m, *n);
+
   *info = 0;
-        if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1;
-  else  if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N')
-           || (*jobu=='O' && *jobv=='O'))                         *info = -2;
-  else  if(*m<0)                                                  *info = -3;
-  else  if(*n<0)                                                  *info = -4;
-  else  if(*lda<std::max(1,*m))                                   *info = -6;
-  else  if(*ldu <1 || ((*jobu=='A' || *jobu=='S') && *ldu<*m))    *info = -9;
-  else  if(*ldvt<1 || (*jobv=='A' && *ldvt<*n)
-                   || (*jobv=='S' && *ldvt<diag_size))            *info = -11;
-  
-  if(*info!=0)
-  {
+  if (*jobu != 'A' && *jobu != 'S' && *jobu != 'O' && *jobu != 'N')
+    *info = -1;
+  else if ((*jobv != 'A' && *jobv != 'S' && *jobv != 'O' && *jobv != 'N') || (*jobu == 'O' && *jobv == 'O'))
+    *info = -2;
+  else if (*m < 0)
+    *info = -3;
+  else if (*n < 0)
+    *info = -4;
+  else if (*lda < std::max(1, *m))
+    *info = -6;
+  else if (*ldu < 1 || ((*jobu == 'A' || *jobu == 'S') && *ldu < *m))
+    *info = -9;
+  else if (*ldvt < 1 || (*jobv == 'A' && *ldvt < *n) || (*jobv == 'S' && *ldvt < diag_size))
+    *info = -11;
+
+  if (*info != 0) {
     int e = -*info;
-    return xerbla_(SCALAR_SUFFIX_UP"GESVD ", &e, 6);
+    return xerbla_(SCALAR_SUFFIX_UP "GESVD ", &e);
   }
-  
-  if(query_size)
-  {
+
+  if (query_size) {
     *lwork = 0;
-    return 0;
+    return;
   }
-  
-  if(*n==0 || *m==0)
-    return 0;
-  
-  PlainMatrixType mat(*m,*n);
-  mat = matrix(a,*m,*n,*lda);
-  
-  int option = (*jobu=='A' ? ComputeFullU : *jobu=='S' || *jobu=='O' ? ComputeThinU : 0)
-             | (*jobv=='A' ? ComputeFullV : *jobv=='S' || *jobv=='O' ? ComputeThinV : 0);
-  
-  JacobiSVD<PlainMatrixType> svd(mat,option);
-  
-  make_vector(s,diag_size) = svd.singularValues().head(diag_size);
+
+  if (*n == 0 || *m == 0) return;
+
+  PlainMatrixType mat(*m, *n);
+  mat = matrix(a, *m, *n, *lda);
+
+  int option = (*jobu == 'A'                   ? ComputeFullU
+                : *jobu == 'S' || *jobu == 'O' ? ComputeThinU
+                                               : 0) |
+               (*jobv == 'A'                   ? ComputeFullV
+                : *jobv == 'S' || *jobv == 'O' ? ComputeThinV
+                                               : 0);
+
+  JacobiSVD<PlainMatrixType> svd(mat, option);
+
+  make_vector(s, diag_size) = svd.singularValues().head(diag_size);
   {
-        if(*jobu=='A') matrix(u,*m,*m,*ldu)           = svd.matrixU();
-  else  if(*jobu=='S') matrix(u,*m,diag_size,*ldu)    = svd.matrixU();
-  else  if(*jobu=='O') matrix(a,*m,diag_size,*lda)    = svd.matrixU();
+    if (*jobu == 'A')
+      matrix(u, *m, *m, *ldu) = svd.matrixU();
+    else if (*jobu == 'S')
+      matrix(u, *m, diag_size, *ldu) = svd.matrixU();
+    else if (*jobu == 'O')
+      matrix(a, *m, diag_size, *lda) = svd.matrixU();
   }
   {
-        if(*jobv=='A') matrix(vt,*n,*n,*ldvt)         = svd.matrixV().adjoint();
-  else  if(*jobv=='S') matrix(vt,diag_size,*n,*ldvt)  = svd.matrixV().adjoint();
-  else  if(*jobv=='O') matrix(a,diag_size,*n,*lda)    = svd.matrixV().adjoint();
+    if (*jobv == 'A')
+      matrix(vt, *n, *n, *ldvt) = svd.matrixV().adjoint();
+    else if (*jobv == 'S')
+      matrix(vt, diag_size, *n, *ldvt) = svd.matrixV().adjoint();
+    else if (*jobv == 'O')
+      matrix(a, diag_size, *n, *lda) = svd.matrixV().adjoint();
   }
-  return 0;
-}
\ No newline at end of file
+}
+
+#undef EIGEN_LAPACK_ARG_IF_COMPLEX
\ No newline at end of file

diff --git a/test/AnnoyingScalar.h b/test/AnnoyingScalar.h
index b4f77ad..637fdbf 100644
--- a/test/AnnoyingScalar.h
+++ b/test/AnnoyingScalar.h

@@ -183,6 +183,20 @@
 EIGEN_STRONG_INLINE float cast(const AnnoyingScalar& x) {
   return *x.v;
 }
+
+template <>
+struct random_impl<AnnoyingScalar> {
+  using Impl = random_impl<float>;
+  static EIGEN_DEVICE_FUNC inline AnnoyingScalar run(const AnnoyingScalar& x, const AnnoyingScalar& y) {
+    float result = Impl::run(*x.v, *y.v);
+    return AnnoyingScalar(result);
+  }
+  static EIGEN_DEVICE_FUNC inline AnnoyingScalar run() {
+    float result = Impl::run();
+    return AnnoyingScalar(result);
+  }
+};
+
 }  // namespace internal
 }  // namespace Eigen
 

diff --git a/test/MovableScalar.h b/test/MovableScalar.h
index 52591bc..56a873e 100644
--- a/test/MovableScalar.h
+++ b/test/MovableScalar.h

@@ -28,6 +28,23 @@
 
 template <>
 struct NumTraits<MovableScalar<float>> : GenericNumTraits<float> {};
+
+namespace internal {
+template <typename T>
+struct random_impl<MovableScalar<T>> {
+  using MoveableT = MovableScalar<T>;
+  using Impl = random_impl<T>;
+  static EIGEN_DEVICE_FUNC inline MoveableT run(const MoveableT& x, const MoveableT& y) {
+    T result = Impl::run(x, y);
+    return MoveableT(result);
+  }
+  static EIGEN_DEVICE_FUNC inline MoveableT run() {
+    T result = Impl::run();
+    return MoveableT(result);
+  }
+};
+}  // namespace internal
+
 }  // namespace Eigen
 
 #endif

diff --git a/test/SafeScalar.h b/test/SafeScalar.h
index 2079af4..4f4da56 100644
--- a/test/SafeScalar.h
+++ b/test/SafeScalar.h

@@ -26,3 +26,21 @@
   T val_;
   bool initialized_;
 };
+
+namespace Eigen {
+namespace internal {
+template <typename T>
+struct random_impl<SafeScalar<T>> {
+  using SafeT = SafeScalar<T>;
+  using Impl = random_impl<T>;
+  static EIGEN_DEVICE_FUNC inline SafeT run(const SafeT& x, const SafeT& y) {
+    T result = Impl::run(x, y);
+    return SafeT(result);
+  }
+  static EIGEN_DEVICE_FUNC inline SafeT run() {
+    T result = Impl::run();
+    return SafeT(result);
+  }
+};
+}  // namespace internal
+}  // namespace Eigen

diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp
index afe6894..237ac67 100644
--- a/test/array_for_matrix.cpp
+++ b/test/array_for_matrix.cpp

@@ -19,12 +19,18 @@
   Index cols = m.cols();
 
   MatrixType m1 = MatrixType::Random(rows, cols), m2 = MatrixType::Random(rows, cols), m3(rows, cols);
-
   ColVectorType cv1 = ColVectorType::Random(rows);
   RowVectorType rv1 = RowVectorType::Random(cols);
 
   Scalar s1 = internal::random<Scalar>(), s2 = internal::random<Scalar>();
 
+  // Prevent overflows for integer types.
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    Scalar kMaxVal = Scalar(10000);
+    m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal);
+    m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal);
+  }
+
   // scalar addition
   VERIFY_IS_APPROX(m1.array() + s1, s1 + m1.array());
   VERIFY_IS_APPROX((m1.array() + s1).matrix(), MatrixType::Constant(rows, cols, s1) + m1);

diff --git a/test/bdcsvd.cpp b/test/bdcsvd.cpp
index c51f354..3ba4cb7 100644
--- a/test/bdcsvd.cpp
+++ b/test/bdcsvd.cpp

@@ -77,6 +77,14 @@
   svd_verify_constructor_options_assert<BDCSVD<MatrixType>>(input);
 }
 
+template <typename MatrixType>
+void bdcsvd_check_convergence(const MatrixType& input) {
+  BDCSVD<MatrixType, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(input);
+  VERIFY(svd.info() == Eigen::Success);
+  MatrixType D = svd.matrixU() * svd.singularValues().asDiagonal() * svd.matrixV().transpose();
+  VERIFY_IS_APPROX(input, D);
+}
+
 EIGEN_DECLARE_TEST(bdcsvd) {
   CALL_SUBTEST_1((bdcsvd_verify_assert<Matrix3f>()));
   CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix4d>()));
@@ -163,4 +171,7 @@
   // With total deflation issues before, when it shouldn't be triggered.
   CALL_SUBTEST_47((compare_bdc_jacobi_instance(true, 3)));
   CALL_SUBTEST_48((compare_bdc_jacobi_instance(false, 3)));
+
+  // Convergence for large constant matrix (https://gitlab.com/libeigen/eigen/-/issues/2491)
+  CALL_SUBTEST_49(bdcsvd_check_convergence<MatrixXf>(MatrixXf::Constant(500, 500, 1)));
 }

diff --git a/test/cholmod_support.cpp b/test/cholmod_support.cpp
index 24126a0..0666749 100644
--- a/test/cholmod_support.cpp
+++ b/test/cholmod_support.cpp

@@ -54,6 +54,13 @@
   check_sparse_spd_determinant(llt_colmajor_upper);
   check_sparse_spd_determinant(ldlt_colmajor_lower);
   check_sparse_spd_determinant(ldlt_colmajor_upper);
+
+  check_sparse_zero_matrix(chol_colmajor_lower);
+  check_sparse_zero_matrix(chol_colmajor_upper);
+  check_sparse_zero_matrix(llt_colmajor_lower);
+  check_sparse_zero_matrix(llt_colmajor_upper);
+  check_sparse_zero_matrix(ldlt_colmajor_lower);
+  check_sparse_zero_matrix(ldlt_colmajor_upper);
 }
 
 template <typename T, int flags, typename IdxType>

diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp
index 266e58b..f335b34 100644
--- a/test/geo_alignedbox.cpp
+++ b/test/geo_alignedbox.cpp

@@ -36,10 +36,10 @@
 
   const Index dim = box.dim();
 
-  VectorType p0 = VectorType::Random(dim);
-  VectorType p1 = VectorType::Random(dim);
+  VectorType p0 = VectorType::Random(dim) / Scalar(2);
+  VectorType p1 = VectorType::Random(dim) / Scalar(2);
   while (p1 == p0) {
-    p1 = VectorType::Random(dim);
+    p1 = VectorType::Random(dim) / Scalar(2);
   }
   RealScalar s1 = internal::random<RealScalar>(0, 1);
 
@@ -216,7 +216,7 @@
 Matrix<Scalar, Dim, (1 << Dim)> boxGetCorners(const Matrix<Scalar, Dim, 1>& min_, const Matrix<Scalar, Dim, 1>& max_) {
   Matrix<Scalar, Dim, (1 << Dim)> result;
   for (Index i = 0; i < (1 << Dim); ++i) {
-    for (Index j = 0; j < Dim; ++j) result(j, i) = (i & (1 << j)) ? min_(j) : max_(j);
+    for (Index j = 0; j < Dim; ++j) result(j, i) = (i & (Index(1) << j)) ? min_(j) : max_(j);
   }
   return result;
 }

diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp
index 494e0d6..4040448 100644
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp

@@ -7,8 +7,8 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#include <valarray>
 #include <vector>
+
 #include "main.h"
 
 using Eigen::placeholders::all;
@@ -17,11 +17,13 @@
 using Eigen::placeholders::lastp1;
 #include <array>
 
+namespace test {
 typedef std::pair<Index, Index> IndexPair;
+}
 
 int encode(Index i, Index j) { return int(i * 100 + j); }
 
-IndexPair decode(Index ij) { return IndexPair(ij / 100, ij % 100); }
+test::IndexPair decode(Index ij) { return test::IndexPair(ij / 100, ij % 100); }
 
 template <typename T>
 bool match(const T& xpr, std::string ref, std::string str_xpr = "") {
@@ -69,12 +71,10 @@
   ArrayXXi A = ArrayXXi::NullaryExpr(n, n, std::ref(encode));
 
   for (Index i = 0; i < n; ++i)
-    for (Index j = 0; j < n; ++j) VERIFY(decode(A(i, j)) == IndexPair(i, j));
+    for (Index j = 0; j < n; ++j) VERIFY(decode(A(i, j)) == test::IndexPair(i, j));
 
   Array4i eii(4);
   eii << 3, 1, 6, 5;
-  std::valarray<int> vali(4);
-  Map<ArrayXi>(&vali[0], 4) = eii;
   std::vector<int> veci(4);
   Map<ArrayXi>(veci.data(), 4) = eii;
 

diff --git a/test/main.h b/test/main.h
index 771725f..bce6736 100644
--- a/test/main.h
+++ b/test/main.h

@@ -176,11 +176,6 @@
 #define DEBUG
 #endif
 
-// bounds integer values for AltiVec
-#if defined(__ALTIVEC__) || defined(__VSX__)
-#define EIGEN_MAKING_DOCS
-#endif
-
 #define DEFAULT_REPEAT 10
 
 namespace Eigen {

diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index b2cca73..bf2970c 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp

@@ -382,12 +382,6 @@
   }
 };
 
-// Packet16b representing bool does not support ptrue, pandnot or pcmp_eq, since
-// the scalar path (for some compilers) compute the bitwise and with 0x1 of the
-// results to keep the value in [0,1].
-template <>
-void packetmath_boolean_mask_ops<bool, internal::packet_traits<bool>::type>() {}
-
 template <typename Scalar, typename Packet, typename EnableIf = void>
 struct packetmath_minus_zero_add_test {
   static void run() {}
@@ -538,7 +532,7 @@
   CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
   CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
 
-  CHECK_CWISE1_IF(PacketTraits::HasNegate, internal::negate, internal::pnegate);
+  CHECK_CWISE1_IF(PacketTraits::HasNegate, test::negate, internal::pnegate);
   CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal);
   CHECK_CWISE1(numext::conj, internal::pconj);
   CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign);
@@ -662,11 +656,11 @@
   {
     for (int i = 0; i < PacketSize; ++i) {
       // "if" mask
-      unsigned char v = internal::random<bool>() ? 0xff : 0;
-      char* bytes = (char*)(data1 + i);
-      for (int k = 0; k < int(sizeof(Scalar)); ++k) {
-        bytes[k] = v;
-      }
+      // Note: it's UB to load 0xFF directly into a `bool`.
+      uint8_t v =
+          internal::random<bool>() ? (std::is_same<Scalar, bool>::value ? static_cast<uint8_t>(true) : 0xff) : 0;
+      // Avoid strict aliasing violation by using memset.
+      memset(static_cast<void*>(data1 + i), v, sizeof(Scalar));
       // "then" packet
       data1[i + PacketSize] = internal::random<Scalar>();
       // "else" packet
@@ -1141,7 +1135,7 @@
 
       data1[0] = -Scalar(0.);
       h.store(data2, internal::psin(h.load(data1)));
-      VERIFY(internal::biteq(data2[0], data1[0]));
+      VERIFY(test::biteq(data2[0], data1[0]));
       h.store(data2, internal::pcos(h.load(data1)));
       VERIFY_IS_EQUAL(data2[0], Scalar(1));
     }
@@ -1220,6 +1214,15 @@
   CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_number_min, internal::pmin<PropagateNumbers>);
   CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_number_max, internal::pmax<PropagateNumbers>);
   CHECK_CWISE1(numext::abs, internal::pabs);
+  // Vectorized versions may give a different result in the case of signed int overflow,
+  // which is undefined behavior (e.g. NEON).
+  // Also note that unsigned integers with size < sizeof(int) may be implicitly converted to a signed
+  // int, which can also trigger UB.
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    for (int i = 0; i < 2 * PacketSize; ++i) {
+      data1[i] = data1[i] / Scalar(2);
+    }
+  }
   CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff);
 
   ref[0] = data1[0];

diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h
index a4bb347..86a01fb 100644
--- a/test/packetmath_test_shared.h
+++ b/test/packetmath_test_shared.h

@@ -19,7 +19,8 @@
 bool g_first_pass = true;
 
 namespace Eigen {
-namespace internal {
+
+namespace test {
 
 template <typename T>
 T negate(const T& x) {
@@ -31,50 +32,11 @@
   return Map<const Array<unsigned char, sizeof(T), 1> >(reinterpret_cast<const unsigned char*>(&x));
 }
 
-// The following implement bitwise operations on floating point types
-template <typename T, typename Bits, typename Func>
-T apply_bit_op(Bits a, Bits b, Func f) {
-  Array<unsigned char, sizeof(T), 1> data;
-  T res;
-  for (Index i = 0; i < data.size(); ++i) data[i] = f(a[i], b[i]);
-  // Note: The reinterpret_cast works around GCC's class-memaccess warnings:
-  std::memcpy(reinterpret_cast<unsigned char*>(&res), data.data(), sizeof(T));
-  return res;
-}
-
-#define EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, T)       \
-  template <>                                       \
-  T EIGEN_CAT(p, OP)(const T& a, const T& b) {      \
-    return apply_bit_op<T>(bits(a), bits(b), FUNC); \
-  }
-
-#define EIGEN_TEST_MAKE_BITWISE(OP, FUNC)                 \
-  EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, float)               \
-  EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, double)              \
-  EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, half)                \
-  EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, bfloat16)            \
-  EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, std::complex<float>) \
-  EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, std::complex<double>)
-
-EIGEN_TEST_MAKE_BITWISE(xor, std::bit_xor<unsigned char>())
-EIGEN_TEST_MAKE_BITWISE(and, std::bit_and<unsigned char>())
-EIGEN_TEST_MAKE_BITWISE(or, std::bit_or<unsigned char>())
-struct bit_andnot {
-  template <typename T>
-  T operator()(T a, T b) const {
-    return a & (~b);
-  }
-};
-EIGEN_TEST_MAKE_BITWISE(andnot, bit_andnot())
 template <typename T>
 bool biteq(T a, T b) {
   return (bits(a) == bits(b)).all();
 }
 
-}  // namespace internal
-
-namespace test {
-
 // NOTE: we disable inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU.
 template <typename Scalar>
 EIGEN_DONT_INLINE bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits<Scalar>::Real& refvalue) {

diff --git a/test/product.h b/test/product.h
index 8d46846..74f01b0 100644
--- a/test/product.h
+++ b/test/product.h

@@ -53,7 +53,7 @@
                  MatrixType::Flags & RowMajorBit ? ColMajor : RowMajor>
       OtherMajorMatrixType;
 
-  // Wwe want a tighter epsilon for not-approx tests.  Otherwise, for certain
+  // We want a tighter epsilon for not-approx tests.  Otherwise, for certain
   // low-precision types (e.g. bfloat16), the bound ends up being relatively large
   // (e.g. 0.12), causing flaky tests.
   RealScalar not_approx_epsilon = RealScalar(0.1) * NumTraits<RealScalar>::dummy_precision();
@@ -69,6 +69,15 @@
   ColSquareMatrixType square2 = ColSquareMatrixType::Random(cols, cols), res2 = ColSquareMatrixType::Random(cols, cols);
   RowVectorType v1 = RowVectorType::Random(rows);
   ColVectorType vc2 = ColVectorType::Random(cols), vcres(cols);
+
+  // Prevent overflows for integer types.
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    Scalar kMaxVal = Scalar(10000);
+    m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal);
+    m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal);
+    v1.array() = v1.array() - kMaxVal * (v1.array() / kMaxVal);
+  }
+
   OtherMajorMatrixType tm1 = m1;
 
   Scalar s1 = internal::random<Scalar>();

diff --git a/test/rand.cpp b/test/rand.cpp
index 2b193ff..b5cf801 100644
--- a/test/rand.cpp
+++ b/test/rand.cpp

@@ -7,12 +7,14 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+#include <cstdlib>
 #include "main.h"
 
-typedef long long int64;
-
+// For GCC-6, if this function is inlined then there seems to be an optimization
+// bug that triggers a failure.  This failure goes away if you access `r` in
+// in any way, and for any other compiler.
 template <typename Scalar>
-Scalar check_in_range(Scalar x, Scalar y) {
+EIGEN_DONT_INLINE Scalar check_in_range(Scalar x, Scalar y) {
   Scalar r = internal::random<Scalar>(x, y);
   VERIFY(r >= x);
   if (y >= x) {
@@ -25,8 +27,8 @@
 void check_all_in_range(Scalar x, Scalar y) {
   Array<int, 1, Dynamic> mask(y - x + 1);
   mask.fill(0);
-  long n = (y - x + 1) * 32;
-  for (long k = 0; k < n; ++k) {
+  int64_t n = (y - x + 1) * 32;
+  for (int64_t k = 0; k < n; ++k) {
     mask(check_in_range(x, y) - x)++;
   }
   for (Index i = 0; i < mask.size(); ++i)
@@ -34,82 +36,203 @@
   VERIFY((mask > 0).all());
 }
 
+template <typename Scalar, typename EnableIf = void>
+class HistogramHelper {
+ public:
+  HistogramHelper(int nbins) : HistogramHelper(Scalar(-1), Scalar(1), nbins) {}
+  HistogramHelper(Scalar lower, Scalar upper, int nbins) {
+    lower_ = static_cast<double>(lower);
+    upper_ = static_cast<double>(upper);
+    num_bins_ = nbins;
+    bin_width_ = (upper_ - lower_) / static_cast<double>(nbins);
+  }
+  int bin(Scalar v) {
+    double result = (static_cast<double>(v) - lower_) / bin_width_;
+    return std::min<int>(static_cast<int>(result), num_bins_ - 1);
+  }
+
+  double uniform_bin_probability(int bin) {
+    double range = upper_ - lower_;
+    if (bin < num_bins_ - 1) {
+      return bin_width_ / range;
+    }
+    return (upper_ - (lower_ + double(bin) * bin_width_)) / range;
+  }
+
+ private:
+  double lower_;
+  double upper_;
+  int num_bins_;
+  double bin_width_;
+};
+
+template <typename Scalar>
+class HistogramHelper<Scalar, std::enable_if_t<Eigen::NumTraits<Scalar>::IsInteger>> {
+ public:
+  using RangeType = typename Eigen::internal::make_unsigned<Scalar>::type;
+  HistogramHelper(int nbins)
+      : HistogramHelper(Eigen::NumTraits<Scalar>::lowest(), Eigen::NumTraits<Scalar>::highest(), nbins) {}
+  HistogramHelper(Scalar lower, Scalar upper, int nbins)
+      : lower_{lower}, upper_{upper}, num_bins_{nbins}, bin_width_{bin_width(lower, upper, nbins)} {}
+
+  int bin(Scalar v) { return static_cast<int>(RangeType(v - lower_) / bin_width_); }
+
+  double uniform_bin_probability(int bin) {
+    // Avoid overflow in computing range.
+    double range = static_cast<double>(RangeType(upper_ - lower_)) + 1.0;
+    if (bin < num_bins_ - 1) {
+      return static_cast<double>(bin_width_) / range;
+    }
+    return static_cast<double>(RangeType(upper_) - RangeType((lower_ + bin * bin_width_)) + 1) / range;
+  }
+
+ private:
+  static constexpr Scalar bin_width(Scalar lower, Scalar upper, int nbins) {
+    // Avoid overflow in computing the full range.
+    return RangeType(upper - nbins - lower + 1) / nbins + 1;
+  }
+
+  Scalar lower_;
+  Scalar upper_;
+  int num_bins_;
+  Scalar bin_width_;
+};
+
 template <typename Scalar>
 void check_histogram(Scalar x, Scalar y, int bins) {
-  Array<int, 1, Dynamic> hist(bins);
-  hist.fill(0);
-  int f = 100000;
-  int n = bins * f;
-  int64 range = int64(y) - int64(x);
-  int divisor = int((range + 1) / bins);
-  assert(((range + 1) % bins) == 0);
-  for (int k = 0; k < n; ++k) {
+  Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins);
+  HistogramHelper<Scalar> hist_helper(x, y, bins);
+  int64_t n = static_cast<int64_t>(bins) * 10000;  // Approx 10000 per bin.
+  for (int64_t k = 0; k < n; ++k) {
     Scalar r = check_in_range(x, y);
-    hist(int((int64(r) - int64(x)) / divisor))++;
+    int bin = hist_helper.bin(r);
+    hist(bin)++;
   }
-  VERIFY((((hist.cast<double>() / double(f)) - 1.0).abs() < 0.03).all());
+  // Normalize bins by probability.
+  for (int i = 0; i < bins; ++i) {
+    hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i);
+  }
+  VERIFY(((hist.array() - 1.0).abs() < 0.05).all());
+}
+
+template <typename Scalar>
+void check_histogram(int bins) {
+  Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins);
+  HistogramHelper<Scalar> hist_helper(bins);
+  int64_t n = static_cast<int64_t>(bins) * 10000;  // Approx 10000 per bin.
+  for (int64_t k = 0; k < n; ++k) {
+    Scalar r = Eigen::internal::random<Scalar>();
+    int bin = hist_helper.bin(r);
+    hist(bin)++;
+  }
+  // Normalize bins by probability.
+  for (int i = 0; i < bins; ++i) {
+    hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i);
+  }
+  VERIFY(((hist.array() - 1.0).abs() < 0.05).all());
 }
 
 EIGEN_DECLARE_TEST(rand) {
-  long long_ref = NumTraits<long>::highest() / 10;
+  int64_t int64_ref = NumTraits<int64_t>::highest() / 10;
   // the minimum guarantees that these conversions are safe
-  auto char_offset = static_cast<signed char>((std::min)(g_repeat, 64));
-  auto short_offset = static_cast<signed short>((std::min)(g_repeat, 8000));
+  int8_t int8t_offset = static_cast<int8_t>((std::min)(g_repeat, 64));
+  int16_t int16t_offset = static_cast<int16_t>((std::min)(g_repeat, 8000));
+  EIGEN_UNUSED_VARIABLE(int64_ref);
+  EIGEN_UNUSED_VARIABLE(int8t_offset);
+  EIGEN_UNUSED_VARIABLE(int16t_offset);
 
   for (int i = 0; i < g_repeat * 10000; i++) {
-    CALL_SUBTEST(check_in_range<float>(10, 11));
-    CALL_SUBTEST(check_in_range<float>(1.24234523f, 1.24234523f));
-    CALL_SUBTEST(check_in_range<float>(-1, 1));
-    CALL_SUBTEST(check_in_range<float>(-1432.2352f, -1432.2352f));
+    CALL_SUBTEST_1(check_in_range<float>(10.0f, 11.0f));
+    CALL_SUBTEST_1(check_in_range<float>(1.24234523f, 1.24234523f));
+    CALL_SUBTEST_1(check_in_range<float>(-1.0f, 1.0f));
+    CALL_SUBTEST_1(check_in_range<float>(-1432.2352f, -1432.2352f));
 
-    CALL_SUBTEST(check_in_range<double>(10, 11));
-    CALL_SUBTEST(check_in_range<double>(1.24234523, 1.24234523));
-    CALL_SUBTEST(check_in_range<double>(-1, 1));
-    CALL_SUBTEST(check_in_range<double>(-1432.2352, -1432.2352));
+    CALL_SUBTEST_2(check_in_range<double>(10.0, 11.0));
+    CALL_SUBTEST_2(check_in_range<double>(1.24234523, 1.24234523));
+    CALL_SUBTEST_2(check_in_range<double>(-1.0, 1.0));
+    CALL_SUBTEST_2(check_in_range<double>(-1432.2352, -1432.2352));
 
-    CALL_SUBTEST(check_in_range<int>(0, -1));
-    CALL_SUBTEST(check_in_range<short>(0, -1));
-    CALL_SUBTEST(check_in_range<long>(0, -1));
-    CALL_SUBTEST(check_in_range<int>(-673456, 673456));
-    CALL_SUBTEST(check_in_range<int>(-RAND_MAX + 10, RAND_MAX - 10));
-    CALL_SUBTEST(check_in_range<short>(-24345, 24345));
-    CALL_SUBTEST(check_in_range<long>(-long_ref, long_ref));
+    CALL_SUBTEST_3(check_in_range<long double>(10.0L, 11.0L));
+    CALL_SUBTEST_3(check_in_range<long double>(1.24234523L, 1.24234523L));
+    CALL_SUBTEST_3(check_in_range<long double>(-1.0L, 1.0L));
+    CALL_SUBTEST_3(check_in_range<long double>(-1432.2352L, -1432.2352L));
+
+    CALL_SUBTEST_4(check_in_range<half>(half(10.0f), half(11.0f)));
+    CALL_SUBTEST_4(check_in_range<half>(half(1.24234523f), half(1.24234523f)));
+    CALL_SUBTEST_4(check_in_range<half>(half(-1.0f), half(1.0f)));
+    CALL_SUBTEST_4(check_in_range<half>(half(-1432.2352f), half(-1432.2352f)));
+
+    CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(10.0f), bfloat16(11.0f)));
+    CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(1.24234523f), bfloat16(1.24234523f)));
+    CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(-1.0f), bfloat16(1.0f)));
+    CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(-1432.2352f), bfloat16(-1432.2352f)));
+
+    CALL_SUBTEST_6(check_in_range<int32_t>(0, -1));
+    CALL_SUBTEST_6(check_in_range<int16_t>(0, -1));
+    CALL_SUBTEST_6(check_in_range<int64_t>(0, -1));
+    CALL_SUBTEST_6(check_in_range<int32_t>(-673456, 673456));
+    CALL_SUBTEST_6(check_in_range<int32_t>(-RAND_MAX + 10, RAND_MAX - 10));
+    CALL_SUBTEST_6(check_in_range<int16_t>(-24345, 24345));
+    CALL_SUBTEST_6(check_in_range<int64_t>(-int64_ref, int64_ref));
   }
 
-  CALL_SUBTEST(check_all_in_range<signed char>(11, 11));
-  CALL_SUBTEST(check_all_in_range<signed char>(11, 11 + char_offset));
-  CALL_SUBTEST(check_all_in_range<signed char>(-5, 5));
-  CALL_SUBTEST(check_all_in_range<signed char>(-11 - char_offset, -11));
-  CALL_SUBTEST(check_all_in_range<signed char>(-126, -126 + char_offset));
-  CALL_SUBTEST(check_all_in_range<signed char>(126 - char_offset, 126));
-  CALL_SUBTEST(check_all_in_range<signed char>(-126, 126));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(11, 11));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(11, 11 + int8t_offset));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(-5, 5));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(-11 - int8t_offset, -11));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, -126 + int8t_offset));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(126 - int8t_offset, 126));
+  CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, 126));
 
-  CALL_SUBTEST(check_all_in_range<short>(11, 11));
-  CALL_SUBTEST(check_all_in_range<short>(11, 11 + short_offset));
-  CALL_SUBTEST(check_all_in_range<short>(-5, 5));
-  CALL_SUBTEST(check_all_in_range<short>(-11 - short_offset, -11));
-  CALL_SUBTEST(check_all_in_range<short>(-24345, -24345 + short_offset));
-  CALL_SUBTEST(check_all_in_range<short>(24345, 24345 + short_offset));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11 + int16t_offset));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(-5, 5));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(-11 - int16t_offset, -11));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(-24345, -24345 + int16t_offset));
+  CALL_SUBTEST_8(check_all_in_range<int16_t>(24345, 24345 + int16t_offset));
 
-  CALL_SUBTEST(check_all_in_range<int>(11, 11));
-  CALL_SUBTEST(check_all_in_range<int>(11, 11 + g_repeat));
-  CALL_SUBTEST(check_all_in_range<int>(-5, 5));
-  CALL_SUBTEST(check_all_in_range<int>(-11 - g_repeat, -11));
-  CALL_SUBTEST(check_all_in_range<int>(-673456, -673456 + g_repeat));
-  CALL_SUBTEST(check_all_in_range<int>(673456, 673456 + g_repeat));
+  CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11));
+  CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11 + g_repeat));
+  CALL_SUBTEST_9(check_all_in_range<int32_t>(-5, 5));
+  CALL_SUBTEST_9(check_all_in_range<int32_t>(-11 - g_repeat, -11));
+  CALL_SUBTEST_9(check_all_in_range<int32_t>(-673456, -673456 + g_repeat));
+  CALL_SUBTEST_9(check_all_in_range<int32_t>(673456, 673456 + g_repeat));
 
-  CALL_SUBTEST(check_all_in_range<long>(11, 11));
-  CALL_SUBTEST(check_all_in_range<long>(11, 11 + g_repeat));
-  CALL_SUBTEST(check_all_in_range<long>(-5, 5));
-  CALL_SUBTEST(check_all_in_range<long>(-11 - g_repeat, -11));
-  CALL_SUBTEST(check_all_in_range<long>(-long_ref, -long_ref + g_repeat));
-  CALL_SUBTEST(check_all_in_range<long>(long_ref, long_ref + g_repeat));
+  CALL_SUBTEST_10(check_all_in_range<int64_t>(11, 11));
+  CALL_SUBTEST_10(check_all_in_range<int64_t>(11, 11 + g_repeat));
+  CALL_SUBTEST_10(check_all_in_range<int64_t>(-5, 5));
+  CALL_SUBTEST_10(check_all_in_range<int64_t>(-11 - g_repeat, -11));
+  CALL_SUBTEST_10(check_all_in_range<int64_t>(-int64_ref, -int64_ref + g_repeat));
+  CALL_SUBTEST_10(check_all_in_range<int64_t>(int64_ref, int64_ref + g_repeat));
 
-  CALL_SUBTEST(check_histogram<int>(-5, 5, 11));
+  CALL_SUBTEST_11(check_histogram<int32_t>(-5, 5, 11));
   int bins = 100;
-  CALL_SUBTEST(check_histogram<int>(-3333, -3333 + bins * (3333 / bins) - 1, bins));
+  EIGEN_UNUSED_VARIABLE(bins)
+  CALL_SUBTEST_11(check_histogram<int32_t>(-3333, -3333 + bins * (3333 / bins) - 1, bins));
   bins = 1000;
-  CALL_SUBTEST(check_histogram<int>(-RAND_MAX + 10, -RAND_MAX + 10 + bins * (RAND_MAX / bins) - 1, bins));
-  CALL_SUBTEST(
-      check_histogram<int>(-RAND_MAX + 10, -int64(RAND_MAX) + 10 + bins * (2 * int64(RAND_MAX) / bins) - 1, bins));
+  CALL_SUBTEST_11(check_histogram<int32_t>(-RAND_MAX + 10, -RAND_MAX + 10 + bins * (RAND_MAX / bins) - 1, bins));
+  CALL_SUBTEST_11(check_histogram<int32_t>(-RAND_MAX + 10,
+                                           -int64_t(RAND_MAX) + 10 + bins * (2 * int64_t(RAND_MAX) / bins) - 1, bins));
+
+  CALL_SUBTEST_12(check_histogram<uint8_t>(/*bins=*/16));
+  CALL_SUBTEST_12(check_histogram<uint16_t>(/*bins=*/1024));
+  CALL_SUBTEST_12(check_histogram<uint32_t>(/*bins=*/1024));
+  CALL_SUBTEST_12(check_histogram<uint64_t>(/*bins=*/1024));
+
+  CALL_SUBTEST_13(check_histogram<int8_t>(/*bins=*/16));
+  CALL_SUBTEST_13(check_histogram<int16_t>(/*bins=*/1024));
+  CALL_SUBTEST_13(check_histogram<int32_t>(/*bins=*/1024));
+  CALL_SUBTEST_13(check_histogram<int64_t>(/*bins=*/1024));
+
+  CALL_SUBTEST_14(check_histogram<float>(-10.0f, 10.0f, /*bins=*/1024));
+  CALL_SUBTEST_14(check_histogram<double>(-10.0, 10.0, /*bins=*/1024));
+  CALL_SUBTEST_14(check_histogram<long double>(-10.0L, 10.0L, /*bins=*/1024));
+  CALL_SUBTEST_14(check_histogram<half>(half(-10.0f), half(10.0f), /*bins=*/512));
+  CALL_SUBTEST_14(check_histogram<bfloat16>(bfloat16(-10.0f), bfloat16(10.0f), /*bins=*/64));
+
+  CALL_SUBTEST_15(check_histogram<float>(/*bins=*/1024));
+  CALL_SUBTEST_15(check_histogram<double>(/*bins=*/1024));
+  CALL_SUBTEST_15(check_histogram<long double>(/*bins=*/1024));
+  CALL_SUBTEST_15(check_histogram<half>(/*bins=*/512));
+  CALL_SUBTEST_15(check_histogram<bfloat16>(/*bins=*/64));
 }

diff --git a/test/redux.cpp b/test/redux.cpp
index 8a8138d..42c269a 100644
--- a/test/redux.cpp
+++ b/test/redux.cpp

@@ -24,12 +24,18 @@
 
   MatrixType m1 = MatrixType::Random(rows, cols);
 
-  // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test
+  // The entries of m1 are uniformly distributed in [-1,1), so m1.prod() is very small. This may lead to test
   // failures if we underflow into denormals. Thus, we scale so that entries are close to 1.
   MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1;
 
   Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> m2(rows, rows);
   m2.setRandom();
+  // Prevent overflows for integer types.
+  if (Eigen::NumTraits<Scalar>::IsInteger) {
+    Scalar kMaxVal = Scalar(10000);
+    m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal);
+    m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal);
+  }
 
   VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1));
   VERIFY_IS_APPROX(

diff --git a/test/skew_symmetric_matrix3.cpp b/test/skew_symmetric_matrix3.cpp
index d45262f..48d490d 100644
--- a/test/skew_symmetric_matrix3.cpp
+++ b/test/skew_symmetric_matrix3.cpp

@@ -177,7 +177,7 @@
 
   // rotate around z-axis
   Vector v2;
-  v2 << 0, 0, EIGEN_PI;
+  v2 << 0, 0, Scalar(EIGEN_PI);
   const SquareMatrix r2 = v2.asSkewSymmetric().exponential();
   VERIFY_IS_APPROX(r2 * (Vector() << 1, 0, 0).finished(), (Vector() << -1, 0, 0).finished());
   VERIFY_IS_APPROX(r2 * (Vector() << 0, 1, 0).finished(), (Vector() << 0, -1, 0).finished());
@@ -196,8 +196,8 @@
     CALL_SUBTEST_2(plusMinus<double>());
     CALL_SUBTEST_2(multiplyScale<float>());
     CALL_SUBTEST_2(multiplyScale<double>());
-    CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXf(3, internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
-    CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXd(3, internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXf(3, internal::random<int>(3, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXd(3, internal::random<int>(3, EIGEN_TEST_MAX_SIZE))));
     CALL_SUBTEST_2(traceAndDet<float>());
     CALL_SUBTEST_2(traceAndDet<double>());
     CALL_SUBTEST_2(transpose<float>());

diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index e7518e4..033df83 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h

@@ -484,6 +484,15 @@
   }
 }
 
+template <typename Solver>
+void check_sparse_zero_matrix(Solver& solver) {
+  typedef typename Solver::MatrixType Mat;
+
+  Mat A(1, 1);
+  solver.compute(A);
+  VERIFY_IS_EQUAL(solver.info(), NumericalIssue);
+}
+
 template <typename Solver, typename DenseMat>
 Index generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300,
                                      int options = ForceNonZeroDiag) {

diff --git a/test/stl_iterators.cpp b/test/stl_iterators.cpp
index 7a62673..fee8ef5 100644
--- a/test/stl_iterators.cpp
+++ b/test/stl_iterators.cpp

@@ -463,6 +463,13 @@
   // check rows/cols iterators with STL algorithms
   {
     RowVectorType row = RowVectorType::Random(cols);
+    VectorType col = VectorType::Random(rows);
+    // Prevent overflows for integer types.
+    if (Eigen::NumTraits<Scalar>::IsInteger) {
+      Scalar kMaxVal = Scalar(1000);
+      row.array() = row.array() - kMaxVal * (row.array() / kMaxVal);
+      col.array() = col.array() - kMaxVal * (col.array() / kMaxVal);
+    }
     A.rowwise() = row;
     VERIFY(std::all_of(A.rowwise().begin(), A.rowwise().end(), [&row](typename ColMatrixType::RowXpr x) {
       return internal::isApprox(x.squaredNorm(), row.squaredNorm());
@@ -471,7 +478,6 @@
       return internal::isApprox(x.squaredNorm(), row.squaredNorm());
     }));
 
-    VectorType col = VectorType::Random(rows);
     A.colwise() = col;
     VERIFY(std::all_of(A.colwise().begin(), A.colwise().end(), [&col](typename ColMatrixType::ColXpr x) {
       return internal::isApprox(x.squaredNorm(), col.squaredNorm());

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 9375398..f9f07d4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h

@@ -1007,13 +1007,14 @@
     #include EIGEN_READONLY_TENSORBASE_PLUGIN
     #endif
 
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
   protected:
     template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
     template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
     // the Eigen:: prefix is required to workaround a compilation issue with nvcc 9.0
     template <typename OtherDerived, int AccessLevel> friend class Eigen::TensorBase;
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
 };
 
 template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value>
@@ -1199,6 +1200,11 @@
       return TensorAsyncDevice<Derived, DeviceType, DoneCallback>(dev, derived(), std::move(done));
     }
 
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
     #ifdef EIGEN_TENSORBASE_PLUGIN
     #include EIGEN_TENSORBASE_PLUGIN
     #endif
@@ -1215,10 +1221,6 @@
       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return derived();
     }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); }
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
 };
 #endif // EIGEN_PARSED_BY_DOXYGEN
 } // end namespace Eigen

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
index 8d330b5..6a1240c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h

@@ -23,9 +23,12 @@
 template <typename ADerived, typename BDerived, typename XDerived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>,
                                                                  const ADerived, const BDerived, const XDerived>
-betainc(const ADerived& a, const BDerived& b, const XDerived& x) {
+betainc(const Eigen::TensorBase<ADerived, ReadOnlyAccessors>& a,
+        const Eigen::TensorBase<BDerived, ReadOnlyAccessors>& b,
+        const Eigen::TensorBase<XDerived, ReadOnlyAccessors>& x) {
   return TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived, const BDerived,
-                              const XDerived>(a, b, x, internal::scalar_betainc_op<typename XDerived::Scalar>());
+                              const XDerived>(a.derived(), b.derived(), x.derived(),
+                                              internal::scalar_betainc_op<typename XDerived::Scalar>());
 }
 
 }  // end namespace Eigen