Update Eigen to commit:2a9055b50ed22101da7d77e999b90ed50956fe0b CHANGELOG ========= 2a9055b50 - Fix random for custom scalars that don't have constexpr digits(). 500a3602f - Use traits<Matrix>::Options instead of Matrix::Options. 0b9ca1159 - Fix deflation in BDCSVD. f40ad38fd - Fix failure on ARM with latest compilers. a24bf2e9a - Disable float16 packet casting if native AVX512 f16 is available. 5361dea83 - Remove return int types from BLAS/LAPACK functions. 7e655c9a5 - Fixes 2780 6ea33f95d - Eliminate warning about writing bytes directly to non-trivial type. 06b45905e - Remove r_cnjg due to conflicts with f2c. 9229cfa82 - Fix division by zero UB in packet size logic. 186f8205d - Apply clang-format to lapack/blas directories 4eac211e9 - Fix C++20 error, Arithmetic between different enumeration types d1d87973f - Fix segfault in CholmodBase::factorize() for zero matrix 7b87b2191 - Fix UB in bool packetmath test. 431e4a913 - Fix the fuzz 3ab8f4825 - fix tests when scalar is bfloat16, half 3ebaab8a6 - Fix PPC rand and other failures. ebd13c3b1 - fix skew symmetric test 128c8abf4 - Fix gcc-6 bug in the rand test. d626762e3 - improve random a9ddab3e0 - Fix a bunch of ODR violations. PiperOrigin-RevId: 609443252 Change-Id: Iddb4afa1b81a173975c2ef3771fc4b08683fdbdc
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index 447a393..e5b46c4 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -329,8 +329,10 @@ cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); internal::cm_factorize_p<StorageIndex>(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); - // If the factorization failed, minor is the column at which it did. On success minor == n. - this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); + // If the factorization failed, either the input matrix was zero (so m_cholmodFactor == nullptr), or minor is the + // column at which it failed. On success minor == n. + this->m_info = + (m_cholmodFactor != nullptr && m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); m_factorizationIsOk = true; }
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 3a302c0..4b56f0f 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h
@@ -376,6 +376,12 @@ } }; +// For booleans, we can only directly set a valid `bool` value to avoid UB. +template <> +struct ptrue_impl<bool, void> { + static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; } +}; + // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value). // Although this is technically not a valid bitmask, the scalar path for pselect // uses a comparison to zero, so this should still work in most cases. We don't @@ -458,6 +464,32 @@ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; } }; +template <> +struct bit_and<bool> { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a && b; + } +}; + +template <> +struct bit_or<bool> { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a || b; + } +}; + +template <> +struct bit_xor<bool> { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a != b; + } +}; + +template <> +struct bit_not<bool> { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; } +}; + // Use operators &, |, ^, ~. template <typename T> struct operator_bitwise_helper {
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 0be29bc..c92572f 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -563,34 +563,6 @@ } }; -/**************************************************************************** - * Implementation of random * - ****************************************************************************/ - -template <typename Scalar, bool IsComplex, bool IsInteger> -struct random_default_impl {}; - -template <typename Scalar> -struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {}; - -template <typename Scalar> -struct random_retval { - typedef Scalar type; -}; - -template <typename Scalar> -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); -template <typename Scalar> -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); - -template <typename Scalar> -struct random_default_impl<Scalar, false, false> { - static inline Scalar run(const Scalar& x, const Scalar& y) { - return x + (y - x) * Scalar(std::rand()) / Scalar(RAND_MAX); - } - static inline Scalar run() { return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1)); } -}; - enum { meta_floor_log2_terminate, meta_floor_log2_move_up, meta_floor_log2_move_down, meta_floor_log2_bogus }; template <unsigned int n, int lower, int upper> @@ -769,56 +741,174 @@ #endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +template <typename BitsType> +int log2_ceil(BitsType x) { + int n = CHAR_BIT * sizeof(BitsType) - clz(x); + bool powerOfTwo = (x & (x - 1)) == 0; + return x == 0 ? 0 : powerOfTwo ? n - 1 : n; +} + +template <typename BitsType> +int log2_floor(BitsType x) { + int n = CHAR_BIT * sizeof(BitsType) - clz(x); + return x == 0 ? 0 : n - 1; +} + +/**************************************************************************** + * Implementation of random * + ****************************************************************************/ + +// return a Scalar filled with numRandomBits beginning from the least significant bit +template <typename Scalar> +Scalar getRandomBits(int numRandomBits) { + using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type; + enum : int { + StdRandBits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value, + ScalarBits = sizeof(Scalar) * CHAR_BIT + }; + eigen_assert((numRandomBits >= 0) && (numRandomBits <= ScalarBits)); + const BitsType mask = BitsType(-1) >> ((ScalarBits - numRandomBits) & (ScalarBits - 1)); + BitsType randomBits = BitsType(0); + for (int shift = 0; shift < numRandomBits; shift += StdRandBits) { + int r = std::rand(); + randomBits |= static_cast<BitsType>(r) << shift; + } + // clear the excess bits + randomBits &= mask; + return numext::bit_cast<Scalar, BitsType>(randomBits); +} + +template <typename Scalar, bool IsComplex, bool IsInteger> +struct random_default_impl {}; + +template <typename Scalar> +struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {}; + +template <typename Scalar> +struct random_retval { + typedef Scalar type; +}; + +template <typename Scalar> +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); +template <typename Scalar> +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); + +template <typename Scalar> +struct random_default_impl<Scalar, false, false> { + using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + Scalar half_x = Scalar(0.5) * x; + Scalar half_y = Scalar(0.5) * y; + Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits); + // result is in the half-open interval [x, y) -- provided that x < y + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + const int mantissa_bits = NumTraits<Scalar>::digits() - 1; + return run(x, y, mantissa_bits); + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + const int mantissa_bits = NumTraits<Scalar>::digits() - 1; + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissa_bits); + BitsType randomBits = getRandomBits<BitsType>(numRandomBits); + // if fewer than MantissaBits is requested, shift them to the left + randomBits <<= (mantissa_bits - numRandomBits); + // randomBits is in the half-open interval [2,4) + randomBits |= numext::bit_cast<BitsType>(Scalar(2)); + // result is in the half-open interval [-1,1) + Scalar result = numext::bit_cast<Scalar>(randomBits) - Scalar(3); + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { + const int mantissa_bits = NumTraits<Scalar>::digits() - 1; + return run(mantissa_bits); + } +}; + +// TODO: fix this for PPC +template <bool Specialize = sizeof(long double) == 2 * sizeof(uint64_t) && !EIGEN_ARCH_PPC> +struct random_longdouble_impl { + enum : int { + Size = sizeof(long double), + MantissaBits = NumTraits<long double>::digits() - 1, + LowBits = MantissaBits > 64 ? 64 : MantissaBits, + HighBits = MantissaBits > 64 ? MantissaBits - 64 : 0 + }; + static EIGEN_DEVICE_FUNC inline long double run() { + EIGEN_USING_STD(memcpy) + uint64_t randomBits[2]; + long double result = 2.0L; + memcpy(&randomBits, &result, Size); + randomBits[0] |= getRandomBits<uint64_t>(LowBits); + randomBits[1] |= getRandomBits<uint64_t>(HighBits); + memcpy(&result, &randomBits, Size); + result -= 3.0L; + return result; + } +}; +template <> +struct random_longdouble_impl<false> { + using Impl = random_impl<double>; + static EIGEN_DEVICE_FUNC inline long double run() { return static_cast<long double>(Impl::run()); } +}; + +template <> +struct random_impl<long double> { + static EIGEN_DEVICE_FUNC inline long double run(const long double& x, const long double& y) { + long double half_x = 0.5L * x; + long double half_y = 0.5L * y; + long double result = (half_x + half_y) + (half_y - half_x) * run(); + return result; + } + static EIGEN_DEVICE_FUNC inline long double run() { return random_longdouble_impl<>::run(); } +}; + template <typename Scalar> struct random_default_impl<Scalar, false, true> { - static inline Scalar run(const Scalar& x, const Scalar& y) { + using BitsType = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type; + enum : int { ScalarBits = sizeof(Scalar) * CHAR_BIT }; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { if (y <= x) return x; - // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself. - typedef typename make_unsigned<Scalar>::type ScalarU; - // ScalarX is the widest of ScalarU and unsigned int. - // We'll deal only with ScalarX and unsigned int below thus avoiding signed - // types and arithmetic and signed overflows (which are undefined behavior). - typedef std::conditional_t<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned> ScalarX; - // The following difference doesn't overflow, provided our integer types are two's - // complement and have the same number of padding bits in signed and unsigned variants. - // This is the case in most modern implementations of C++. - ScalarX range = ScalarX(y) - ScalarX(x); - ScalarX offset = 0; - ScalarX divisor = 1; - ScalarX multiplier = 1; - const unsigned rand_max = RAND_MAX; - if (range <= rand_max) - divisor = (rand_max + 1) / (range + 1); - else - multiplier = 1 + range / (rand_max + 1); - // Rejection sampling. + const BitsType range = static_cast<BitsType>(y) - static_cast<BitsType>(x) + 1; + // handle edge case where [x,y] spans the entire range of Scalar + if (range == 0) return getRandomBits<Scalar>(ScalarBits); + // calculate the number of random bits needed to fill range + const int numRandomBits = log2_ceil(range); + BitsType randomBits; do { - offset = (unsigned(std::rand()) * multiplier) / divisor; - } while (offset > range); - return Scalar(ScalarX(x) + offset); + randomBits = getRandomBits<BitsType>(numRandomBits); + // if the random draw is outside [0, range), try again (rejection sampling) + // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50% + } while (randomBits >= range); + Scalar result = x + static_cast<Scalar>(randomBits); + return result; } - static inline Scalar run() { + static EIGEN_DEVICE_FUNC inline Scalar run() { #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10)); #else - enum { - rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value, - scalar_bits = sizeof(Scalar) * CHAR_BIT, - shift = plain_enum_max(0, int(rand_bits) - int(scalar_bits)), - offset = NumTraits<Scalar>::IsSigned ? (1 << (plain_enum_min(rand_bits, scalar_bits) - 1)) : 0 - }; - return Scalar((std::rand() >> shift) - offset); + return getRandomBits<Scalar>(ScalarBits); #endif } }; +template <> +struct random_impl<bool> { + static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) { + if (y <= x) return x; + return run(); + } + static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits<int>(1) ? true : false; } +}; + template <typename Scalar> struct random_default_impl<Scalar, true, false> { - static inline Scalar run(const Scalar& x, const Scalar& y) { + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { return Scalar(random(x.real(), y.real()), random(x.imag(), y.imag())); } - static inline Scalar run() { + static EIGEN_DEVICE_FUNC inline Scalar run() { typedef typename NumTraits<Scalar>::Real RealScalar; return Scalar(random<RealScalar>(), random<RealScalar>()); } @@ -1864,13 +1954,6 @@ ******************************************/ template <> -struct random_impl<bool> { - static inline bool run() { return random<int>(0, 1) == 0 ? false : true; } - - static inline bool run(const bool& a, const bool& b) { return random<int>(a, b) == 0 ? false : true; } -}; - -template <> struct scalar_fuzzy_impl<bool> { typedef bool RealScalar;
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 80f74e9..2848b78 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h
@@ -206,9 +206,7 @@ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T highest() { return (numext::numeric_limits<T>::max)(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T lowest() { - return IsInteger ? (numext::numeric_limits<T>::min)() : static_cast<T>(-(numext::numeric_limits<T>::max)()); - } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T lowest() { return (numext::numeric_limits<T>::lowest)(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T infinity() { return numext::numeric_limits<T>::infinity(); }
diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h index fc11174..131e6f1 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h
@@ -1,870 +1,870 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H -#define EIGEN_PACKET_MATH_FP16_AVX512_H - -// IWYU pragma: private -#include "../../InternalHeaderCheck.h" - -namespace Eigen { - -namespace internal { - -typedef __m512h Packet32h; -typedef eigen_packet_wrapper<__m256i, 1> Packet16h; -typedef eigen_packet_wrapper<__m128i, 2> Packet8h; - -template <> -struct is_arithmetic<Packet8h> { - enum { value = true }; -}; - -template <> -struct packet_traits<half> : default_packet_traits { - typedef Packet32h type; - typedef Packet16h half; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size = 32, - - HasCmp = 1, - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasDiv = 1, - HasNegate = 1, - HasAbs = 1, - HasAbs2 = 0, - HasMin = 1, - HasMax = 1, - HasConj = 1, - HasSetLinear = 0, - HasLog = 1, - HasLog1p = 1, - HasExp = 1, - HasExpm1 = 1, - HasSqrt = 1, - HasRsqrt = 1, - // These ones should be implemented in future - HasBessel = 0, - HasNdtri = 0, - HasSin = EIGEN_FAST_MATH, - HasCos = EIGEN_FAST_MATH, - HasTanh = EIGEN_FAST_MATH, - HasErf = 0, // EIGEN_FAST_MATH, - HasBlend = 0, - HasRound = 1, - HasFloor = 1, - HasCeil = 1, - HasRint = 1 - }; -}; - -template <> -struct unpacket_traits<Packet32h> { - typedef Eigen::half type; - typedef Packet16h half; - enum { - size = 32, - alignment = Aligned64, - vectorizable = true, - masked_load_available = false, - masked_store_available = false - }; -}; - -template <> -struct unpacket_traits<Packet16h> { - typedef Eigen::half type; - typedef Packet8h half; - enum { - size = 16, - alignment = Aligned32, - vectorizable = true, - masked_load_available = false, - masked_store_available = false - }; -}; - -template <> -struct unpacket_traits<Packet8h> { - typedef Eigen::half type; - typedef Packet8h half; - enum { - size = 8, - alignment = Aligned16, - vectorizable = true, - masked_load_available = false, - masked_store_available = false - }; -}; - -// Memory functions - -// pset1 - -template <> -EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) { - return _mm512_set1_ph(static_cast<_Float16>(from)); -} - -// pset1frombits -template <> -EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) { - return _mm512_castsi512_ph(_mm512_set1_epi16(from)); -} - -// pfirst - -template <> -EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - return half_impl::raw_uint16_to_half( - static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0))); -#else - Eigen::half dest[32]; - _mm512_storeu_ph(dest, from); - return dest[0]; -#endif -} - -// pload - -template <> -EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) { - EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from); -} - -// ploadu - -template <> -EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from); -} - -// pstore - -template <> -EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) { - EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from); -} - -// pstoreu - -template <> -EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) { - EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from); -} - -// ploaddup -template <> -EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) { - __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from)); - return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, - 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), - a); -} - -// ploadquad -template <> -EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) { - __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from)); - return _mm512_permutexvar_ph( - _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0), - a); -} - -// pabs - -template <> -EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) { - return _mm512_abs_ph(a); -} - -// psignbit - -template <> -EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) { - return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15)); -} - -// pmin - -template <> -EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_min_ph(a, b); -} - -// pmax - -template <> -EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_max_ph(a, b); -} - -// plset -template <> -EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) { - return _mm512_add_ph(_mm512_set1_ph(a), - _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f, - 19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, - 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)); -} - -// por - -template <> -EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); -} - -// pxor - -template <> -EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); -} - -// pand - -template <> -EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); -} - -// pandnot - -template <> -EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) { - return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a))); -} - -// pselect - -template <> -EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) { - __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); - return _mm512_mask_blend_ph(mask32, a, b); -} - -// pcmp_eq - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); -} - -// pcmp_le - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); -} - -// pcmp_lt - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); -} - -// pcmp_lt_or_nan - -template <> -EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) { - __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ); - return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu)); -} - -// padd - -template <> -EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_add_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// psub - -template <> -EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_sub_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// pmul - -template <> -EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_mul_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// pdiv - -template <> -EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) { - return _mm512_div_ph(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) { - return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) { - return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); -} - -// pround - -template <> -EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) { - // Work-around for default std::round rounding mode. - - // Mask for the sign bit - const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u)); - // The largest half-preicision float less than 0.5 - const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu)); - - return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO); -} - -// print - -template <> -EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION); -} - -// pceil - -template <> -EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF); -} - -// pfloor - -template <> -EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) { - return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF); -} - -// predux -template <> -EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) { - return (half)_mm512_reduce_add_ph(a); -} - -template <> -EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) { - return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a)); -} - -template <> -EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) { - return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a)); -} - -// predux_half_dowto4 -template <> -EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0)); - __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1)); - - return Packet16h(padd<Packet16h>(lowHalf, highHalf)); -#else - Eigen::half data[32]; - _mm512_storeu_ph(data, a); - - __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data)); - __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16)); - - return Packet16h(padd<Packet16h>(lowHalf, highHalf)); -#endif -} - -// predux_max - -// predux_min - -// predux_mul - -#ifdef EIGEN_VECTORIZE_FMA - -// pmadd - -template <> -EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fmadd_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -// pmsub - -template <> -EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fmsub_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -// pnmadd - -template <> -EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fnmadd_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -// pnmsub - -template <> -EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { - return _mm512_fnmsub_ph(a, b, c); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { - return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); -} - -template <> -EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { - return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); -} - -#endif - -// pnegate - -template <> -EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) { - return _mm512_sub_ph(_mm512_set1_ph(0.0), a); -} - -// pconj - -template <> -EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) { - return a; -} - -// psqrt - -template <> -EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) { - return _mm512_sqrt_ph(a); -} - -// prsqrt - -template <> -EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) { - return _mm512_rsqrt_ph(a); -} - -// preciprocal - -template <> -EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) { - return _mm512_rcp_ph(a); -} - -// ptranspose - -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) { - __m512i t[32]; - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 16; i++) { - t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); - t[2 * i + 1] = - _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); - } - - __m512i p[32]; - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 8; i++) { - p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]); - p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]); - p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]); - p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]); - } - - __m512i q[32]; - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 4; i++) { - q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]); - q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]); - q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]); - q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]); - q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]); - q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]); - q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]); - q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]); - } - - __m512i f[32]; - -#define PACKET32H_TRANSPOSE_HELPER(X, Y) \ - do { \ - f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \ - f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \ - f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \ - f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \ - f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \ - f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \ - f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \ - f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \ - } while (false); - - PACKET32H_TRANSPOSE_HELPER(0, 0); - PACKET32H_TRANSPOSE_HELPER(1, 1); - PACKET32H_TRANSPOSE_HELPER(2, 2); - PACKET32H_TRANSPOSE_HELPER(3, 3); - - PACKET32H_TRANSPOSE_HELPER(1, 0); - PACKET32H_TRANSPOSE_HELPER(2, 0); - PACKET32H_TRANSPOSE_HELPER(3, 0); - PACKET32H_TRANSPOSE_HELPER(2, 1); - PACKET32H_TRANSPOSE_HELPER(3, 1); - PACKET32H_TRANSPOSE_HELPER(3, 2); - - PACKET32H_TRANSPOSE_HELPER(0, 1); - PACKET32H_TRANSPOSE_HELPER(0, 2); - PACKET32H_TRANSPOSE_HELPER(0, 3); - PACKET32H_TRANSPOSE_HELPER(1, 2); - PACKET32H_TRANSPOSE_HELPER(1, 3); - PACKET32H_TRANSPOSE_HELPER(2, 3); - -#undef PACKET32H_TRANSPOSE_HELPER - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 32; i++) { - a.packet[i] = _mm512_castsi512_ph(f[i]); - } -} - -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) { - __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3; - t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); - t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); - t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); - t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); - - p0 = _mm512_unpacklo_epi32(t0, t2); - p1 = _mm512_unpackhi_epi32(t0, t2); - p2 = _mm512_unpacklo_epi32(t1, t3); - p3 = _mm512_unpackhi_epi32(t1, t3); - - a0 = p0; - a1 = p1; - a2 = p2; - a3 = p3; - - a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1); - a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0); - - a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2); - a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0); - - a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3); - a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0); - - a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2); - a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1); - - a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3); - a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2); - - a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3); - a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1); - - a.packet[0] = _mm512_castsi512_ph(a0); - a.packet[1] = _mm512_castsi512_ph(a1); - a.packet[2] = _mm512_castsi512_ph(a2); - a.packet[3] = _mm512_castsi512_ph(a3); -} - -// preverse - -template <> -EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) { - return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), - a); -} - -// pscatter - -template <> -EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) { - EIGEN_ALIGN64 half aux[32]; - pstore(aux, from); - - EIGEN_UNROLL_LOOP - for (int i = 0; i < 32; i++) { - to[stride * i] = aux[i]; - } -} - -// pgather - -template <> -EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) { - return _mm512_castsi512_ph(_mm512_set_epi16( - from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x, - from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x, - from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x, - from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x, - from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x, - from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x, - from[1 * stride].x, from[0 * stride].x)); -} - -template <> -EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&); -template <> -EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&); - -EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) { - __m512d result = _mm512_undefined_pd(); - result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0); - result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1); - return _mm512_castpd_ph(result); -} - -EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) { - a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0)); - b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1)); -} - -// psin -template <> -EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = psin(low); - Packet16h highOut = psin(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pcos -template <> -EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = pcos(low); - Packet16h highOut = pcos(high); - - return combine2Packet16h(lowOut, highOut); -} - -// plog -template <> -EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = plog(low); - Packet16h highOut = plog(high); - - return combine2Packet16h(lowOut, highOut); -} - -// plog2 -template <> -EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = plog2(low); - Packet16h highOut = plog2(high); - - return combine2Packet16h(lowOut, highOut); -} - -// plog1p -template <> -EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = plog1p(low); - Packet16h highOut = plog1p(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pexp -template <> -EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = pexp(low); - Packet16h highOut = pexp(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pexpm1 -template <> -EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = pexpm1(low); - Packet16h highOut = pexpm1(high); - - return combine2Packet16h(lowOut, highOut); -} - -// ptanh -template <> -EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h lowOut = ptanh(low); - Packet16h highOut = ptanh(high); - - return combine2Packet16h(lowOut, highOut); -} - -// pfrexp -template <> -EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h exp1 = _mm256_undefined_si256(); - Packet16h exp2 = _mm256_undefined_si256(); - - Packet16h lowOut = pfrexp(low, exp1); - Packet16h highOut = pfrexp(high, exp2); - - exponent = combine2Packet16h(exp1, exp2); - - return combine2Packet16h(lowOut, highOut); -} - -// pldexp -template <> -EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) { - Packet16h low; - Packet16h high; - extract2Packet16h(a, low, high); - - Packet16h exp1; - Packet16h exp2; - extract2Packet16h(exponent, exp1, exp2); - - Packet16h lowOut = pldexp(low, exp1); - Packet16h highOut = pldexp(high, exp2); - - return combine2Packet16h(lowOut, highOut); -} - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_PACKET_MATH_FP16_AVX512_H +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_FP16_AVX512_H +#define EIGEN_PACKET_MATH_FP16_AVX512_H + +// IWYU pragma: private +#include "../../InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +typedef __m512h Packet32h; +typedef eigen_packet_wrapper<__m256i, 1> Packet16h; +typedef eigen_packet_wrapper<__m128i, 2> Packet8h; + +template <> +struct is_arithmetic<Packet8h> { + enum { value = true }; +}; + +template <> +struct packet_traits<half> : default_packet_traits { + typedef Packet32h type; + typedef Packet16h half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 32, + + HasCmp = 1, + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 1, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 0, + HasLog = 1, + HasLog1p = 1, + HasExp = 1, + HasExpm1 = 1, + HasSqrt = 1, + HasRsqrt = 1, + // These ones should be implemented in future + HasBessel = 0, + HasNdtri = 0, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, + HasErf = 0, // EIGEN_FAST_MATH, + HasBlend = 0, + HasRound = 1, + HasFloor = 1, + HasCeil = 1, + HasRint = 1 + }; +}; + +template <> +struct unpacket_traits<Packet32h> { + typedef Eigen::half type; + typedef Packet16h half; + enum { + size = 32, + alignment = Aligned64, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +template <> +struct unpacket_traits<Packet16h> { + typedef Eigen::half type; + typedef Packet8h half; + enum { + size = 16, + alignment = Aligned32, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +template <> +struct unpacket_traits<Packet8h> { + typedef Eigen::half type; + typedef Packet8h half; + enum { + size = 8, + alignment = Aligned16, + vectorizable = true, + masked_load_available = false, + masked_store_available = false + }; +}; + +// Memory functions + +// pset1 + +template <> +EIGEN_STRONG_INLINE Packet32h pset1<Packet32h>(const Eigen::half& from) { + return _mm512_set1_ph(static_cast<_Float16>(from)); +} + +// pset1frombits +template <> +EIGEN_STRONG_INLINE Packet32h pset1frombits<Packet32h>(unsigned short from) { + return _mm512_castsi512_ph(_mm512_set1_epi16(from)); +} + +// pfirst + +template <> +EIGEN_STRONG_INLINE Eigen::half pfirst<Packet32h>(const Packet32h& from) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + return half_impl::raw_uint16_to_half( + static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0))); +#else + Eigen::half dest[32]; + _mm512_storeu_ph(dest, from); + return dest[0]; +#endif +} + +// pload + +template <> +EIGEN_STRONG_INLINE Packet32h pload<Packet32h>(const Eigen::half* from) { + EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ph(from); +} + +// ploadu + +template <> +EIGEN_STRONG_INLINE Packet32h ploadu<Packet32h>(const Eigen::half* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ph(from); +} + +// pstore + +template <> +EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet32h& from) { + EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ph(to, from); +} + +// pstoreu + +template <> +EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet32h& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ph(to, from); +} + +// ploaddup +template <> +EIGEN_STRONG_INLINE Packet32h ploaddup<Packet32h>(const Eigen::half* from) { + __m512h a = _mm512_castph256_ph512(_mm256_loadu_ph(from)); + return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, + 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), + a); +} + +// ploadquad +template <> +EIGEN_STRONG_INLINE Packet32h ploadquad<Packet32h>(const Eigen::half* from) { + __m512h a = _mm512_castph128_ph512(_mm_loadu_ph(from)); + return _mm512_permutexvar_ph( + _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0), + a); +} + +// pabs + +template <> +EIGEN_STRONG_INLINE Packet32h pabs<Packet32h>(const Packet32h& a) { + return _mm512_abs_ph(a); +} + +// psignbit + +template <> +EIGEN_STRONG_INLINE Packet32h psignbit<Packet32h>(const Packet32h& a) { + return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(a), 15)); +} + +// pmin + +template <> +EIGEN_STRONG_INLINE Packet32h pmin<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_min_ph(a, b); +} + +// pmax + +template <> +EIGEN_STRONG_INLINE Packet32h pmax<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_max_ph(a, b); +} + +// plset +template <> +EIGEN_STRONG_INLINE Packet32h plset<Packet32h>(const half& a) { + return _mm512_add_ph(_mm512_set1_ph(a), + _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f, + 19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, + 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)); +} + +// por + +template <> +EIGEN_STRONG_INLINE Packet32h por(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pxor + +template <> +EIGEN_STRONG_INLINE Packet32h pxor(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pand + +template <> +EIGEN_STRONG_INLINE Packet32h pand(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(a), _mm512_castph_si512(b))); +} + +// pandnot + +template <> +EIGEN_STRONG_INLINE Packet32h pandnot(const Packet32h& a, const Packet32h& b) { + return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(b), _mm512_castph_si512(a))); +} + +// pselect + +template <> +EIGEN_DEVICE_FUNC inline Packet32h pselect(const Packet32h& mask, const Packet32h& a, const Packet32h& b) { + __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ); + return _mm512_mask_blend_ph(mask32, a, b); +} + +// pcmp_eq + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_eq(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_EQ_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); +} + +// pcmp_le + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_le(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LE_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); +} + +// pcmp_lt + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_lt(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_LT_OQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu)); +} + +// pcmp_lt_or_nan + +template <> +EIGEN_STRONG_INLINE Packet32h pcmp_lt_or_nan(const Packet32h& a, const Packet32h& b) { + __mmask32 mask = _mm512_cmp_ph_mask(a, b, _CMP_NGE_UQ); + return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu)); +} + +// padd + +template <> +EIGEN_STRONG_INLINE Packet32h padd<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_add_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// psub + +template <> +EIGEN_STRONG_INLINE Packet32h psub<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_sub_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pmul + +template <> +EIGEN_STRONG_INLINE Packet32h pmul<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_mul_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pdiv + +template <> +EIGEN_STRONG_INLINE Packet32h pdiv<Packet32h>(const Packet32h& a, const Packet32h& b) { + return _mm512_div_ph(a, b); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) { + return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) { + return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b))); +} + +// pround + +template <> +EIGEN_STRONG_INLINE Packet32h pround<Packet32h>(const Packet32h& a) { + // Work-around for default std::round rounding mode. + + // Mask for the sign bit + const Packet32h signMask = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x8000u)); + // The largest half-preicision float less than 0.5 + const Packet32h prev0dot5 = pset1frombits<Packet32h>(static_cast<numext::uint16_t>(0x37FFu)); + + return _mm512_roundscale_ph(padd(por(pand(a, signMask), prev0dot5), a), _MM_FROUND_TO_ZERO); +} + +// print + +template <> +EIGEN_STRONG_INLINE Packet32h print<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_CUR_DIRECTION); +} + +// pceil + +template <> +EIGEN_STRONG_INLINE Packet32h pceil<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_POS_INF); +} + +// pfloor + +template <> +EIGEN_STRONG_INLINE Packet32h pfloor<Packet32h>(const Packet32h& a) { + return _mm512_roundscale_ph(a, _MM_FROUND_TO_NEG_INF); +} + +// predux +template <> +EIGEN_STRONG_INLINE half predux<Packet32h>(const Packet32h& a) { + return (half)_mm512_reduce_add_ph(a); +} + +template <> +EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& a) { + return (half)_mm256_reduce_add_ph(_mm256_castsi256_ph(a)); +} + +template <> +EIGEN_STRONG_INLINE half predux<Packet8h>(const Packet8h& a) { + return (half)_mm_reduce_add_ph(_mm_castsi128_ph(a)); +} + +// predux_half_dowto4 +template <> +EIGEN_STRONG_INLINE Packet16h predux_half_dowto4<Packet32h>(const Packet32h& a) { +#ifdef EIGEN_VECTORIZE_AVX512DQ + __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 0)); + __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(a), 1)); + + return Packet16h(padd<Packet16h>(lowHalf, highHalf)); +#else + Eigen::half data[32]; + _mm512_storeu_ph(data, a); + + __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(data)); + __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(data + 16)); + + return Packet16h(padd<Packet16h>(lowHalf, highHalf)); +#endif +} + +// predux_max + +// predux_min + +// predux_mul + +#ifdef EIGEN_VECTORIZE_FMA + +// pmadd + +template <> +EIGEN_STRONG_INLINE Packet32h pmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fmadd_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pmsub + +template <> +EIGEN_STRONG_INLINE Packet32h pmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fmsub_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pnmadd + +template <> +EIGEN_STRONG_INLINE Packet32h pnmadd(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fnmadd_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pnmadd(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +// pnmsub + +template <> +EIGEN_STRONG_INLINE Packet32h pnmsub(const Packet32h& a, const Packet32h& b, const Packet32h& c) { + return _mm512_fnmsub_ph(a, b, c); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pnmsub(const Packet16h& a, const Packet16h& b, const Packet16h& c) { + return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(a), _mm256_castsi256_ph(b), _mm256_castsi256_ph(c))); +} + +template <> +EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) { + return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(a), _mm_castsi128_ph(b), _mm_castsi128_ph(c))); +} + +#endif + +// pnegate + +template <> +EIGEN_STRONG_INLINE Packet32h pnegate<Packet32h>(const Packet32h& a) { + return _mm512_sub_ph(_mm512_set1_ph(0.0), a); +} + +// pconj + +template <> +EIGEN_STRONG_INLINE Packet32h pconj<Packet32h>(const Packet32h& a) { + return a; +} + +// psqrt + +template <> +EIGEN_STRONG_INLINE Packet32h psqrt<Packet32h>(const Packet32h& a) { + return _mm512_sqrt_ph(a); +} + +// prsqrt + +template <> +EIGEN_STRONG_INLINE Packet32h prsqrt<Packet32h>(const Packet32h& a) { + return _mm512_rsqrt_ph(a); +} + +// preciprocal + +template <> +EIGEN_STRONG_INLINE Packet32h preciprocal<Packet32h>(const Packet32h& a) { + return _mm512_rcp_ph(a); +} + +// ptranspose + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 32>& a) { + __m512i t[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 16; i++) { + t[2 * i] = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); + t[2 * i + 1] = + _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2 * i]), _mm512_castph_si512(a.packet[2 * i + 1])); + } + + __m512i p[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 8; i++) { + p[4 * i] = _mm512_unpacklo_epi32(t[4 * i], t[4 * i + 2]); + p[4 * i + 1] = _mm512_unpackhi_epi32(t[4 * i], t[4 * i + 2]); + p[4 * i + 2] = _mm512_unpacklo_epi32(t[4 * i + 1], t[4 * i + 3]); + p[4 * i + 3] = _mm512_unpackhi_epi32(t[4 * i + 1], t[4 * i + 3]); + } + + __m512i q[32]; + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 4; i++) { + q[8 * i] = _mm512_unpacklo_epi64(p[8 * i], p[8 * i + 4]); + q[8 * i + 1] = _mm512_unpackhi_epi64(p[8 * i], p[8 * i + 4]); + q[8 * i + 2] = _mm512_unpacklo_epi64(p[8 * i + 1], p[8 * i + 5]); + q[8 * i + 3] = _mm512_unpackhi_epi64(p[8 * i + 1], p[8 * i + 5]); + q[8 * i + 4] = _mm512_unpacklo_epi64(p[8 * i + 2], p[8 * i + 6]); + q[8 * i + 5] = _mm512_unpackhi_epi64(p[8 * i + 2], p[8 * i + 6]); + q[8 * i + 6] = _mm512_unpacklo_epi64(p[8 * i + 3], p[8 * i + 7]); + q[8 * i + 7] = _mm512_unpackhi_epi64(p[8 * i + 3], p[8 * i + 7]); + } + + __m512i f[32]; + +#define PACKET32H_TRANSPOSE_HELPER(X, Y) \ + do { \ + f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \ + f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \ + f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \ + f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \ + f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \ + f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \ + f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \ + f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \ + } while (false); + + PACKET32H_TRANSPOSE_HELPER(0, 0); + PACKET32H_TRANSPOSE_HELPER(1, 1); + PACKET32H_TRANSPOSE_HELPER(2, 2); + PACKET32H_TRANSPOSE_HELPER(3, 3); + + PACKET32H_TRANSPOSE_HELPER(1, 0); + PACKET32H_TRANSPOSE_HELPER(2, 0); + PACKET32H_TRANSPOSE_HELPER(3, 0); + PACKET32H_TRANSPOSE_HELPER(2, 1); + PACKET32H_TRANSPOSE_HELPER(3, 1); + PACKET32H_TRANSPOSE_HELPER(3, 2); + + PACKET32H_TRANSPOSE_HELPER(0, 1); + PACKET32H_TRANSPOSE_HELPER(0, 2); + PACKET32H_TRANSPOSE_HELPER(0, 3); + PACKET32H_TRANSPOSE_HELPER(1, 2); + PACKET32H_TRANSPOSE_HELPER(1, 3); + PACKET32H_TRANSPOSE_HELPER(2, 3); + +#undef PACKET32H_TRANSPOSE_HELPER + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 32; i++) { + a.packet[i] = _mm512_castsi512_ph(f[i]); + } +} + +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet32h, 4>& a) { + __m512i p0, p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3; + t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); + t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[0]), _mm512_castph_si512(a.packet[1])); + t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); + t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(a.packet[2]), _mm512_castph_si512(a.packet[3])); + + p0 = _mm512_unpacklo_epi32(t0, t2); + p1 = _mm512_unpackhi_epi32(t0, t2); + p2 = _mm512_unpacklo_epi32(t1, t3); + p3 = _mm512_unpackhi_epi32(t1, t3); + + a0 = p0; + a1 = p1; + a2 = p2; + a3 = p3; + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p1, 0), 1); + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p0, 1), 0); + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2); + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p0, 2), 0); + + a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p0, 3), 0); + + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2); + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p1, 2), 1); + + a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2); + + a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3); + a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p1, 3), 1); + + a.packet[0] = _mm512_castsi512_ph(a0); + a.packet[1] = _mm512_castsi512_ph(a1); + a.packet[2] = _mm512_castsi512_ph(a2); + a.packet[3] = _mm512_castsi512_ph(a3); +} + +// preverse + +template <> +EIGEN_STRONG_INLINE Packet32h preverse(const Packet32h& a) { + return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), + a); +} + +// pscatter + +template <> +EIGEN_STRONG_INLINE void pscatter<half, Packet32h>(half* to, const Packet32h& from, Index stride) { + EIGEN_ALIGN64 half aux[32]; + pstore(aux, from); + + EIGEN_UNROLL_LOOP + for (int i = 0; i < 32; i++) { + to[stride * i] = aux[i]; + } +} + +// pgather + +template <> +EIGEN_STRONG_INLINE Packet32h pgather<Eigen::half, Packet32h>(const Eigen::half* from, Index stride) { + return _mm512_castsi512_ph(_mm512_set_epi16( + from[31 * stride].x, from[30 * stride].x, from[29 * stride].x, from[28 * stride].x, from[27 * stride].x, + from[26 * stride].x, from[25 * stride].x, from[24 * stride].x, from[23 * stride].x, from[22 * stride].x, + from[21 * stride].x, from[20 * stride].x, from[19 * stride].x, from[18 * stride].x, from[17 * stride].x, + from[16 * stride].x, from[15 * stride].x, from[14 * stride].x, from[13 * stride].x, from[12 * stride].x, + from[11 * stride].x, from[10 * stride].x, from[9 * stride].x, from[8 * stride].x, from[7 * stride].x, + from[6 * stride].x, from[5 * stride].x, from[4 * stride].x, from[3 * stride].x, from[2 * stride].x, + from[1 * stride].x, from[0 * stride].x)); +} + +template <> +EIGEN_STRONG_INLINE Packet16h pcos<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h psin<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog2<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h plog1p<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pexp<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pexpm1<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h ptanh<Packet16h>(const Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pfrexp<Packet16h>(const Packet16h&, Packet16h&); +template <> +EIGEN_STRONG_INLINE Packet16h pldexp<Packet16h>(const Packet16h&, const Packet16h&); + +EIGEN_STRONG_INLINE Packet32h combine2Packet16h(const Packet16h& a, const Packet16h& b) { + __m512d result = _mm512_undefined_pd(); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(a), 0); + result = _mm512_insertf64x4(result, _mm256_castsi256_pd(b), 1); + return _mm512_castpd_ph(result); +} + +EIGEN_STRONG_INLINE void extract2Packet16h(const Packet32h& x, Packet16h& a, Packet16h& b) { + a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 0)); + b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(x), 1)); +} + +// psin +template <> +EIGEN_STRONG_INLINE Packet32h psin<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = psin(low); + Packet16h highOut = psin(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pcos +template <> +EIGEN_STRONG_INLINE Packet32h pcos<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pcos(low); + Packet16h highOut = pcos(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog +template <> +EIGEN_STRONG_INLINE Packet32h plog<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog(low); + Packet16h highOut = plog(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog2 +template <> +EIGEN_STRONG_INLINE Packet32h plog2<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog2(low); + Packet16h highOut = plog2(high); + + return combine2Packet16h(lowOut, highOut); +} + +// plog1p +template <> +EIGEN_STRONG_INLINE Packet32h plog1p<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = plog1p(low); + Packet16h highOut = plog1p(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pexp +template <> +EIGEN_STRONG_INLINE Packet32h pexp<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pexp(low); + Packet16h highOut = pexp(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pexpm1 +template <> +EIGEN_STRONG_INLINE Packet32h pexpm1<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = pexpm1(low); + Packet16h highOut = pexpm1(high); + + return combine2Packet16h(lowOut, highOut); +} + +// ptanh +template <> +EIGEN_STRONG_INLINE Packet32h ptanh<Packet32h>(const Packet32h& a) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h lowOut = ptanh(low); + Packet16h highOut = ptanh(high); + + return combine2Packet16h(lowOut, highOut); +} + +// pfrexp +template <> +EIGEN_STRONG_INLINE Packet32h pfrexp<Packet32h>(const Packet32h& a, Packet32h& exponent) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h exp1 = _mm256_undefined_si256(); + Packet16h exp2 = _mm256_undefined_si256(); + + Packet16h lowOut = pfrexp(low, exp1); + Packet16h highOut = pfrexp(high, exp2); + + exponent = combine2Packet16h(exp1, exp2); + + return combine2Packet16h(lowOut, highOut); +} + +// pldexp +template <> +EIGEN_STRONG_INLINE Packet32h pldexp<Packet32h>(const Packet32h& a, const Packet32h& exponent) { + Packet16h low; + Packet16h high; + extract2Packet16h(a, low, high); + + Packet16h exp1; + Packet16h exp2; + extract2Packet16h(exponent, exp1, exp2); + + Packet16h lowOut = pldexp(low, exp1); + Packet16h highOut = pldexp(high, exp2); + + return combine2Packet16h(lowOut, highOut); +} + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_FP16_AVX512_H
diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h index 56a94f4..ccdb563 100644 --- a/Eigen/src/Core/arch/AVX512/TypeCasting.h +++ b/Eigen/src/Core/arch/AVX512/TypeCasting.h
@@ -37,10 +37,12 @@ template <> struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {}; +#ifndef EIGEN_VECTORIZE_AVX512FP16 template <> struct type_casting_traits<half, float> : vectorized_type_casting_traits<half, float> {}; template <> struct type_casting_traits<float, half> : vectorized_type_casting_traits<float, half> {}; +#endif template <> struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfloat16, float> {};
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 414f05c..a4b134c 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -105,6 +105,7 @@ static const Packet16uc p16uc_DUPLICATE16_ODD = {2, 3, 2, 3, 6, 7, 6, 7, 10, 11, 10, 11, 14, 15, 14, 15}; static Packet16uc p16uc_QUADRUPLICATE16_HI = {0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3}; +static Packet16uc p16uc_QUADRUPLICATE16 = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3}; static Packet16uc p16uc_MERGEE16 = {0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}; static Packet16uc p16uc_MERGEO16 = {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}; @@ -1716,6 +1717,26 @@ return vec_mergeh(p, p); } +template <> +EIGEN_STRONG_INLINE Packet16c ploadquad<Packet16c>(const signed char* from) { + Packet16c p; + if ((std::ptrdiff_t(from) % 16) == 0) + p = pload<Packet16c>(from); + else + p = ploadu<Packet16c>(from); + return vec_perm(p, p, p16uc_QUADRUPLICATE16); +} + +template <> +EIGEN_STRONG_INLINE Packet16uc ploadquad<Packet16uc>(const unsigned char* from) { + Packet16uc p; + if ((std::ptrdiff_t(from) % 16) == 0) + p = pload<Packet16uc>(from); + else + p = ploadu<Packet16uc>(from); + return vec_perm(p, p, p16uc_QUADRUPLICATE16); +} + template <typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) { EIGEN_DEBUG_UNALIGNED_STORE
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index 68b48f9..be44909 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -677,16 +677,22 @@ namespace internal { template <> -struct random_default_impl<bfloat16, false, false> { - static inline bfloat16 run(const bfloat16& x, const bfloat16& y) { - return x + (y - x) * bfloat16(float(std::rand()) / float(RAND_MAX)); - } - static inline bfloat16 run() { return run(bfloat16(-1.f), bfloat16(1.f)); } +struct is_arithmetic<bfloat16> { + enum { value = true }; }; template <> -struct is_arithmetic<bfloat16> { - enum { value = true }; +struct random_impl<bfloat16> { + enum : int { MantissaBits = 7 }; + using Impl = random_impl<float>; + static EIGEN_DEVICE_FUNC inline bfloat16 run(const bfloat16& x, const bfloat16& y) { + float result = Impl::run(x, y, MantissaBits); + return bfloat16(result); + } + static EIGEN_DEVICE_FUNC inline bfloat16 run() { + float result = Impl::run(MantissaBits); + return bfloat16(result); + } }; } // namespace internal
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index 92516c7..17d534d 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h
@@ -762,16 +762,22 @@ namespace internal { template <> -struct random_default_impl<half, false, false> { - static inline half run(const half& x, const half& y) { - return x + (y - x) * half(float(std::rand()) / float(RAND_MAX)); - } - static inline half run() { return run(half(-1.f), half(1.f)); } +struct is_arithmetic<half> { + enum { value = true }; }; template <> -struct is_arithmetic<half> { - enum { value = true }; +struct random_impl<half> { + enum : int { MantissaBits = 10 }; + using Impl = random_impl<float>; + static EIGEN_DEVICE_FUNC inline half run(const half& x, const half& y) { + float result = Impl::run(x, y, MantissaBits); + return half(result); + } + static EIGEN_DEVICE_FUNC inline half run() { + float result = Impl::run(MantissaBits); + return half(result); + } }; } // end namespace internal
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index be8183c..bdbf759 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -527,7 +527,7 @@ template <> EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) { - return psub(pset1<Packet16b>(false), a); + return a; } template <> @@ -677,18 +677,6 @@ EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b, a, mask); } - -template <> -EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { - return _mm_blendv_epi8(b, a, mask); -} -#else -template <> -EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { - Packet16b a_part = _mm_and_si128(mask, a); - Packet16b b_part = _mm_andnot_si128(mask, b); - return _mm_or_si128(a_part, b_part); -} #endif template <> @@ -696,8 +684,8 @@ return _mm_cmpeq_epi32(a, a); } template <> -EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& a) { - return _mm_cmpeq_epi8(a, a); +EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& /*a*/) { + return pset1<Packet16b>(true); } template <> EIGEN_STRONG_INLINE Packet4f ptrue<Packet4f>(const Packet4f& a) { @@ -838,7 +826,9 @@ } template <> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { - return _mm_cmpeq_epi8(a, b); + // Mask out invalid bool bits to avoid UB. + const Packet16b kBoolMask = pset1<Packet16b>(true); + return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask); } template <> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { @@ -1377,7 +1367,7 @@ } template <> EIGEN_STRONG_INLINE void pstoreu<bool>(bool* to, const Packet16b& from) { - EIGEN_DEBUG_ALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); + EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template <typename Scalar, typename Packet>
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 647a7dd..c4fa771 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -140,7 +140,7 @@ typedef typename Traits::ResScalar ResScalar; enum { kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)), - ksub = Traits::mr * Traits::nr * sizeof(ResScalar), + ksub = Traits::mr * (Traits::nr * sizeof(ResScalar)), kr = 8, mr = Traits::mr, nr = Traits::nr @@ -197,7 +197,7 @@ enum { k_peeling = 8, k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)), - k_sub = Traits::mr * Traits::nr * sizeof(ResScalar) + k_sub = Traits::mr * (Traits::nr * sizeof(ResScalar)) }; // ---- 1st level of blocking on L1, yields kc ---- @@ -2399,7 +2399,7 @@ // fails, drop down to the scalar path. constexpr bool kCanLoadSRhsQuad = (unpacket_traits<SLhsPacket>::size < 4) || - (unpacket_traits<SRhsPacket>::size % (unpacket_traits<SLhsPacket>::size / 4)) == 0; + (unpacket_traits<SRhsPacket>::size % ((std::max<int>)(unpacket_traits<SLhsPacket>::size, 4) / 4)) == 0; if (kCanLoadSRhsQuad && (SwappedTraits::LhsProgress % 4) == 0 && (SwappedTraits::LhsProgress <= 16) && (SwappedTraits::LhsProgress != 8 || SResPacketHalfSize == nr) && (SwappedTraits::LhsProgress != 16 || SResPacketQuarterSize == nr)) {
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index f9b2ad0..2122af9 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -57,7 +57,7 @@ Index rs = size - k - 1; // remaining size Index s = TriStorageOrder == RowMajor ? (IsLower ? 0 : i + 1) : IsLower ? i + 1 : i - rs; - Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1) / conj(tri(i, i)); + Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(Scalar(1)/conj(tri(i,i))); for (Index j = 0; j < otherSize; ++j) { if (TriStorageOrder == RowMajor) { Scalar b(0);
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h index a68996a..60a24a8 100644 --- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -54,7 +54,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h index 1ec8fb8..a33e46e 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -59,7 +59,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index bb6583a..40830fb 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -73,7 +73,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 95954e7..08f1e34 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -67,7 +67,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h index 8f3c1b3..f79ee33 100644 --- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h +++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -66,7 +66,7 @@ enum { Size = MatrixType::RowsAtCompileTime, SizeMinusOne = Size == Dynamic ? Dynamic : Size - 1, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxSize = MatrixType::MaxRowsAtCompileTime, MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : MaxSize - 1 };
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index 9fba7ad..3466f51 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -64,7 +64,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h index 1ac9af8..970500c 100644 --- a/Eigen/src/Eigenvalues/RealSchur.h +++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -61,7 +61,7 @@ enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 9511e68..f84da91 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -85,7 +85,7 @@ enum { Size = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime };
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h index 76158e9..9bb791d 100644 --- a/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -75,7 +75,7 @@ enum { Size = MatrixType::RowsAtCompileTime, SizeMinusOne = Size == Dynamic ? Dynamic : (Size > 1 ? Size - 1 : 1), - Options = MatrixType::Options, + Options = internal::traits<MatrixType>::Options, MaxSize = MatrixType::MaxRowsAtCompileTime, MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? MaxSize - 1 : 1) };
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 57d049c..466834a 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h
@@ -622,7 +622,7 @@ // permuting the rows and cols to bring the nonnegligible pivots to the top of // the main diagonal. We need that to be able to apply our triangular solvers. // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified - Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options, MaxSmallDimAtCompileTime, + Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, traits<MatrixType>::Options, MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime> m(dec().matrixLU().block(0, 0, rank(), cols)); for (Index i = 0; i < rank(); ++i) {
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index 7948ca3..f80ddc0 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h
@@ -1126,13 +1126,6 @@ << "j=" << j << "\n"; } #endif - // Avoid index out of bounds. - // Will end up setting zhat(k) = 0. - if (i >= k && l == 0) { - m_info = NumericalIssue; - prod = 0; - break; - } Index j = i < k ? i : l > 0 ? perm(l - 1) : i; #ifdef EIGEN_BDCSVD_SANITY_CHECKS if (!(dk != Literal(0) || diag(i) != Literal(0))) { @@ -1205,7 +1198,7 @@ // page 12_13 // i >= 1, di almost null and zi non null. -// We use a rotation to zero out zi applied to the left of M +// We use a rotation to zero out zi applied to the left of M, and set di = 0. template <typename MatrixType, int Options> void BDCSVD<MatrixType, Options>::deflation43(Index firstCol, Index shift, Index i, Index size) { using std::abs; @@ -1231,9 +1224,8 @@ } // end deflation 43 // page 13 -// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M) -// We apply two rotations to have zj = 0; -// TODO deflation44 is still broken and not properly tested +// i,j >= 1, i > j, and |di - dj| < epsilon * norm2(M) +// We apply two rotations to have zi = 0, and dj = di. template <typename MatrixType, int Options> void BDCSVD<MatrixType, Options>::deflation44(Index firstColu, Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size) { @@ -1241,9 +1233,10 @@ using std::conj; using std::pow; using std::sqrt; - RealScalar c = m_computed(firstColm + i, firstColm); - RealScalar s = m_computed(firstColm + j, firstColm); - RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); + + RealScalar s = m_computed(firstColm + i, firstColm); + RealScalar c = m_computed(firstColm + j, firstColm); + RealScalar r = numext::hypot(c, s); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; " << m_computed(firstColm + i - 1, firstColm) << " " << m_computed(firstColm + i, firstColm) << " " @@ -1253,21 +1246,21 @@ << m_computed(firstColm + i + 2, firstColm + i + 2) << "\n"; #endif if (numext::is_exactly_zero(r)) { - m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); + m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); return; } c /= r; s /= r; - m_computed(firstColm + i, firstColm) = r; + m_computed(firstColm + j, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = Literal(0); + m_computed(firstColm + i, firstColm) = Literal(0); JacobiRotation<RealScalar> J(c, -s); if (m_compU) - m_naiveU.middleRows(firstColu, size + 1).applyOnTheRight(firstColu + i, firstColu + j, J); + m_naiveU.middleRows(firstColu, size + 1).applyOnTheRight(firstColu + j, firstColu + i, J); else - m_naiveU.applyOnTheRight(firstColu + i, firstColu + j, J); - if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J); + m_naiveU.applyOnTheRight(firstColu + j, firstColu + i, J); + if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + j, firstColW + i, J); } // end deflation 44 // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive] @@ -1350,7 +1343,7 @@ // Move deflated diagonal entries at the end. for (Index i = 1; i < length; ++i) - if (abs(diag(i)) < considerZero) permutation[p++] = i; + if (diag(i) < considerZero) permutation[p++] = i; Index i = 1, j = k + 1; for (; p < length; ++p) { @@ -1369,7 +1362,7 @@ if (total_deflation) { for (Index i = 1; i < length; ++i) { Index pi = permutation[i]; - if (abs(diag(pi)) < considerZero || diag(0) < diag(pi)) + if (diag(pi) < considerZero || diag(0) < diag(pi)) permutation[i - 1] = permutation[i]; else { permutation[i - 1] = 0; @@ -1424,17 +1417,19 @@ // condition 4.4 { Index i = length - 1; - while (i > 0 && (abs(diag(i)) < considerZero || abs(col0(i)) < considerZero)) --i; + // Find last non-deflated entry. + while (i > 0 && (diag(i) < considerZero || abs(col0(i)) < considerZero)) --i; + for (; i > 1; --i) - if ((diag(i) - diag(i - 1)) < NumTraits<RealScalar>::epsilon() * maxDiag) { + if ((diag(i) - diag(i - 1)) < epsilon_strict) { #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.4 with i = " << i << " because " << diag(i) << " - " << diag(i - 1) << " == " << (diag(i) - diag(i - 1)) << " < " - << NumTraits<RealScalar>::epsilon() * /*diag(i)*/ maxDiag << "\n"; + << epsilon_strict << "\n"; #endif eigen_internal_assert(abs(diag(i) - diag(i - 1)) < epsilon_coarse && " diagonal entries are not properly sorted"); - deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i - 1, i, length); + deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i, i - 1, length); } }
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index aec1931..cb41123 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h
@@ -105,7 +105,7 @@ ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - MatrixOptions = MatrixType::Options + MatrixOptions = traits<MatrixType>::Options }; typedef typename internal::make_proper_matrix_type<Scalar, ColsAtCompileTime, RowsAtCompileTime, MatrixOptions, @@ -202,7 +202,7 @@ ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - MatrixOptions = MatrixType::Options, + MatrixOptions = internal::traits<MatrixType>::Options, WorkspaceSize = internal::traits<SVDType>::MatrixVColsAtCompileTime, MaxWorkspaceSize = internal::traits<SVDType>::MatrixVMaxColsAtCompileTime }; @@ -310,7 +310,7 @@ ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - MatrixOptions = MatrixType::Options, + MatrixOptions = internal::traits<MatrixType>::Options, WorkspaceSize = internal::traits<SVDType>::MatrixVColsAtCompileTime, MaxWorkspaceSize = internal::traits<SVDType>::MatrixVMaxColsAtCompileTime };
diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index ae2843b..5f04647 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h
@@ -139,7 +139,7 @@ MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxDiagSizeAtCompileTime = internal::min_size_prefer_fixed(MaxRowsAtCompileTime, MaxColsAtCompileTime), - MatrixOptions = MatrixType::Options, + MatrixOptions = internal::traits<MatrixType>::Options, MatrixUColsAtCompileTime = internal::traits<Derived>::MatrixUColsAtCompileTime, MatrixVColsAtCompileTime = internal::traits<Derived>::MatrixVColsAtCompileTime, MatrixUMaxColsAtCompileTime = internal::traits<Derived>::MatrixUMaxColsAtCompileTime,
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 19dd40c..81b0a11 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -58,6 +58,7 @@ ColsAtCompileTime = Dynamic, MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic, + Options = Options_, Flags = Options_ | NestByRefBit | LvalueBit | CompressedAccessBit, SupportedAccessPatterns = InnerRandomAccessPattern };
diff --git a/Eigen/src/misc/Image.h b/Eigen/src/misc/Image.h index fd1ac99..38d516e 100644 --- a/Eigen/src/misc/Image.h +++ b/Eigen/src/misc/Image.h
@@ -27,7 +27,7 @@ MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose // dimension is the number of rows of the original matrix Dynamic, // we don't know at compile time the dimension of the image (the rank) - MatrixType::Options, + traits<MatrixType>::Options, MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original // matrix, MatrixType::MaxColsAtCompileTime // so it has the same number of rows and at most as many columns.
diff --git a/Eigen/src/misc/Kernel.h b/Eigen/src/misc/Kernel.h index 55c3efe..3ed458b 100644 --- a/Eigen/src/misc/Kernel.h +++ b/Eigen/src/misc/Kernel.h
@@ -28,7 +28,7 @@ // is the number of cols of the original matrix // so that the product "matrix * kernel = zero" makes sense Dynamic, // we don't know at compile-time the dimension of the kernel - MatrixType::Options, + traits<MatrixType>::Options, MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space, // whose dimension is the number of columns of the original matrix
diff --git a/Eigen/src/misc/blas.h b/Eigen/src/misc/blas.h index bb133bb..f12bc7c 100644 --- a/Eigen/src/misc/blas.h +++ b/Eigen/src/misc/blas.h
@@ -7,89 +7,89 @@ /* Level 1 routines */ -int BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *); -int BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *); -int BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *); +void BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *); +void BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *); /* Level 2 routines */ -int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, - const int *, const float *, float *, const int *); -int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, - const int *, const double *, double *, const int *); -int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, - const int *, const float *, float *, const int *); -int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, - const int *, const double *, double *, const int *); +void BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, + const int *, const float *, float *, const int *); +void BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, + const int *, const float *, float *, const int *); +void BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); -int BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, - const int *); -int BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, - const int *); -int BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); +void BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, + const int *); +void BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, + const int *); +void BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); -int BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); -int BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); /* Level 3 routines */ -int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, - const int *, const float *, const int *, const float *, float *, const int *); -int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, - const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, - const int *, const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, - const int *, const double *, const int *, const double *, double *, const int *); +void BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, + const int *, const float *, const int *, const float *, float *, const int *); +void BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, + const int *, const double *, const int *, const double *, double *, const int *); +void BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, + const int *, const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, + const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); +void BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); -int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); +void BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); -int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); +void BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); -int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); -int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); +void BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); -int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); #undef BLASFUNC }
diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index c530957..4ae0603 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt
@@ -18,7 +18,7 @@ f2c/lsame.c f2c/dspmv.c f2c/ssbmv.c f2c/chbmv.c f2c/sspmv.c f2c/zhbmv.c f2c/chpmv.c f2c/dsbmv.c f2c/zhpmv.c f2c/dtbmv.c f2c/stbmv.c f2c/ctbmv.c - f2c/ztbmv.c f2c/d_cnjg.c f2c/r_cnjg.c + f2c/ztbmv.c ) if (EIGEN_Fortran_COMPILER_WORKS)
diff --git a/blas/blas.h b/blas/blas.h index 69a9262..8962dc9 100644 --- a/blas/blas.h +++ b/blas/blas.h
@@ -15,7 +15,7 @@ typedef unsigned long BLASULONG; #endif -int BLASFUNC(xerbla)(const char *, int *info, int); +void BLASFUNC(xerbla)(const char *, int *info); float BLASFUNC(sdot)(int *, float *, int *, float *, int *); float BLASFUNC(sdsdot)(int *, float *, float *, int *, float *, int *); @@ -24,34 +24,34 @@ double BLASFUNC(ddot)(int *, double *, int *, double *, int *); double BLASFUNC(qdot)(int *, double *, int *, double *, int *); -int BLASFUNC(cdotuw)(int *, float *, int *, float *, int *, float *); -int BLASFUNC(cdotcw)(int *, float *, int *, float *, int *, float *); -int BLASFUNC(zdotuw)(int *, double *, int *, double *, int *, double *); -int BLASFUNC(zdotcw)(int *, double *, int *, double *, int *, double *); +void BLASFUNC(cdotuw)(int *, float *, int *, float *, int *, float *); +void BLASFUNC(cdotcw)(int *, float *, int *, float *, int *, float *); +void BLASFUNC(zdotuw)(int *, double *, int *, double *, int *, double *); +void BLASFUNC(zdotcw)(int *, double *, int *, double *, int *, double *); -int BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *); -int BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(qaxpy)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *); -int BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(xaxpy)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *); -int BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(saxpy)(const int *, const float *, const float *, const int *, float *, const int *); +void BLASFUNC(daxpy)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(qaxpy)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(caxpy)(const int *, const float *, const float *, const int *, float *, const int *); +void BLASFUNC(zaxpy)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(xaxpy)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(caxpyc)(const int *, const float *, const float *, const int *, float *, const int *); +void BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(scopy)(int *, float *, int *, float *, int *); -int BLASFUNC(dcopy)(int *, double *, int *, double *, int *); -int BLASFUNC(qcopy)(int *, double *, int *, double *, int *); -int BLASFUNC(ccopy)(int *, float *, int *, float *, int *); -int BLASFUNC(zcopy)(int *, double *, int *, double *, int *); -int BLASFUNC(xcopy)(int *, double *, int *, double *, int *); +void BLASFUNC(scopy)(int *, float *, int *, float *, int *); +void BLASFUNC(dcopy)(int *, double *, int *, double *, int *); +void BLASFUNC(qcopy)(int *, double *, int *, double *, int *); +void BLASFUNC(ccopy)(int *, float *, int *, float *, int *); +void BLASFUNC(zcopy)(int *, double *, int *, double *, int *); +void BLASFUNC(xcopy)(int *, double *, int *, double *, int *); -int BLASFUNC(sswap)(int *, float *, int *, float *, int *); -int BLASFUNC(dswap)(int *, double *, int *, double *, int *); -int BLASFUNC(qswap)(int *, double *, int *, double *, int *); -int BLASFUNC(cswap)(int *, float *, int *, float *, int *); -int BLASFUNC(zswap)(int *, double *, int *, double *, int *); -int BLASFUNC(xswap)(int *, double *, int *, double *, int *); +void BLASFUNC(sswap)(int *, float *, int *, float *, int *); +void BLASFUNC(dswap)(int *, double *, int *, double *, int *); +void BLASFUNC(qswap)(int *, double *, int *, double *, int *); +void BLASFUNC(cswap)(int *, float *, int *, float *, int *); +void BLASFUNC(zswap)(int *, double *, int *, double *, int *); +void BLASFUNC(xswap)(int *, double *, int *, double *, int *); float BLASFUNC(sasum)(int *, float *, int *); float BLASFUNC(scasum)(int *, float *, int *); @@ -116,15 +116,15 @@ double BLASFUNC(dzmin)(int *, double *, int *); double BLASFUNC(qxmin)(int *, double *, int *); -int BLASFUNC(sscal)(int *, float *, float *, int *); -int BLASFUNC(dscal)(int *, double *, double *, int *); -int BLASFUNC(qscal)(int *, double *, double *, int *); -int BLASFUNC(cscal)(int *, float *, float *, int *); -int BLASFUNC(zscal)(int *, double *, double *, int *); -int BLASFUNC(xscal)(int *, double *, double *, int *); -int BLASFUNC(csscal)(int *, float *, float *, int *); -int BLASFUNC(zdscal)(int *, double *, double *, int *); -int BLASFUNC(xqscal)(int *, double *, double *, int *); +void BLASFUNC(sscal)(int *, float *, float *, int *); +void BLASFUNC(dscal)(int *, double *, double *, int *); +void BLASFUNC(qscal)(int *, double *, double *, int *); +void BLASFUNC(cscal)(int *, float *, float *, int *); +void BLASFUNC(zscal)(int *, double *, double *, int *); +void BLASFUNC(xscal)(int *, double *, double *, int *); +void BLASFUNC(csscal)(int *, float *, float *, int *); +void BLASFUNC(zdscal)(int *, double *, double *, int *); +void BLASFUNC(xqscal)(int *, double *, double *, int *); float BLASFUNC(snrm2)(int *, float *, int *); float BLASFUNC(scnrm2)(int *, float *, int *); @@ -134,335 +134,335 @@ double BLASFUNC(dznrm2)(int *, double *, int *); double BLASFUNC(qxnrm2)(int *, double *, int *); -int BLASFUNC(srot)(int *, float *, int *, float *, int *, float *, float *); -int BLASFUNC(drot)(int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(qrot)(int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(csrot)(int *, float *, int *, float *, int *, float *, float *); -int BLASFUNC(zdrot)(int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(xqrot)(int *, double *, int *, double *, int *, double *, double *); +void BLASFUNC(srot)(int *, float *, int *, float *, int *, float *, float *); +void BLASFUNC(drot)(int *, double *, int *, double *, int *, double *, double *); +void BLASFUNC(qrot)(int *, double *, int *, double *, int *, double *, double *); +void BLASFUNC(csrot)(int *, float *, int *, float *, int *, float *, float *); +void BLASFUNC(zdrot)(int *, double *, int *, double *, int *, double *, double *); +void BLASFUNC(xqrot)(int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(srotg)(float *, float *, float *, float *); -int BLASFUNC(drotg)(double *, double *, double *, double *); -int BLASFUNC(qrotg)(double *, double *, double *, double *); -int BLASFUNC(crotg)(float *, float *, float *, float *); -int BLASFUNC(zrotg)(double *, double *, double *, double *); -int BLASFUNC(xrotg)(double *, double *, double *, double *); +void BLASFUNC(srotg)(float *, float *, float *, float *); +void BLASFUNC(drotg)(double *, double *, double *, double *); +void BLASFUNC(qrotg)(double *, double *, double *, double *); +void BLASFUNC(crotg)(float *, float *, float *, float *); +void BLASFUNC(zrotg)(double *, double *, double *, double *); +void BLASFUNC(xrotg)(double *, double *, double *, double *); -int BLASFUNC(srotmg)(float *, float *, float *, float *, float *); -int BLASFUNC(drotmg)(double *, double *, double *, double *, double *); +void BLASFUNC(srotmg)(float *, float *, float *, float *, float *); +void BLASFUNC(drotmg)(double *, double *, double *, double *, double *); -int BLASFUNC(srotm)(int *, float *, int *, float *, int *, float *); -int BLASFUNC(drotm)(int *, double *, int *, double *, int *, double *); -int BLASFUNC(qrotm)(int *, double *, int *, double *, int *, double *); +void BLASFUNC(srotm)(int *, float *, int *, float *, int *, float *); +void BLASFUNC(drotm)(int *, double *, int *, double *, int *, double *); +void BLASFUNC(qrotm)(int *, double *, int *, double *, int *, double *); /* Level 2 routines */ -int BLASFUNC(sger)(int *, int *, float *, float *, int *, float *, int *, float *, int *); -int BLASFUNC(dger)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(qger)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(cgeru)(int *, int *, float *, float *, int *, float *, int *, float *, int *); -int BLASFUNC(cgerc)(int *, int *, float *, float *, int *, float *, int *, float *, int *); -int BLASFUNC(zgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(zgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(sger)(int *, int *, float *, float *, int *, float *, int *, float *, int *); +void BLASFUNC(dger)(int *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(qger)(int *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(cgeru)(int *, int *, float *, float *, int *, float *, int *, float *, int *); +void BLASFUNC(cgerc)(int *, int *, float *, float *, int *, float *, int *, float *, int *); +void BLASFUNC(zgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(zgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(xgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, - const int *, const float *, float *, const int *); -int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, - const int *, const double *, double *, const int *); -int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, - const int *, const double *, double *, const int *); -int BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, - const int *, const float *, float *, const int *); -int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, - const int *, const double *, double *, const int *); -int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, - const int *, const double *, double *, const int *); +void BLASFUNC(sgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, + const int *, const float *, float *, const int *); +void BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(cgemv)(const char *, const int *, const int *, const float *, const float *, const int *, const float *, + const int *, const float *, float *, const int *); +void BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); -int BLASFUNC(strsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, - const int *); -int BLASFUNC(dtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(qtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(ctrsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, - const int *); -int BLASFUNC(ztrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(xtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); +void BLASFUNC(strsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, + const int *); +void BLASFUNC(dtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(qtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(ctrsv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, + const int *); +void BLASFUNC(ztrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(xtrsv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); -int BLASFUNC(stpsv)(char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(dtpsv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(qtpsv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(ctpsv)(char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(ztpsv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(xtpsv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(stpsv)(char *, char *, char *, int *, float *, float *, int *); +void BLASFUNC(dtpsv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(qtpsv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(ctpsv)(char *, char *, char *, int *, float *, float *, int *); +void BLASFUNC(ztpsv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(xtpsv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, - const int *); -int BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(qtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, - const int *); -int BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); -int BLASFUNC(xtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, - const int *); +void BLASFUNC(strmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, + const int *); +void BLASFUNC(dtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(qtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(ctrmv)(const char *, const char *, const char *, const int *, const float *, const int *, float *, + const int *); +void BLASFUNC(ztrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); +void BLASFUNC(xtrmv)(const char *, const char *, const char *, const int *, const double *, const int *, double *, + const int *); -int BLASFUNC(stpmv)(char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(dtpmv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(qtpmv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(ctpmv)(char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(ztpmv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(xtpmv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(stpmv)(char *, char *, char *, int *, float *, float *, int *); +void BLASFUNC(dtpmv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(qtpmv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(ctpmv)(char *, char *, char *, int *, float *, float *, int *); +void BLASFUNC(ztpmv)(char *, char *, char *, int *, double *, double *, int *); +void BLASFUNC(xtpmv)(char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(stbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(dtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(qtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ctbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(ztbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(xtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(stbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); +void BLASFUNC(dtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(qtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(ctbmv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); +void BLASFUNC(ztbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(xtbmv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(stbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(dtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(qtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ctbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(ztbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(xtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(stbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); +void BLASFUNC(dtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(qtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(ctbsv)(char *, char *, char *, int *, int *, float *, int *, float *, int *); +void BLASFUNC(ztbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); +void BLASFUNC(xtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(qsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(ssymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(dsymv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); +void BLASFUNC(qsymv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); -int BLASFUNC(sspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *); -int BLASFUNC(dspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(qspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); +void BLASFUNC(sspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *); +void BLASFUNC(dspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); +void BLASFUNC(qspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(ssyr)(const char *, const int *, const float *, const float *, const int *, float *, const int *); -int BLASFUNC(dsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(qsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(ssyr)(const char *, const int *, const float *, const float *, const int *, float *, const int *); +void BLASFUNC(dsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *); +void BLASFUNC(qsyr)(const char *, const int *, const double *, const double *, const int *, double *, const int *); -int BLASFUNC(ssyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - float *, const int *); -int BLASFUNC(dsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - double *, const int *); -int BLASFUNC(qsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - double *, const int *); -int BLASFUNC(csyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - float *, const int *); -int BLASFUNC(zsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - double *, const int *); -int BLASFUNC(xsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - double *, const int *); +void BLASFUNC(ssyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + float *, const int *); +void BLASFUNC(dsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, double *, const int *); +void BLASFUNC(qsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, double *, const int *); +void BLASFUNC(csyr2)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + float *, const int *); +void BLASFUNC(zsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, double *, const int *); +void BLASFUNC(xsyr2)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, double *, const int *); -int BLASFUNC(sspr)(char *, int *, float *, float *, int *, float *); -int BLASFUNC(dspr)(char *, int *, double *, double *, int *, double *); -int BLASFUNC(qspr)(char *, int *, double *, double *, int *, double *); +void BLASFUNC(sspr)(char *, int *, float *, float *, int *, float *); +void BLASFUNC(dspr)(char *, int *, double *, double *, int *, double *); +void BLASFUNC(qspr)(char *, int *, double *, double *, int *, double *); -int BLASFUNC(sspr2)(char *, int *, float *, float *, int *, float *, int *, float *); -int BLASFUNC(dspr2)(char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(qspr2)(char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(cspr2)(char *, int *, float *, float *, int *, float *, int *, float *); -int BLASFUNC(zspr2)(char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(xspr2)(char *, int *, double *, double *, int *, double *, int *, double *); +void BLASFUNC(sspr2)(char *, int *, float *, float *, int *, float *, int *, float *); +void BLASFUNC(dspr2)(char *, int *, double *, double *, int *, double *, int *, double *); +void BLASFUNC(qspr2)(char *, int *, double *, double *, int *, double *, int *, double *); +void BLASFUNC(cspr2)(char *, int *, float *, float *, int *, float *, int *, float *); +void BLASFUNC(zspr2)(char *, int *, double *, double *, int *, double *, int *, double *); +void BLASFUNC(xspr2)(char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(cher)(char *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zher)(char *, int *, double *, double *, int *, double *, int *); -int BLASFUNC(xher)(char *, int *, double *, double *, int *, double *, int *); +void BLASFUNC(cher)(char *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(zher)(char *, int *, double *, double *, int *, double *, int *); +void BLASFUNC(xher)(char *, int *, double *, double *, int *, double *, int *); -int BLASFUNC(chpr)(char *, int *, float *, float *, int *, float *); -int BLASFUNC(zhpr)(char *, int *, double *, double *, int *, double *); -int BLASFUNC(xhpr)(char *, int *, double *, double *, int *, double *); +void BLASFUNC(chpr)(char *, int *, float *, float *, int *, float *); +void BLASFUNC(zhpr)(char *, int *, double *, double *, int *, double *); +void BLASFUNC(xhpr)(char *, int *, double *, double *, int *, double *); -int BLASFUNC(cher2)(char *, int *, float *, float *, int *, float *, int *, float *, int *); -int BLASFUNC(zher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(xher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(cher2)(char *, int *, float *, float *, int *, float *, int *, float *, int *); +void BLASFUNC(zher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *); +void BLASFUNC(xher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *); -int BLASFUNC(chpr2)(char *, int *, float *, float *, int *, float *, int *, float *); -int BLASFUNC(zhpr2)(char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(xhpr2)(char *, int *, double *, double *, int *, double *, int *, double *); +void BLASFUNC(chpr2)(char *, int *, float *, float *, int *, float *, int *, float *); +void BLASFUNC(zhpr2)(char *, int *, double *, double *, int *, double *, int *, double *); +void BLASFUNC(xhpr2)(char *, int *, double *, double *, int *, double *, int *, double *); -int BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(xhemv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(chemv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(zhemv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); +void BLASFUNC(xhemv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); -int BLASFUNC(chpmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *); -int BLASFUNC(zhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(xhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); +void BLASFUNC(chpmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *); +void BLASFUNC(zhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); +void BLASFUNC(xhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(snorm)(char *, int *, int *, float *, int *); -int BLASFUNC(dnorm)(char *, int *, int *, double *, int *); -int BLASFUNC(cnorm)(char *, int *, int *, float *, int *); -int BLASFUNC(znorm)(char *, int *, int *, double *, int *); +void BLASFUNC(snorm)(char *, int *, int *, float *, int *); +void BLASFUNC(dnorm)(char *, int *, int *, double *, int *); +void BLASFUNC(cnorm)(char *, int *, int *, float *, int *); +void BLASFUNC(znorm)(char *, int *, int *, double *, int *); -int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, - int *); -int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); -int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); -int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, - int *); -int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); -int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); +void BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, + int *); +void BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); +void BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); +void BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, + int *); +void BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); +void BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); -int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +void BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +void BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +void BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +void BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +void BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +void BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +void BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +void BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); +void BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *); /* Level 3 routines */ -int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, - const int *, const float *, const int *, const float *, float *, const int *); -int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, - const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, - const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, - const int *, const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, - const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, - const int *, const double *, const int *, const double *, double *, const int *); - -int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, - int *); -int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, - double *, int *); -int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, - double *, int *); - -int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, - int *); -int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, - double *, int *); -int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, - int *); -int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, - double *, int *); - -int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); - -int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, - const float *, const int *, float *, const int *); -int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); -int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, - const double *, const int *, double *, const int *); - -int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); - -int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); -int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); - -int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); - -int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); - -int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); - -int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); -int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); - -int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, double *, const int *); - -int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, - const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, - const double *, const int *, const double *, double *, const int *); -int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, +void BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *); -int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, +void BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); -int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, +void BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *); +void BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float *, const float *, + const int *, const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, + const int *, const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, + const int *, const double *, const int *, const double *, double *, const int *); + +void BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *, + int *); +void BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, + double *, int *); +void BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, + double *, int *); + +void BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, + int *); +void BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, + double *, int *); +void BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, + int *); +void BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, + double *, int *); + +void BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); + +void BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *, + const float *, const int *, float *, const int *); +void BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); +void BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, double *, const int *); + +void BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); + +void BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +void BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); +void BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); + +void BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); +void BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); +void BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); +void BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); + +void BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); + +void BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); + +void BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *); +void BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); +void BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, + int *); + +void BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); +void BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, double *, const int *); + +void BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float *, const float *, const int *, + const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, + const double *, const int *, const double *, double *, const int *); +void BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float *, const float *, + const int *, const float *, const int *, const float *, float *, const int *); +void BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, const double *, const int *, const double *, double *, const int *); +void BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, + const double *, const int *, const double *, const int *, const double *, double *, const int *); #ifdef __cplusplus }
diff --git a/blas/common.h b/blas/common.h index bf91a83..2456273 100644 --- a/blas/common.h +++ b/blas/common.h
@@ -140,6 +140,7 @@ #define EIGEN_BLAS_FUNC_SUFFIX _ #endif -#define EIGEN_BLAS_FUNC(X) EIGEN_CAT(SCALAR_SUFFIX, EIGEN_CAT(X, EIGEN_BLAS_FUNC_SUFFIX)) +#define EIGEN_BLAS_FUNC_NAME(X) EIGEN_CAT(SCALAR_SUFFIX, EIGEN_CAT(X, EIGEN_BLAS_FUNC_SUFFIX)) +#define EIGEN_BLAS_FUNC(X) extern "C" void EIGEN_BLAS_FUNC_NAME(X) #endif // EIGEN_BLAS_COMMON_H
diff --git a/blas/double.cpp b/blas/double.cpp index eef5891..4298665 100644 --- a/blas/double.cpp +++ b/blas/double.cpp
@@ -19,7 +19,7 @@ #include "level2_real_impl.h" #include "level3_impl.h" -double EIGEN_BLAS_FUNC(sdot)(int* n, float* x, int* incx, float* y, int* incy) { +double EIGEN_BLAS_FUNC_NAME(sdot)(int* n, float* x, int* incx, float* y, int* incy) { if (*n <= 0) return 0; if (*incx == 1 && *incy == 1)
diff --git a/blas/f2c/chbmv.c b/blas/f2c/chbmv.c index f218fe3..2aa3a01 100644 --- a/blas/f2c/chbmv.c +++ b/blas/f2c/chbmv.c
@@ -1,487 +1,456 @@ /* chbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int chbmv_(char *uplo, integer *n, integer *k, complex * - alpha, complex *a, integer *lda, complex *x, integer *incx, complex * - beta, complex *y, integer *incy, ftnlen uplo_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1, q__2, q__3, q__4; +static inline void r_cnjg(complex *r, complex *z) { + r->r = z->r; + r->i = -(z->i); +} - /* Builtin functions */ - void r_cnjg(complex *, complex *); +/* Subroutine */ void chbmv_(char *uplo, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *x, + integer *incx, complex *beta, complex *y, integer *incy) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - complex temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + complex temp1, temp2; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* CHBMV performs the matrix-vector operation */ + /* CHBMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n hermitian band matrix, with k super-diagonals. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n hermitian band matrix, with k super-diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the band matrix A is being supplied as */ -/* follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the band matrix A is being supplied as */ + /* follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* being supplied. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* being supplied. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* being supplied. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* being supplied. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry, K specifies the number of super-diagonals of the */ -/* matrix A. K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry, K specifies the number of super-diagonals of the */ + /* matrix A. K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* ALPHA - COMPLEX . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - COMPLEX . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* A - COMPLEX array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the hermitian matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer the upper */ -/* triangular part of a hermitian band matrix from conventional */ -/* full matrix storage to band storage: */ + /* A - COMPLEX array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the hermitian matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer the upper */ + /* triangular part of a hermitian band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the hermitian matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer the lower */ -/* triangular part of a hermitian band matrix from conventional */ -/* full matrix storage to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the hermitian matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer the lower */ + /* triangular part of a hermitian band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Note that the imaginary parts of the diagonal elements need */ -/* not be set and are assumed to be zero. */ -/* Unchanged on exit. */ + /* Note that the imaginary parts of the diagonal elements need */ + /* not be set and are assumed to be zero. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - COMPLEX array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the */ -/* vector x. */ -/* Unchanged on exit. */ + /* X - COMPLEX array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the */ + /* vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - COMPLEX . */ -/* On entry, BETA specifies the scalar beta. */ -/* Unchanged on exit. */ + /* BETA - COMPLEX . */ + /* On entry, BETA specifies the scalar beta. */ + /* Unchanged on exit. */ -/* Y - COMPLEX array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the */ -/* vector y. On exit, Y is overwritten by the updated vector y. */ + /* Y - COMPLEX array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the */ + /* vector y. On exit, Y is overwritten by the updated vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("CHBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("CHBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && - beta->i == 0.f))) { - return 0; - } + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array A */ + /* are accessed sequentially with one pass through A. */ + + /* First form y := beta*y. */ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return; + } + if (lsame_(uplo, "U")) { + /* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__2 = i__; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i = q__3.r * x[i__2].i + q__3.i * x[i__2].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + /* L50: */ + } + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + r__1 = a[i__3].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + ix = kx; + iy = ky; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ix += *incx; + iy += *incy; + /* L70: */ + } + i__3 = jy; + i__4 = jy; + i__2 = kplus1 + j * a_dim1; + r__1 = a[i__2].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } + /* L80: */ + } } + } else { + /* Form y when lower triangle of A is stored. */ -/* Start the operations. In this version the elements of the array A */ -/* are accessed sequentially with one pass through A. */ - -/* First form y := beta*y. */ - - if (beta->r != 1.f || beta->i != 0.f) { - if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__2 = i__; - q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i = - q__3.r * x[i__2].i + q__3.i * x[i__2].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L50: */ - } - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - r__1 = a[i__3].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i = - alpha->r * x[i__4].i + alpha->i * x[i__4].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - ix = kx; - iy = ky; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = - q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ix += *incx; - iy += *incy; -/* L70: */ - } - i__3 = jy; - i__4 = jy; - i__2 = kplus1 + j * a_dim1; - r__1 = a[i__2].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = j; + q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = j; + i__4 = j; + i__2 = j * a_dim1 + 1; + r__1 = a[i__2].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__; + i__2 = i__; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + /* L90: */ + } + i__3 = j; + i__4 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + /* L100: */ + } } else { - -/* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = j; - q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = - alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__3 = j; - i__4 = j; - i__2 = j * a_dim1 + 1; - r__1 = a[i__2].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__; - i__2 = i__; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = - q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L90: */ - } - i__3 = j; - i__4 = j; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = jx; - q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = - alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__3 = jy; - i__4 = jy; - i__2 = j * a_dim1 + 1; - r__1 = a[i__2].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - l = 1 - j; - ix = jx; - iy = jy; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = - q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L110: */ - } - i__3 = jy; - i__4 = jy; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - jx += *incx; - jy += *incy; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = jx; + q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = jy; + i__4 = jy; + i__2 = j * a_dim1 + 1; + r__1 = a[i__2].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + l = 1 - j; + ix = jx; + iy = jy; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + /* L110: */ + } + i__3 = jy; + i__4 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + jx += *incx; + jy += *incy; + /* L120: */ + } } + } - return 0; - -/* End of CHBMV . */ + /* End of CHBMV . */ } /* chbmv_ */ -
diff --git a/blas/f2c/chpmv.c b/blas/f2c/chpmv.c index 65bab1c..4a38a52 100644 --- a/blas/f2c/chpmv.c +++ b/blas/f2c/chpmv.c
@@ -1,438 +1,407 @@ /* chpmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int chpmv_(char *uplo, integer *n, complex *alpha, complex * - ap, complex *x, integer *incx, complex *beta, complex *y, integer * - incy, ftnlen uplo_len) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1, q__2, q__3, q__4; +static inline void r_cnjg(complex *r, complex *z) { + r->r = z->r; + r->i = -(z->i); +} - /* Builtin functions */ - void r_cnjg(complex *, complex *); +/* Subroutine */ void chpmv_(char *uplo, integer *n, complex *alpha, complex *ap, complex *x, integer *incx, + complex *beta, complex *y, integer *incy) { + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - complex temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + complex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* CHPMV performs the matrix-vector operation */ + /* CHPMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n hermitian matrix, supplied in packed form. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n hermitian matrix, supplied in packed form. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the matrix A is supplied in the packed */ -/* array AP as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the matrix A is supplied in the packed */ + /* array AP as follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* supplied in AP. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* supplied in AP. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* ALPHA - COMPLEX . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - COMPLEX . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* AP - COMPLEX array of DIMENSION at least */ -/* ( ( n*( n + 1 ) )/2 ). */ -/* Before entry with UPLO = 'U' or 'u', the array AP must */ -/* contain the upper triangular part of the hermitian matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ -/* and a( 2, 2 ) respectively, and so on. */ -/* Before entry with UPLO = 'L' or 'l', the array AP must */ -/* contain the lower triangular part of the hermitian matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ -/* and a( 3, 1 ) respectively, and so on. */ -/* Note that the imaginary parts of the diagonal elements need */ -/* not be set and are assumed to be zero. */ -/* Unchanged on exit. */ + /* AP - COMPLEX array of DIMENSION at least */ + /* ( ( n*( n + 1 ) )/2 ). */ + /* Before entry with UPLO = 'U' or 'u', the array AP must */ + /* contain the upper triangular part of the hermitian matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ + /* and a( 2, 2 ) respectively, and so on. */ + /* Before entry with UPLO = 'L' or 'l', the array AP must */ + /* contain the lower triangular part of the hermitian matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ + /* and a( 3, 1 ) respectively, and so on. */ + /* Note that the imaginary parts of the diagonal elements need */ + /* not be set and are assumed to be zero. */ + /* Unchanged on exit. */ -/* X - COMPLEX array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. */ -/* Unchanged on exit. */ + /* X - COMPLEX array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - COMPLEX . */ -/* On entry, BETA specifies the scalar beta. When BETA is */ -/* supplied as zero then Y need not be set on input. */ -/* Unchanged on exit. */ + /* BETA - COMPLEX . */ + /* On entry, BETA specifies the scalar beta. When BETA is */ + /* supplied as zero then Y need not be set on input. */ + /* Unchanged on exit. */ -/* Y - COMPLEX array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the n */ -/* element vector y. On exit, Y is overwritten by the updated */ -/* vector y. */ + /* Y - COMPLEX array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the n */ + /* element vector y. On exit, Y is overwritten by the updated */ + /* vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - --y; - --x; - --ap; + /* Parameter adjustments */ + --y; + --x; + --ap; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("CHPMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("CHPMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && - beta->i == 0.f))) { - return 0; - } + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array AP */ + /* are accessed sequentially with one pass through AP. */ + + /* First form y := beta*y. */ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return; + } + kk = 1; + if (lsame_(uplo, "U")) { + /* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ++k; + /* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = kk + j - 1; + r__1 = ap[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + kk += j; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = iy; + i__4 = iy; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ix += *incx; + iy += *incy; + /* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = kk + j - 1; + r__1 = ap[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; + kk += j; + /* L80: */ + } } + } else { + /* Form y when AP contains the lower triangle. */ -/* Start the operations. In this version the elements of the array AP */ -/* are accessed sequentially with one pass through AP. */ - -/* First form y := beta*y. */ - - if (beta->r != 1.f || beta->i != 0.f) { - if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - kk = 1; - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ++k; -/* L50: */ - } - i__2 = j; - i__3 = j; - i__4 = kk + j - 1; - r__1 = ap[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - kk += j; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - i__3 = iy; - i__4 = iy; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ix += *incx; - iy += *incy; -/* L70: */ - } - i__2 = jy; - i__3 = jy; - i__4 = kk + j - 1; - r__1 = ap[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jx += *incx; - jy += *incy; - kk += j; -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j; + i__3 = j; + i__4 = kk; + r__1 = ap[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ++k; + /* L90: */ + } + i__2 = j; + i__3 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + kk += *n - j + 1; + /* L100: */ + } } else { - -/* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = j; - i__3 = j; - i__4 = kk; - r__1 = ap[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ++k; -/* L90: */ - } - i__2 = j; - i__3 = j; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - kk += *n - j + 1; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = jy; - i__3 = jy; - i__4 = kk; - r__1 = ap[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L110: */ - } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jx += *incx; - jy += *incy; - kk += *n - j + 1; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = jy; + i__3 = jy; + i__4 = kk; + r__1 = ap[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + /* L110: */ + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; + kk += *n - j + 1; + /* L120: */ + } } + } - return 0; - -/* End of CHPMV . */ + /* End of CHPMV . */ } /* chpmv_ */ -
diff --git a/blas/f2c/complexdots.c b/blas/f2c/complexdots.c index a856a23..b0dc6c8 100644 --- a/blas/f2c/complexdots.c +++ b/blas/f2c/complexdots.c
@@ -6,79 +6,68 @@ /* complexdots.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -complex cdotc_(integer *n, complex *cx, integer - *incx, complex *cy, integer *incy) -{ - complex res; - extern /* Subroutine */ int cdotcw_(integer *, complex *, integer *, - complex *, integer *, complex *); +complex cdotc_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) { + complex res; + extern /* Subroutine */ void cdotcw_(integer *, complex *, integer *, complex *, integer *, complex *); - /* Parameter adjustments */ - --cy; - --cx; + /* Parameter adjustments */ + --cy; + --cx; - /* Function Body */ - cdotcw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; + /* Function Body */ + cdotcw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; } /* cdotc_ */ -complex cdotu_(integer *n, complex *cx, integer - *incx, complex *cy, integer *incy) -{ - complex res; - extern /* Subroutine */ int cdotuw_(integer *, complex *, integer *, - complex *, integer *, complex *); +complex cdotu_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) { + complex res; + extern /* Subroutine */ void cdotuw_(integer *, complex *, integer *, complex *, integer *, complex *); - /* Parameter adjustments */ - --cy; - --cx; + /* Parameter adjustments */ + --cy; + --cx; - /* Function Body */ - cdotuw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; + /* Function Body */ + cdotuw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; } /* cdotu_ */ -doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, - doublecomplex *cy, integer *incy) -{ - doublecomplex res; - extern /* Subroutine */ int zdotcw_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *); +doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) { + doublecomplex res; + extern /* Subroutine */ void zdotcw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *); - /* Parameter adjustments */ - --cy; - --cx; + /* Parameter adjustments */ + --cy; + --cx; - /* Function Body */ - zdotcw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; + /* Function Body */ + zdotcw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; } /* zdotc_ */ -doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, - doublecomplex *cy, integer *incy) -{ - doublecomplex res; - extern /* Subroutine */ int zdotuw_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *); +doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) { + doublecomplex res; + extern /* Subroutine */ void zdotuw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *); - /* Parameter adjustments */ - --cy; - --cx; + /* Parameter adjustments */ + --cy; + --cx; - /* Function Body */ - zdotuw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; + /* Function Body */ + zdotuw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; } /* zdotu_ */ -
diff --git a/blas/f2c/ctbmv.c b/blas/f2c/ctbmv.c index a6e0dae..2c0ce9b 100644 --- a/blas/f2c/ctbmv.c +++ b/blas/f2c/ctbmv.c
@@ -1,647 +1,586 @@ /* ctbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int ctbmv_(char *uplo, char *trans, char *diag, integer *n, - integer *k, complex *a, integer *lda, complex *x, integer *incx, - ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; +static inline void r_cnjg(complex *r, complex *z) { + r->r = z->r; + r->i = -(z->i); +} - /* Builtin functions */ - void r_cnjg(complex *, complex *); +/* Subroutine */ void ctbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda, + complex *x, integer *incx) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - complex temp; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); - logical noconj, nounit; + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + complex temp; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); + logical noconj, nounit; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* CTBMV performs one of the matrix-vector operations */ + /* CTBMV performs one of the matrix-vector operations */ -/* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ + /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ -/* where x is an n element vector and A is an n by n unit, or non-unit, */ -/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + /* where x is an n element vector and A is an n by n unit, or non-unit, */ + /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the matrix is an upper or */ -/* lower triangular matrix as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the matrix is an upper or */ + /* lower triangular matrix as follows: */ -/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ -/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* TRANS - CHARACTER*1. */ -/* On entry, TRANS specifies the operation to be performed as */ -/* follows: */ + /* TRANS - CHARACTER*1. */ + /* On entry, TRANS specifies the operation to be performed as */ + /* follows: */ -/* TRANS = 'N' or 'n' x := A*x. */ + /* TRANS = 'N' or 'n' x := A*x. */ -/* TRANS = 'T' or 't' x := A'*x. */ + /* TRANS = 'T' or 't' x := A'*x. */ -/* TRANS = 'C' or 'c' x := conjg( A' )*x. */ + /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* DIAG - CHARACTER*1. */ -/* On entry, DIAG specifies whether or not A is unit */ -/* triangular as follows: */ + /* DIAG - CHARACTER*1. */ + /* On entry, DIAG specifies whether or not A is unit */ + /* triangular as follows: */ -/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ -/* DIAG = 'N' or 'n' A is not assumed to be unit */ -/* triangular. */ + /* DIAG = 'N' or 'n' A is not assumed to be unit */ + /* triangular. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry with UPLO = 'U' or 'u', K specifies the number of */ -/* super-diagonals of the matrix A. */ -/* On entry with UPLO = 'L' or 'l', K specifies the number of */ -/* sub-diagonals of the matrix A. */ -/* K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry with UPLO = 'U' or 'u', K specifies the number of */ + /* super-diagonals of the matrix A. */ + /* On entry with UPLO = 'L' or 'l', K specifies the number of */ + /* sub-diagonals of the matrix A. */ + /* K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* A - COMPLEX array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer an upper */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* A - COMPLEX array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer an upper */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer a lower */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer a lower */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Note that when DIAG = 'U' or 'u' the elements of the array A */ -/* corresponding to the diagonal elements of the matrix are not */ -/* referenced, but are assumed to be unity. */ -/* Unchanged on exit. */ + /* Note that when DIAG = 'U' or 'u' the elements of the array A */ + /* corresponding to the diagonal elements of the matrix are not */ + /* referenced, but are assumed to be unity. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - COMPLEX array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. On exit, X is overwritten with the */ -/* transformed vector x. */ + /* X - COMPLEX array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. On exit, X is overwritten with the */ + /* transformed vector x. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, - "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( - ftnlen)1)) { - info = 2; - } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, - "N", (ftnlen)1, (ftnlen)1)) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("CTBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { + info = 2; + } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("CTBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0) { - return 0; - } + if (*n == 0) { + return; + } - noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1); - nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); -/* Set up the start point in X if the increment is not unity. This */ -/* will be ( N - 1 )*INCX too small for descending loops. */ + /* Set up the start point in X if the increment is not unity. This */ + /* will be ( N - 1 )*INCX too small for descending loops. */ - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ - if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(trans, "N")) { + /* Form x := A*x. */ -/* Form x := A*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + - q__2.i; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; -/* L10: */ - } - if (nounit) { - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, q__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - if (x[i__4].r != 0.f || x[i__4].i != 0.f) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - i__4 = ix; - i__2 = ix; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + - q__2.i; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - ix += *incx; -/* L30: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__2 = kplus1 + j * a_dim1; - q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[ - i__2].i, q__1.i = x[i__4].r * a[i__2].i + - x[i__4].i * a[i__2].r; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0.f || x[i__1].i != 0.f) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - l = 1 - j; -/* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { - i__1 = i__; - i__3 = i__; - i__2 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__2.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + - q__2.i; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; -/* L50: */ - } - if (nounit) { - i__4 = j; - i__1 = j; - i__3 = j * a_dim1 + 1; - q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[ - i__3].i, q__1.i = x[i__1].r * a[i__3].i + - x[i__1].i * a[i__3].r; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__4 = jx; - if (x[i__4].r != 0.f || x[i__4].i != 0.f) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { - i__4 = ix; - i__1 = ix; - i__2 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__2.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + - q__2.i; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - ix -= *incx; -/* L70: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__1 = j * a_dim1 + 1; - q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[ - i__1].i, q__1.i = x[i__4].r * a[i__1].i + - x[i__4].i * a[i__1].r; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } -/* L80: */ - } - } - } + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + /* L10: */ + } + if (nounit) { + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i, + q__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + } + } + /* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + if (x[i__4].r != 0.f || x[i__4].i != 0.f) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + i__4 = ix; + i__2 = ix; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + q__2.i; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + ix += *incx; + /* L30: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__2 = kplus1 + j * a_dim1; + q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i, + q__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } + /* L40: */ + } + } } else { - -/* Form x := A'*x or x := conjg( A' )*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__3 = j; - temp.r = x[i__3].r, temp.i = x[i__3].i; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - q__1.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = i__; - q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ - i__1].i, q__2.i = a[i__4].r * x[i__1].i + - a[i__4].i * x[i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, - q__2.i = q__3.r * x[i__4].i + q__3.i * x[ - i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - } - i__3 = j; - x[i__3].r = temp.r, x[i__3].i = temp.i; -/* L110: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__3 = jx; - temp.r = x[i__3].r, temp.i = x[i__3].i; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - q__1.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = ix; - q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ - i__1].i, q__2.i = a[i__4].r * x[i__1].i + - a[i__4].i * x[i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; -/* L120: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, - q__2.i = q__3.r * x[i__4].i + q__3.i * x[ - i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; -/* L130: */ - } - } - i__3 = jx; - x[i__3].r = temp.r, x[i__3].i = temp.i; - jx -= *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = j; - temp.r = x[i__4].r, temp.i = x[i__4].i; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - q__1.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = i__; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, q__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j * a_dim1 + 1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__1 = i__; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, - q__2.i = q__3.r * x[i__1].i + q__3.i * x[ - i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } - } - i__4 = j; - x[i__4].r = temp.r, x[i__4].i = temp.i; -/* L170: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - kx += *incx; - ix = kx; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - q__1.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = ix; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, q__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L180: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j * a_dim1 + 1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__1 = ix; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, - q__2.i = q__3.r * x[i__1].i + q__3.i * x[ - i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L190: */ - } - } - i__4 = jx; - x[i__4].r = temp.r, x[i__4].i = temp.i; - jx += *incx; -/* L200: */ - } - } - } + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + l = 1 - j; + /* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { + i__1 = i__; + i__3 = i__; + i__2 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + /* L50: */ + } + if (nounit) { + i__4 = j; + i__1 = j; + i__3 = j * a_dim1 + 1; + q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i, + q__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + } + } + /* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__4 = jx; + if (x[i__4].r != 0.f || x[i__4].i != 0.f) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { + i__4 = ix; + i__1 = ix; + i__2 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; + q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + q__2.i; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + ix -= *incx; + /* L70: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__1 = j * a_dim1 + 1; + q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i, + q__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } + /* L80: */ + } + } } + } else { + /* Form x := A'*x or x := conjg( A' )*x. */ - return 0; + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__3 = j; + temp.r = x[i__3].r, temp.i = x[i__3].i; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = i__; + q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, + q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + /* L90: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + /* L100: */ + } + } + i__3 = j; + x[i__3].r = temp.r, x[i__3].i = temp.i; + /* L110: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__3 = jx; + temp.r = x[i__3].r, temp.i = x[i__3].i; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = ix; + q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, + q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; + /* L120: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; + /* L130: */ + } + } + i__3 = jx; + x[i__3].r = temp.r, x[i__3].i = temp.i; + jx -= *incx; + /* L140: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = j; + temp.r = x[i__4].r, temp.i = x[i__4].i; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = i__; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, + q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + /* L150: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j * a_dim1 + 1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__1 = i__; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + /* L160: */ + } + } + i__4 = j; + x[i__4].r = temp.r, x[i__4].i = temp.i; + /* L170: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + kx += *incx; + ix = kx; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = ix; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, + q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; + /* L180: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j * a_dim1 + 1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__1 = ix; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; + /* L190: */ + } + } + i__4 = jx; + x[i__4].r = temp.r, x[i__4].i = temp.i; + jx += *incx; + /* L200: */ + } + } + } + } -/* End of CTBMV . */ + /* End of CTBMV . */ } /* ctbmv_ */ -
diff --git a/blas/f2c/d_cnjg.c b/blas/f2c/d_cnjg.c deleted file mode 100644 index 623090c..0000000 --- a/blas/f2c/d_cnjg.c +++ /dev/null
@@ -1,6 +0,0 @@ -#include "datatypes.h" - -void d_cnjg(doublecomplex *r, doublecomplex *z) { - r->r = z->r; - r->i = -(z->i); -}
diff --git a/blas/f2c/datatypes.h b/blas/f2c/datatypes.h index 7c2f4ae..45b108d 100644 --- a/blas/f2c/datatypes.h +++ b/blas/f2c/datatypes.h
@@ -15,7 +15,6 @@ typedef struct { doublereal r, i; } doublecomplex; -typedef int ftnlen; typedef int logical; #define abs(x) ((x) >= 0 ? (x) : -(x))
diff --git a/blas/f2c/drotm.c b/blas/f2c/drotm.c index 17a779b..da82317 100644 --- a/blas/f2c/drotm.c +++ b/blas/f2c/drotm.c
@@ -1,215 +1,213 @@ /* drotm.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int drotm_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy, doublereal *dparam) -{ - /* Initialized data */ +/* Subroutine */ void drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, + doublereal *dparam) { + /* Initialized data */ - static doublereal zero = 0.; - static doublereal two = 2.; + static doublereal zero = 0.; + static doublereal two = 2.; - /* System generated locals */ - integer i__1, i__2; + /* System generated locals */ + integer i__1, i__2; - /* Local variables */ - integer i__; - doublereal w, z__; - integer kx, ky; - doublereal dh11, dh12, dh21, dh22, dflag; - integer nsteps; + /* Local variables */ + integer i__; + doublereal w, z__; + integer kx, ky; + doublereal dh11, dh12, dh21, dh22, dflag; + integer nsteps; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ + /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ -/* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ -/* (DY**T) */ + /* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ + /* (DY**T) */ -/* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ -/* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */ -/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + /* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ + /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */ + /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ -/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ + /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ -/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ -/* H=( ) ( ) ( ) ( ) */ -/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ -/* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ + /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ + /* H=( ) ( ) ( ) ( ) */ + /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ + /* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ -/* Arguments */ -/* ========= */ + /* Arguments */ + /* ========= */ -/* N (input) INTEGER */ -/* number of elements in input vector(s) */ + /* N (input) INTEGER */ + /* number of elements in input vector(s) */ -/* DX (input/output) DOUBLE PRECISION array, dimension N */ -/* double precision vector with N elements */ + /* DX (input/output) DOUBLE PRECISION array, dimension N */ + /* double precision vector with N elements */ -/* INCX (input) INTEGER */ -/* storage spacing between elements of DX */ + /* INCX (input) INTEGER */ + /* storage spacing between elements of DX */ -/* DY (input/output) DOUBLE PRECISION array, dimension N */ -/* double precision vector with N elements */ + /* DY (input/output) DOUBLE PRECISION array, dimension N */ + /* double precision vector with N elements */ -/* INCY (input) INTEGER */ -/* storage spacing between elements of DY */ + /* INCY (input) INTEGER */ + /* storage spacing between elements of DY */ -/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ -/* DPARAM(1)=DFLAG */ -/* DPARAM(2)=DH11 */ -/* DPARAM(3)=DH21 */ -/* DPARAM(4)=DH12 */ -/* DPARAM(5)=DH22 */ + /* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ + /* DPARAM(1)=DFLAG */ + /* DPARAM(2)=DH11 */ + /* DPARAM(3)=DH21 */ + /* DPARAM(4)=DH12 */ + /* DPARAM(5)=DH22 */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Data statements .. */ - /* Parameter adjustments */ - --dparam; - --dy; - --dx; + /* .. Local Scalars .. */ + /* .. */ + /* .. Data statements .. */ + /* Parameter adjustments */ + --dparam; + --dy; + --dx; - /* Function Body */ -/* .. */ + /* Function Body */ + /* .. */ - dflag = dparam[1]; - if (*n <= 0 || dflag + two == zero) { - goto L140; - } - if (! (*incx == *incy && *incx > 0)) { - goto L70; - } + dflag = dparam[1]; + if (*n <= 0 || dflag + two == zero) { + goto L140; + } + if (!(*incx == *incy && *incx > 0)) { + goto L70; + } - nsteps = *n * *incx; - if (dflag < 0.) { - goto L50; - } else if (dflag == 0) { - goto L10; - } else { - goto L30; - } + nsteps = *n * *incx; + if (dflag < 0.) { + goto L50; + } else if (dflag == 0) { + goto L10; + } else { + goto L30; + } L10: - dh12 = dparam[4]; - dh21 = dparam[3]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w + z__ * dh12; - dy[i__] = w * dh21 + z__; -/* L20: */ - } - goto L140; + dh12 = dparam[4]; + dh21 = dparam[3]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w + z__ * dh12; + dy[i__] = w * dh21 + z__; + /* L20: */ + } + goto L140; L30: - dh11 = dparam[2]; - dh22 = dparam[5]; - i__2 = nsteps; - i__1 = *incx; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w * dh11 + z__; - dy[i__] = -w + dh22 * z__; -/* L40: */ - } - goto L140; + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = nsteps; + i__1 = *incx; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__; + dy[i__] = -w + dh22 * z__; + /* L40: */ + } + goto L140; L50: - dh11 = dparam[2]; - dh12 = dparam[4]; - dh21 = dparam[3]; - dh22 = dparam[5]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w * dh11 + z__ * dh12; - dy[i__] = w * dh21 + z__ * dh22; -/* L60: */ - } - goto L140; + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__ * dh12; + dy[i__] = w * dh21 + z__ * dh22; + /* L60: */ + } + goto L140; L70: - kx = 1; - ky = 1; - if (*incx < 0) { - kx = (1 - *n) * *incx + 1; - } - if (*incy < 0) { - ky = (1 - *n) * *incy + 1; - } + kx = 1; + ky = 1; + if (*incx < 0) { + kx = (1 - *n) * *incx + 1; + } + if (*incy < 0) { + ky = (1 - *n) * *incy + 1; + } - if (dflag < 0.) { - goto L120; - } else if (dflag == 0) { - goto L80; - } else { - goto L100; - } + if (dflag < 0.) { + goto L120; + } else if (dflag == 0) { + goto L80; + } else { + goto L100; + } L80: - dh12 = dparam[4]; - dh21 = dparam[3]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w + z__ * dh12; - dy[ky] = w * dh21 + z__; - kx += *incx; - ky += *incy; -/* L90: */ - } - goto L140; + dh12 = dparam[4]; + dh21 = dparam[3]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w + z__ * dh12; + dy[ky] = w * dh21 + z__; + kx += *incx; + ky += *incy; + /* L90: */ + } + goto L140; L100: - dh11 = dparam[2]; - dh22 = dparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w * dh11 + z__; - dy[ky] = -w + dh22 * z__; - kx += *incx; - ky += *incy; -/* L110: */ - } - goto L140; + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__; + dy[ky] = -w + dh22 * z__; + kx += *incx; + ky += *incy; + /* L110: */ + } + goto L140; L120: - dh11 = dparam[2]; - dh12 = dparam[4]; - dh21 = dparam[3]; - dh22 = dparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w * dh11 + z__ * dh12; - dy[ky] = w * dh21 + z__ * dh22; - kx += *incx; - ky += *incy; -/* L130: */ - } + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__ * dh12; + dy[ky] = w * dh21 + z__ * dh22; + kx += *incx; + ky += *incy; + /* L130: */ + } L140: - return 0; + return; } /* drotm_ */ -
diff --git a/blas/f2c/drotmg.c b/blas/f2c/drotmg.c index a63eb10..81233cb 100644 --- a/blas/f2c/drotmg.c +++ b/blas/f2c/drotmg.c
@@ -1,293 +1,293 @@ /* drotmg.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int drotmg_(doublereal *dd1, doublereal *dd2, doublereal * - dx1, doublereal *dy1, doublereal *dparam) -{ - /* Initialized data */ +/* Subroutine */ void drotmg_(doublereal *dd1, doublereal *dd2, doublereal *dx1, doublereal *dy1, doublereal *dparam) { + /* Initialized data */ - static doublereal zero = 0.; - static doublereal one = 1.; - static doublereal two = 2.; - static doublereal gam = 4096.; - static doublereal gamsq = 16777216.; - static doublereal rgamsq = 5.9604645e-8; + static doublereal zero = 0.; + static doublereal one = 1.; + static doublereal two = 2.; + static doublereal gam = 4096.; + static doublereal gamsq = 16777216.; + static doublereal rgamsq = 5.9604645e-8; - /* Format strings */ - static char fmt_120[] = ""; - static char fmt_150[] = ""; - static char fmt_180[] = ""; - static char fmt_210[] = ""; + /* Format strings */ + static char fmt_120[] = ""; + static char fmt_150[] = ""; + static char fmt_180[] = ""; + static char fmt_210[] = ""; - /* System generated locals */ - doublereal d__1; + /* System generated locals */ + doublereal d__1; - /* Local variables */ - doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22; - integer igo; - doublereal dflag, dtemp; + /* Local variables */ + doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22; + integer igo; + doublereal dflag, dtemp; - /* Assigned format variables */ - static char *igo_fmt; + /* Assigned format variables */ + static char *igo_fmt; + (void)igo_fmt; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ -/* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ -/* DY2)**T. */ -/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ + /* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ + /* DY2)**T. */ + /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ -/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ + /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ -/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ -/* H=( ) ( ) ( ) ( ) */ -/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ -/* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ -/* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ -/* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */ + /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ + /* H=( ) ( ) ( ) ( ) */ + /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ + /* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ + /* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ + /* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */ -/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ -/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ -/* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ + /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ + /* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + /* Arguments */ + /* ========= */ -/* Arguments */ -/* ========= */ + /* DD1 (input/output) DOUBLE PRECISION */ -/* DD1 (input/output) DOUBLE PRECISION */ + /* DD2 (input/output) DOUBLE PRECISION */ -/* DD2 (input/output) DOUBLE PRECISION */ + /* DX1 (input/output) DOUBLE PRECISION */ -/* DX1 (input/output) DOUBLE PRECISION */ + /* DY1 (input) DOUBLE PRECISION */ -/* DY1 (input) DOUBLE PRECISION */ + /* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ + /* DPARAM(1)=DFLAG */ + /* DPARAM(2)=DH11 */ + /* DPARAM(3)=DH21 */ + /* DPARAM(4)=DH12 */ + /* DPARAM(5)=DH22 */ -/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ -/* DPARAM(1)=DFLAG */ -/* DPARAM(2)=DH11 */ -/* DPARAM(3)=DH21 */ -/* DPARAM(4)=DH12 */ -/* DPARAM(5)=DH22 */ + /* ===================================================================== */ -/* ===================================================================== */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Data statements .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Data statements .. */ + /* Parameter adjustments */ + --dparam; - /* Parameter adjustments */ - --dparam; - - /* Function Body */ -/* .. */ - if (! (*dd1 < zero)) { - goto L10; - } -/* GO ZERO-H-D-AND-DX1.. */ - goto L60; + /* Function Body */ + /* .. */ + if (!(*dd1 < zero)) { + goto L10; + } + /* GO ZERO-H-D-AND-DX1.. */ + goto L60; L10: -/* CASE-DD1-NONNEGATIVE */ - dp2 = *dd2 * *dy1; - if (! (dp2 == zero)) { - goto L20; - } - dflag = -two; - goto L260; + /* CASE-DD1-NONNEGATIVE */ + dp2 = *dd2 * *dy1; + if (!(dp2 == zero)) { + goto L20; + } + dflag = -two; + goto L260; /* REGULAR-CASE.. */ L20: - dp1 = *dd1 * *dx1; - dq2 = dp2 * *dy1; - dq1 = dp1 * *dx1; + dp1 = *dd1 * *dx1; + dq2 = dp2 * *dy1; + dq1 = dp1 * *dx1; - if (! (abs(dq1) > abs(dq2))) { - goto L40; - } - dh21 = -(*dy1) / *dx1; - dh12 = dp2 / dp1; + if (!(abs(dq1) > abs(dq2))) { + goto L40; + } + dh21 = -(*dy1) / *dx1; + dh12 = dp2 / dp1; - du = one - dh12 * dh21; + du = one - dh12 * dh21; - if (! (du <= zero)) { - goto L30; - } -/* GO ZERO-H-D-AND-DX1.. */ - goto L60; + if (!(du <= zero)) { + goto L30; + } + /* GO ZERO-H-D-AND-DX1.. */ + goto L60; L30: - dflag = zero; - *dd1 /= du; - *dd2 /= du; - *dx1 *= du; -/* GO SCALE-CHECK.. */ - goto L100; + dflag = zero; + *dd1 /= du; + *dd2 /= du; + *dx1 *= du; + /* GO SCALE-CHECK.. */ + goto L100; L40: - if (! (dq2 < zero)) { - goto L50; - } -/* GO ZERO-H-D-AND-DX1.. */ - goto L60; + if (!(dq2 < zero)) { + goto L50; + } + /* GO ZERO-H-D-AND-DX1.. */ + goto L60; L50: - dflag = one; - dh11 = dp1 / dp2; - dh22 = *dx1 / *dy1; - du = one + dh11 * dh22; - dtemp = *dd2 / du; - *dd2 = *dd1 / du; - *dd1 = dtemp; - *dx1 = *dy1 * du; -/* GO SCALE-CHECK */ - goto L100; + dflag = one; + dh11 = dp1 / dp2; + dh22 = *dx1 / *dy1; + du = one + dh11 * dh22; + dtemp = *dd2 / du; + *dd2 = *dd1 / du; + *dd1 = dtemp; + *dx1 = *dy1 * du; + /* GO SCALE-CHECK */ + goto L100; /* PROCEDURE..ZERO-H-D-AND-DX1.. */ L60: - dflag = -one; - dh11 = zero; - dh12 = zero; - dh21 = zero; - dh22 = zero; + dflag = -one; + dh11 = zero; + dh12 = zero; + dh21 = zero; + dh22 = zero; - *dd1 = zero; - *dd2 = zero; - *dx1 = zero; -/* RETURN.. */ - goto L220; + *dd1 = zero; + *dd2 = zero; + *dx1 = zero; + /* RETURN.. */ + goto L220; /* PROCEDURE..FIX-H.. */ L70: - if (! (dflag >= zero)) { - goto L90; - } - - if (! (dflag == zero)) { - goto L80; - } - dh11 = one; - dh22 = one; - dflag = -one; + if (!(dflag >= zero)) { goto L90; + } + + if (!(dflag == zero)) { + goto L80; + } + dh11 = one; + dh22 = one; + dflag = -one; + goto L90; L80: - dh21 = -one; - dh12 = one; - dflag = -one; + dh21 = -one; + dh12 = one; + dflag = -one; L90: - switch (igo) { - case 0: goto L120; - case 1: goto L150; - case 2: goto L180; - case 3: goto L210; - } + switch (igo) { + case 0: + goto L120; + case 1: + goto L150; + case 2: + goto L180; + case 3: + goto L210; + } /* PROCEDURE..SCALE-CHECK */ L100: L110: - if (! (*dd1 <= rgamsq)) { - goto L130; - } - if (*dd1 == zero) { - goto L160; - } - igo = 0; - igo_fmt = fmt_120; -/* FIX-H.. */ - goto L70; + if (!(*dd1 <= rgamsq)) { + goto L130; + } + if (*dd1 == zero) { + goto L160; + } + igo = 0; + igo_fmt = fmt_120; + /* FIX-H.. */ + goto L70; L120: -/* Computing 2nd power */ - d__1 = gam; - *dd1 *= d__1 * d__1; - *dx1 /= gam; - dh11 /= gam; - dh12 /= gam; - goto L110; + /* Computing 2nd power */ + d__1 = gam; + *dd1 *= d__1 * d__1; + *dx1 /= gam; + dh11 /= gam; + dh12 /= gam; + goto L110; L130: L140: - if (! (*dd1 >= gamsq)) { - goto L160; - } - igo = 1; - igo_fmt = fmt_150; -/* FIX-H.. */ - goto L70; + if (!(*dd1 >= gamsq)) { + goto L160; + } + igo = 1; + igo_fmt = fmt_150; + /* FIX-H.. */ + goto L70; L150: -/* Computing 2nd power */ - d__1 = gam; - *dd1 /= d__1 * d__1; - *dx1 *= gam; - dh11 *= gam; - dh12 *= gam; - goto L140; + /* Computing 2nd power */ + d__1 = gam; + *dd1 /= d__1 * d__1; + *dx1 *= gam; + dh11 *= gam; + dh12 *= gam; + goto L140; L160: L170: - if (! (abs(*dd2) <= rgamsq)) { - goto L190; - } - if (*dd2 == zero) { - goto L220; - } - igo = 2; - igo_fmt = fmt_180; -/* FIX-H.. */ - goto L70; + if (!(abs(*dd2) <= rgamsq)) { + goto L190; + } + if (*dd2 == zero) { + goto L220; + } + igo = 2; + igo_fmt = fmt_180; + /* FIX-H.. */ + goto L70; L180: -/* Computing 2nd power */ - d__1 = gam; - *dd2 *= d__1 * d__1; - dh21 /= gam; - dh22 /= gam; - goto L170; + /* Computing 2nd power */ + d__1 = gam; + *dd2 *= d__1 * d__1; + dh21 /= gam; + dh22 /= gam; + goto L170; L190: L200: - if (! (abs(*dd2) >= gamsq)) { - goto L220; - } - igo = 3; - igo_fmt = fmt_210; -/* FIX-H.. */ - goto L70; + if (!(abs(*dd2) >= gamsq)) { + goto L220; + } + igo = 3; + igo_fmt = fmt_210; + /* FIX-H.. */ + goto L70; L210: -/* Computing 2nd power */ - d__1 = gam; - *dd2 /= d__1 * d__1; - dh21 *= gam; - dh22 *= gam; - goto L200; + /* Computing 2nd power */ + d__1 = gam; + *dd2 /= d__1 * d__1; + dh21 *= gam; + dh22 *= gam; + goto L200; L220: - if (dflag < 0.) { - goto L250; - } else if (dflag == 0) { - goto L230; - } else { - goto L240; - } + if (dflag < 0.) { + goto L250; + } else if (dflag == 0) { + goto L230; + } else { + goto L240; + } L230: - dparam[3] = dh21; - dparam[4] = dh12; - goto L260; + dparam[3] = dh21; + dparam[4] = dh12; + goto L260; L240: - dparam[2] = dh11; - dparam[5] = dh22; - goto L260; + dparam[2] = dh11; + dparam[5] = dh22; + goto L260; L250: - dparam[2] = dh11; - dparam[3] = dh21; - dparam[4] = dh12; - dparam[5] = dh22; + dparam[2] = dh11; + dparam[3] = dh21; + dparam[4] = dh12; + dparam[5] = dh22; L260: - dparam[1] = dflag; - return 0; + dparam[1] = dflag; } /* drotmg_ */ -
diff --git a/blas/f2c/dsbmv.c b/blas/f2c/dsbmv.c index c6b4b21..4619369 100644 --- a/blas/f2c/dsbmv.c +++ b/blas/f2c/dsbmv.c
@@ -1,366 +1,356 @@ /* dsbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int dsbmv_(char *uplo, integer *n, integer *k, doublereal * - alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy, ftnlen uplo_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; +/* Subroutine */ void dsbmv_(char *uplo, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - doublereal temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* DSBMV performs the matrix-vector operation */ + /* DSBMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n symmetric band matrix, with k super-diagonals. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n symmetric band matrix, with k super-diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the band matrix A is being supplied as */ -/* follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the band matrix A is being supplied as */ + /* follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* being supplied. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* being supplied. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* being supplied. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* being supplied. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry, K specifies the number of super-diagonals of the */ -/* matrix A. K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry, K specifies the number of super-diagonals of the */ + /* matrix A. K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* ALPHA - DOUBLE PRECISION. */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - DOUBLE PRECISION. */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the symmetric matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer the upper */ -/* triangular part of a symmetric band matrix from conventional */ -/* full matrix storage to band storage: */ + /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the symmetric matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer the upper */ + /* triangular part of a symmetric band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the symmetric matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer the lower */ -/* triangular part of a symmetric band matrix from conventional */ -/* full matrix storage to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the symmetric matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer the lower */ + /* triangular part of a symmetric band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - DOUBLE PRECISION array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the */ -/* vector x. */ -/* Unchanged on exit. */ + /* X - DOUBLE PRECISION array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the */ + /* vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - DOUBLE PRECISION. */ -/* On entry, BETA specifies the scalar beta. */ -/* Unchanged on exit. */ + /* BETA - DOUBLE PRECISION. */ + /* On entry, BETA specifies the scalar beta. */ + /* Unchanged on exit. */ -/* Y - DOUBLE PRECISION array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the */ -/* vector y. On exit, Y is overwritten by the updated vector y. */ + /* Y - DOUBLE PRECISION array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the */ + /* vector y. On exit, Y is overwritten by the updated vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ + /* Level 2 Blas routine. */ -/* Level 2 Blas routine. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* ===================================================================== */ -/* ===================================================================== */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* Test the input parameters. */ -/* Test the input parameters. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("DSBMV ", &info); + return; + } - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("DSBMV ", &info, (ftnlen)6); - return 0; - } + /* Quick return if possible. */ -/* Quick return if possible. */ + if (*n == 0 || (*alpha == 0. && *beta == 1.)) { + return; + } - if (*n == 0 || (*alpha == 0. && *beta == 1.)) { - return 0; - } + /* Set up the start points in X and Y. */ -/* Set up the start points in X and Y. */ + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } - if (*incx > 0) { - kx = 1; + /* Start the operations. In this version the elements of the array A */ + /* are accessed sequentially with one pass through A. */ + + /* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (*alpha == 0.) { + return; + } + if (lsame_(uplo, "U")) { + /* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; + /* L50: */ + } + y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; + /* L70: */ + } + y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } + /* L80: */ + } } + } else { + /* Form y when lower triangle of A is stored. */ -/* Start the operations. In this version the elements of the array A */ -/* are accessed sequentially with one pass through A. */ - -/* First form y := beta*y. */ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; -/* L50: */ - } - y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - ix = kx; - iy = ky; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * - temp2; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; + /* L90: */ + } + y[j] += *alpha * temp2; + /* L100: */ + } } else { - -/* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[j] += *alpha * temp2; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; - ix = jx; - iy = jy; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + ix = jx; + iy = jy; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + /* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + /* L120: */ + } } + } - return 0; - -/* End of DSBMV . */ + /* End of DSBMV . */ } /* dsbmv_ */ -
diff --git a/blas/f2c/dspmv.c b/blas/f2c/dspmv.c index 0b4e92d..314d67b 100644 --- a/blas/f2c/dspmv.c +++ b/blas/f2c/dspmv.c
@@ -1,316 +1,308 @@ /* dspmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int dspmv_(char *uplo, integer *n, doublereal *alpha, - doublereal *ap, doublereal *x, integer *incx, doublereal *beta, - doublereal *y, integer *incy, ftnlen uplo_len) -{ - /* System generated locals */ - integer i__1, i__2; +/* Subroutine */ void dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) { + /* System generated locals */ + integer i__1, i__2; - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - doublereal temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* DSPMV performs the matrix-vector operation */ + /* DSPMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n symmetric matrix, supplied in packed form. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n symmetric matrix, supplied in packed form. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the matrix A is supplied in the packed */ -/* array AP as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the matrix A is supplied in the packed */ + /* array AP as follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* supplied in AP. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* supplied in AP. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* ALPHA - DOUBLE PRECISION. */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - DOUBLE PRECISION. */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* AP - DOUBLE PRECISION array of DIMENSION at least */ -/* ( ( n*( n + 1 ) )/2 ). */ -/* Before entry with UPLO = 'U' or 'u', the array AP must */ -/* contain the upper triangular part of the symmetric matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ -/* and a( 2, 2 ) respectively, and so on. */ -/* Before entry with UPLO = 'L' or 'l', the array AP must */ -/* contain the lower triangular part of the symmetric matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ -/* and a( 3, 1 ) respectively, and so on. */ -/* Unchanged on exit. */ + /* AP - DOUBLE PRECISION array of DIMENSION at least */ + /* ( ( n*( n + 1 ) )/2 ). */ + /* Before entry with UPLO = 'U' or 'u', the array AP must */ + /* contain the upper triangular part of the symmetric matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ + /* and a( 2, 2 ) respectively, and so on. */ + /* Before entry with UPLO = 'L' or 'l', the array AP must */ + /* contain the lower triangular part of the symmetric matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ + /* and a( 3, 1 ) respectively, and so on. */ + /* Unchanged on exit. */ -/* X - DOUBLE PRECISION array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. */ -/* Unchanged on exit. */ + /* X - DOUBLE PRECISION array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - DOUBLE PRECISION. */ -/* On entry, BETA specifies the scalar beta. When BETA is */ -/* supplied as zero then Y need not be set on input. */ -/* Unchanged on exit. */ + /* BETA - DOUBLE PRECISION. */ + /* On entry, BETA specifies the scalar beta. When BETA is */ + /* supplied as zero then Y need not be set on input. */ + /* Unchanged on exit. */ -/* Y - DOUBLE PRECISION array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the n */ -/* element vector y. On exit, Y is overwritten by the updated */ -/* vector y. */ + /* Y - DOUBLE PRECISION array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the n */ + /* element vector y. On exit, Y is overwritten by the updated */ + /* vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - --y; - --x; - --ap; + /* Parameter adjustments */ + --y; + --x; + --ap; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("DSPMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("DSPMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (*alpha == 0. && *beta == 1.)) { - return 0; - } + if (*n == 0 || (*alpha == 0. && *beta == 1.)) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array AP */ + /* are accessed sequentially with one pass through AP. */ + + /* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (*alpha == 0.) { + return; + } + kk = 1; + if (lsame_(uplo, "U")) { + /* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; + /* L50: */ + } + y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; + kk += j; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + ix += *incx; + iy += *incy; + /* L70: */ + } + y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; + jx += *incx; + jy += *incy; + kk += j; + /* L80: */ + } } + } else { + /* Form y when AP contains the lower triangle. */ -/* Start the operations. In this version the elements of the array AP */ -/* are accessed sequentially with one pass through AP. */ - -/* First form y := beta*y. */ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - kk = 1; - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; -/* L50: */ - } - y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; - kk += j; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; - jx += *incx; - jy += *incy; - kk += j; -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * ap[kk]; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; + /* L90: */ + } + y[j] += *alpha * temp2; + kk += *n - j + 1; + /* L100: */ + } } else { - -/* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * ap[kk]; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; -/* L90: */ - } - y[j] += *alpha * temp2; - kk += *n - j + 1; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * ap[kk]; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; - kk += *n - j + 1; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * ap[kk]; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + /* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j + 1; + /* L120: */ + } } + } - return 0; - -/* End of DSPMV . */ + /* End of DSPMV . */ } /* dspmv_ */ -
diff --git a/blas/f2c/dtbmv.c b/blas/f2c/dtbmv.c index aa67d19..92a0aec 100644 --- a/blas/f2c/dtbmv.c +++ b/blas/f2c/dtbmv.c
@@ -1,428 +1,417 @@ /* dtbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int dtbmv_(char *uplo, char *trans, char *diag, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx, - ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; +/* Subroutine */ void dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, + doublereal *x, integer *incx) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - doublereal temp; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); - logical nounit; + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + doublereal temp; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); + logical nounit; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* DTBMV performs one of the matrix-vector operations */ + /* DTBMV performs one of the matrix-vector operations */ -/* x := A*x, or x := A'*x, */ + /* x := A*x, or x := A'*x, */ -/* where x is an n element vector and A is an n by n unit, or non-unit, */ -/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + /* where x is an n element vector and A is an n by n unit, or non-unit, */ + /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the matrix is an upper or */ -/* lower triangular matrix as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the matrix is an upper or */ + /* lower triangular matrix as follows: */ -/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ -/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* TRANS - CHARACTER*1. */ -/* On entry, TRANS specifies the operation to be performed as */ -/* follows: */ + /* TRANS - CHARACTER*1. */ + /* On entry, TRANS specifies the operation to be performed as */ + /* follows: */ -/* TRANS = 'N' or 'n' x := A*x. */ + /* TRANS = 'N' or 'n' x := A*x. */ -/* TRANS = 'T' or 't' x := A'*x. */ + /* TRANS = 'T' or 't' x := A'*x. */ -/* TRANS = 'C' or 'c' x := A'*x. */ + /* TRANS = 'C' or 'c' x := A'*x. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* DIAG - CHARACTER*1. */ -/* On entry, DIAG specifies whether or not A is unit */ -/* triangular as follows: */ + /* DIAG - CHARACTER*1. */ + /* On entry, DIAG specifies whether or not A is unit */ + /* triangular as follows: */ -/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ -/* DIAG = 'N' or 'n' A is not assumed to be unit */ -/* triangular. */ + /* DIAG = 'N' or 'n' A is not assumed to be unit */ + /* triangular. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry with UPLO = 'U' or 'u', K specifies the number of */ -/* super-diagonals of the matrix A. */ -/* On entry with UPLO = 'L' or 'l', K specifies the number of */ -/* sub-diagonals of the matrix A. */ -/* K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry with UPLO = 'U' or 'u', K specifies the number of */ + /* super-diagonals of the matrix A. */ + /* On entry with UPLO = 'L' or 'l', K specifies the number of */ + /* sub-diagonals of the matrix A. */ + /* K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer an upper */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer an upper */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer a lower */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer a lower */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Note that when DIAG = 'U' or 'u' the elements of the array A */ -/* corresponding to the diagonal elements of the matrix are not */ -/* referenced, but are assumed to be unity. */ -/* Unchanged on exit. */ + /* Note that when DIAG = 'U' or 'u' the elements of the array A */ + /* corresponding to the diagonal elements of the matrix are not */ + /* referenced, but are assumed to be unity. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - DOUBLE PRECISION array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. On exit, X is overwritten with the */ -/* transformed vector x. */ + /* X - DOUBLE PRECISION array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. On exit, X is overwritten with the */ + /* transformed vector x. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, - "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( - ftnlen)1)) { - info = 2; - } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, - "N", (ftnlen)1, (ftnlen)1)) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("DTBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { + info = 2; + } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("DTBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0) { - return 0; - } + if (*n == 0) { + return; + } - nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, "N"); -/* Set up the start point in X if the increment is not unity. This */ -/* will be ( N - 1 )*INCX too small for descending loops. */ + /* Set up the start point in X if the increment is not unity. This */ + /* will be ( N - 1 )*INCX too small for descending loops. */ - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ - if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(trans, "N")) { + /* Form x := A*x. */ -/* Form x := A*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.) { - temp = x[j]; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; -/* L10: */ - } - if (nounit) { - x[j] *= a[kplus1 + j * a_dim1]; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix += *incx; -/* L30: */ - } - if (nounit) { - x[jx] *= a[kplus1 + j * a_dim1]; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.) { - temp = x[j]; - l = 1 - j; -/* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j * a_dim1 + 1]; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix -= *incx; -/* L70: */ - } - if (nounit) { - x[jx] *= a[j * a_dim1 + 1]; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } -/* L80: */ - } - } - } + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; + /* L10: */ + } + if (nounit) { + x[j] *= a[kplus1 + j * a_dim1]; + } + } + /* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix += *incx; + /* L30: */ + } + if (nounit) { + x[jx] *= a[kplus1 + j * a_dim1]; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } + /* L40: */ + } + } } else { - -/* Form x := A'*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - x[j] = temp; -/* L100: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix -= *incx; -/* L110: */ - } - x[jx] = temp; - jx -= *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[j]; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; -/* L130: */ - } - x[j] = temp; -/* L140: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[jx]; - kx += *incx; - ix = kx; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L150: */ - } - x[jx] = temp; - jx += *incx; -/* L160: */ - } - } - } + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + l = 1 - j; + /* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; + /* L50: */ + } + if (nounit) { + x[j] *= a[j * a_dim1 + 1]; + } + } + /* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix -= *incx; + /* L70: */ + } + if (nounit) { + x[jx] *= a[j * a_dim1 + 1]; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } + /* L80: */ + } + } } + } else { + /* Form x := A'*x. */ - return 0; + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; + /* L90: */ + } + x[j] = temp; + /* L100: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix -= *incx; + /* L110: */ + } + x[jx] = temp; + jx -= *incx; + /* L120: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[j]; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; + /* L130: */ + } + x[j] = temp; + /* L140: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[jx]; + kx += *incx; + ix = kx; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + /* L150: */ + } + x[jx] = temp; + jx += *incx; + /* L160: */ + } + } + } + } -/* End of DTBMV . */ + /* End of DTBMV . */ } /* dtbmv_ */ -
diff --git a/blas/f2c/lsame.c b/blas/f2c/lsame.c index 46324d9..ad51ea1 100644 --- a/blas/f2c/lsame.c +++ b/blas/f2c/lsame.c
@@ -1,117 +1,109 @@ /* lsame.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -logical lsame_(char *ca, char *cb, ftnlen ca_len, ftnlen cb_len) -{ - /* System generated locals */ - logical ret_val; +logical lsame_(char *ca, char *cb) { + /* System generated locals */ + logical ret_val; - /* Local variables */ - integer inta, intb, zcode; + /* Local variables */ + integer inta, intb, zcode; + /* -- LAPACK auxiliary routine (version 3.1) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ -/* -- LAPACK auxiliary routine (version 3.1) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ + /* .. Scalar Arguments .. */ + /* .. */ -/* .. Scalar Arguments .. */ -/* .. */ + /* Purpose */ + /* ======= */ -/* Purpose */ -/* ======= */ + /* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ + /* case. */ -/* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ -/* case. */ + /* Arguments */ + /* ========= */ -/* Arguments */ -/* ========= */ + /* CA (input) CHARACTER*1 */ -/* CA (input) CHARACTER*1 */ + /* CB (input) CHARACTER*1 */ + /* CA and CB specify the single characters to be compared. */ -/* CB (input) CHARACTER*1 */ -/* CA and CB specify the single characters to be compared. */ + /* ===================================================================== */ -/* ===================================================================== */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ + /* Test if the characters are equal */ -/* Test if the characters are equal */ - - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; - } - -/* Now test for equivalence if both characters are alphabetic. */ - - zcode = 'Z'; - -/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ -/* machines, on which ICHAR returns a value with bit 8 set. */ -/* ICHAR('A') on Prime machines returns 193 which is the same as */ -/* ICHAR('A') on an EBCDIC machine. */ - - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - - if (zcode == 90 || zcode == 122) { - -/* ASCII is assumed - ZCODE is the ASCII code of either lower or */ -/* upper case 'Z'. */ - - if (inta >= 97 && inta <= 122) { - inta += -32; - } - if (intb >= 97 && intb <= 122) { - intb += -32; - } - - } else if (zcode == 233 || zcode == 169) { - -/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ -/* upper case 'Z'. */ - - if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || - (inta >= 162 && inta <= 169)) { - inta += 64; - } - if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || - (intb >= 162 && intb <= 169)) { - intb += 64; - } - - } else if (zcode == 218 || zcode == 250) { - -/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ -/* plus 128 of either lower or upper case 'Z'. */ - - if (inta >= 225 && inta <= 250) { - inta += -32; - } - if (intb >= 225 && intb <= 250) { - intb += -32; - } - } - ret_val = inta == intb; - -/* RETURN */ - -/* End of LSAME */ - + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { return ret_val; -} /* lsame_ */ + } + /* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + + /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ + /* machines, on which ICHAR returns a value with bit 8 set. */ + /* ICHAR('A') on Prime machines returns 193 which is the same as */ + /* ICHAR('A') on an EBCDIC machine. */ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + /* ASCII is assumed - ZCODE is the ASCII code of either lower or */ + /* upper case 'Z'. */ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ + /* upper case 'Z'. */ + + if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) { + inta += 64; + } + if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ + /* plus 128 of either lower or upper case 'Z'. */ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + + /* RETURN */ + + /* End of LSAME */ + + return ret_val; +} /* lsame_ */
diff --git a/blas/f2c/r_cnjg.c b/blas/f2c/r_cnjg.c deleted file mode 100644 index c08182f..0000000 --- a/blas/f2c/r_cnjg.c +++ /dev/null
@@ -1,6 +0,0 @@ -#include "datatypes.h" - -void r_cnjg(complex *r, complex *z) { - r->r = z->r; - r->i = -(z->i); -}
diff --git a/blas/f2c/srotm.c b/blas/f2c/srotm.c index bd5944a..410dd87 100644 --- a/blas/f2c/srotm.c +++ b/blas/f2c/srotm.c
@@ -1,216 +1,212 @@ /* srotm.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int srotm_(integer *n, real *sx, integer *incx, real *sy, - integer *incy, real *sparam) -{ - /* Initialized data */ +/* Subroutine */ void srotm_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *sparam) { + /* Initialized data */ - static real zero = 0.f; - static real two = 2.f; + static real zero = 0.f; + static real two = 2.f; - /* System generated locals */ - integer i__1, i__2; + /* System generated locals */ + integer i__1, i__2; - /* Local variables */ - integer i__; - real w, z__; - integer kx, ky; - real sh11, sh12, sh21, sh22, sflag; - integer nsteps; + /* Local variables */ + integer i__; + real w, z__; + integer kx, ky; + real sh11, sh12, sh21, sh22, sflag; + integer nsteps; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ + /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ -/* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */ -/* (DX**T) */ + /* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */ + /* (DX**T) */ -/* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ -/* LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */ -/* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + /* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ + /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */ + /* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ -/* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ + /* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ -/* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ -/* H=( ) ( ) ( ) ( ) */ -/* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ -/* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */ + /* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ + /* H=( ) ( ) ( ) ( ) */ + /* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ + /* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */ + /* Arguments */ + /* ========= */ -/* Arguments */ -/* ========= */ + /* N (input) INTEGER */ + /* number of elements in input vector(s) */ -/* N (input) INTEGER */ -/* number of elements in input vector(s) */ + /* SX (input/output) REAL array, dimension N */ + /* double precision vector with N elements */ -/* SX (input/output) REAL array, dimension N */ -/* double precision vector with N elements */ + /* INCX (input) INTEGER */ + /* storage spacing between elements of SX */ -/* INCX (input) INTEGER */ -/* storage spacing between elements of SX */ + /* SY (input/output) REAL array, dimension N */ + /* double precision vector with N elements */ -/* SY (input/output) REAL array, dimension N */ -/* double precision vector with N elements */ + /* INCY (input) INTEGER */ + /* storage spacing between elements of SY */ -/* INCY (input) INTEGER */ -/* storage spacing between elements of SY */ + /* SPARAM (input/output) REAL array, dimension 5 */ + /* SPARAM(1)=SFLAG */ + /* SPARAM(2)=SH11 */ + /* SPARAM(3)=SH21 */ + /* SPARAM(4)=SH12 */ + /* SPARAM(5)=SH22 */ -/* SPARAM (input/output) REAL array, dimension 5 */ -/* SPARAM(1)=SFLAG */ -/* SPARAM(2)=SH11 */ -/* SPARAM(3)=SH21 */ -/* SPARAM(4)=SH12 */ -/* SPARAM(5)=SH22 */ + /* ===================================================================== */ -/* ===================================================================== */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Data statements .. */ + /* Parameter adjustments */ + --sparam; + --sy; + --sx; -/* .. Local Scalars .. */ -/* .. */ -/* .. Data statements .. */ - /* Parameter adjustments */ - --sparam; - --sy; - --sx; + /* Function Body */ + /* .. */ - /* Function Body */ -/* .. */ + sflag = sparam[1]; + if (*n <= 0 || sflag + two == zero) { + goto L140; + } + if (!(*incx == *incy && *incx > 0)) { + goto L70; + } - sflag = sparam[1]; - if (*n <= 0 || sflag + two == zero) { - goto L140; - } - if (! (*incx == *incy && *incx > 0)) { - goto L70; - } - - nsteps = *n * *incx; - if (sflag < 0.f) { - goto L50; - } else if (sflag == 0) { - goto L10; - } else { - goto L30; - } + nsteps = *n * *incx; + if (sflag < 0.f) { + goto L50; + } else if (sflag == 0) { + goto L10; + } else { + goto L30; + } L10: - sh12 = sparam[4]; - sh21 = sparam[3]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = sx[i__]; - z__ = sy[i__]; - sx[i__] = w + z__ * sh12; - sy[i__] = w * sh21 + z__; -/* L20: */ - } - goto L140; + sh12 = sparam[4]; + sh21 = sparam[3]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = sx[i__]; + z__ = sy[i__]; + sx[i__] = w + z__ * sh12; + sy[i__] = w * sh21 + z__; + /* L20: */ + } + goto L140; L30: - sh11 = sparam[2]; - sh22 = sparam[5]; - i__2 = nsteps; - i__1 = *incx; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - w = sx[i__]; - z__ = sy[i__]; - sx[i__] = w * sh11 + z__; - sy[i__] = -w + sh22 * z__; -/* L40: */ - } - goto L140; + sh11 = sparam[2]; + sh22 = sparam[5]; + i__2 = nsteps; + i__1 = *incx; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + w = sx[i__]; + z__ = sy[i__]; + sx[i__] = w * sh11 + z__; + sy[i__] = -w + sh22 * z__; + /* L40: */ + } + goto L140; L50: - sh11 = sparam[2]; - sh12 = sparam[4]; - sh21 = sparam[3]; - sh22 = sparam[5]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = sx[i__]; - z__ = sy[i__]; - sx[i__] = w * sh11 + z__ * sh12; - sy[i__] = w * sh21 + z__ * sh22; -/* L60: */ - } - goto L140; + sh11 = sparam[2]; + sh12 = sparam[4]; + sh21 = sparam[3]; + sh22 = sparam[5]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = sx[i__]; + z__ = sy[i__]; + sx[i__] = w * sh11 + z__ * sh12; + sy[i__] = w * sh21 + z__ * sh22; + /* L60: */ + } + goto L140; L70: - kx = 1; - ky = 1; - if (*incx < 0) { - kx = (1 - *n) * *incx + 1; - } - if (*incy < 0) { - ky = (1 - *n) * *incy + 1; - } + kx = 1; + ky = 1; + if (*incx < 0) { + kx = (1 - *n) * *incx + 1; + } + if (*incy < 0) { + ky = (1 - *n) * *incy + 1; + } - if (sflag < 0.f) { - goto L120; - } else if (sflag == 0) { - goto L80; - } else { - goto L100; - } + if (sflag < 0.f) { + goto L120; + } else if (sflag == 0) { + goto L80; + } else { + goto L100; + } L80: - sh12 = sparam[4]; - sh21 = sparam[3]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = sx[kx]; - z__ = sy[ky]; - sx[kx] = w + z__ * sh12; - sy[ky] = w * sh21 + z__; - kx += *incx; - ky += *incy; -/* L90: */ - } - goto L140; + sh12 = sparam[4]; + sh21 = sparam[3]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = sx[kx]; + z__ = sy[ky]; + sx[kx] = w + z__ * sh12; + sy[ky] = w * sh21 + z__; + kx += *incx; + ky += *incy; + /* L90: */ + } + goto L140; L100: - sh11 = sparam[2]; - sh22 = sparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = sx[kx]; - z__ = sy[ky]; - sx[kx] = w * sh11 + z__; - sy[ky] = -w + sh22 * z__; - kx += *incx; - ky += *incy; -/* L110: */ - } - goto L140; + sh11 = sparam[2]; + sh22 = sparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = sx[kx]; + z__ = sy[ky]; + sx[kx] = w * sh11 + z__; + sy[ky] = -w + sh22 * z__; + kx += *incx; + ky += *incy; + /* L110: */ + } + goto L140; L120: - sh11 = sparam[2]; - sh12 = sparam[4]; - sh21 = sparam[3]; - sh22 = sparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = sx[kx]; - z__ = sy[ky]; - sx[kx] = w * sh11 + z__ * sh12; - sy[ky] = w * sh21 + z__ * sh22; - kx += *incx; - ky += *incy; -/* L130: */ - } + sh11 = sparam[2]; + sh12 = sparam[4]; + sh21 = sparam[3]; + sh22 = sparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = sx[kx]; + z__ = sy[ky]; + sx[kx] = w * sh11 + z__ * sh12; + sy[ky] = w * sh21 + z__ * sh22; + kx += *incx; + ky += *incy; + /* L130: */ + } L140: - return 0; + return; } /* srotm_ */ -
diff --git a/blas/f2c/srotmg.c b/blas/f2c/srotmg.c index 75f789f..3a0f9f6 100644 --- a/blas/f2c/srotmg.c +++ b/blas/f2c/srotmg.c
@@ -1,295 +1,293 @@ /* srotmg.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real - *sparam) -{ - /* Initialized data */ +/* Subroutine */ void srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real *sparam) { + /* Initialized data */ - static real zero = 0.f; - static real one = 1.f; - static real two = 2.f; - static real gam = 4096.f; - static real gamsq = 16777200.f; - static real rgamsq = 5.96046e-8f; + static real zero = 0.f; + static real one = 1.f; + static real two = 2.f; + static real gam = 4096.f; + static real gamsq = 16777200.f; + static real rgamsq = 5.96046e-8f; - /* Format strings */ - static char fmt_120[] = ""; - static char fmt_150[] = ""; - static char fmt_180[] = ""; - static char fmt_210[] = ""; + /* Format strings */ + static char fmt_120[] = ""; + static char fmt_150[] = ""; + static char fmt_180[] = ""; + static char fmt_210[] = ""; - /* System generated locals */ - real r__1; + /* System generated locals */ + real r__1; - /* Local variables */ - real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22; - integer igo; - real sflag, stemp; + /* Local variables */ + real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22; + integer igo; + real sflag, stemp; - /* Assigned format variables */ - static char *igo_fmt; + /* Assigned format variables */ + static char *igo_fmt; + (void)igo_fmt; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ -/* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* */ -/* SY2)**T. */ -/* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ + /* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* */ + /* SY2)**T. */ + /* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ -/* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ + /* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ -/* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ -/* H=( ) ( ) ( ) ( ) */ -/* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ -/* LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */ -/* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */ -/* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */ + /* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ + /* H=( ) ( ) ( ) ( ) */ + /* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ + /* LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */ + /* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */ + /* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */ -/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ -/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ -/* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ + /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ + /* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + /* Arguments */ + /* ========= */ -/* Arguments */ -/* ========= */ + /* SD1 (input/output) REAL */ + /* SD2 (input/output) REAL */ -/* SD1 (input/output) REAL */ + /* SX1 (input/output) REAL */ -/* SD2 (input/output) REAL */ + /* SY1 (input) REAL */ -/* SX1 (input/output) REAL */ + /* SPARAM (input/output) REAL array, dimension 5 */ + /* SPARAM(1)=SFLAG */ + /* SPARAM(2)=SH11 */ + /* SPARAM(3)=SH21 */ + /* SPARAM(4)=SH12 */ + /* SPARAM(5)=SH22 */ -/* SY1 (input) REAL */ + /* ===================================================================== */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Data statements .. */ -/* SPARAM (input/output) REAL array, dimension 5 */ -/* SPARAM(1)=SFLAG */ -/* SPARAM(2)=SH11 */ -/* SPARAM(3)=SH21 */ -/* SPARAM(4)=SH12 */ -/* SPARAM(5)=SH22 */ + /* Parameter adjustments */ + --sparam; -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Data statements .. */ - - /* Parameter adjustments */ - --sparam; - - /* Function Body */ -/* .. */ - if (! (*sd1 < zero)) { - goto L10; - } -/* GO ZERO-H-D-AND-SX1.. */ - goto L60; + /* Function Body */ + /* .. */ + if (!(*sd1 < zero)) { + goto L10; + } + /* GO ZERO-H-D-AND-SX1.. */ + goto L60; L10: -/* CASE-SD1-NONNEGATIVE */ - sp2 = *sd2 * *sy1; - if (! (sp2 == zero)) { - goto L20; - } - sflag = -two; - goto L260; + /* CASE-SD1-NONNEGATIVE */ + sp2 = *sd2 * *sy1; + if (!(sp2 == zero)) { + goto L20; + } + sflag = -two; + goto L260; /* REGULAR-CASE.. */ L20: - sp1 = *sd1 * *sx1; - sq2 = sp2 * *sy1; - sq1 = sp1 * *sx1; + sp1 = *sd1 * *sx1; + sq2 = sp2 * *sy1; + sq1 = sp1 * *sx1; - if (! (dabs(sq1) > dabs(sq2))) { - goto L40; - } - sh21 = -(*sy1) / *sx1; - sh12 = sp2 / sp1; + if (!(dabs(sq1) > dabs(sq2))) { + goto L40; + } + sh21 = -(*sy1) / *sx1; + sh12 = sp2 / sp1; - su = one - sh12 * sh21; + su = one - sh12 * sh21; - if (! (su <= zero)) { - goto L30; - } -/* GO ZERO-H-D-AND-SX1.. */ - goto L60; + if (!(su <= zero)) { + goto L30; + } + /* GO ZERO-H-D-AND-SX1.. */ + goto L60; L30: - sflag = zero; - *sd1 /= su; - *sd2 /= su; - *sx1 *= su; -/* GO SCALE-CHECK.. */ - goto L100; + sflag = zero; + *sd1 /= su; + *sd2 /= su; + *sx1 *= su; + /* GO SCALE-CHECK.. */ + goto L100; L40: - if (! (sq2 < zero)) { - goto L50; - } -/* GO ZERO-H-D-AND-SX1.. */ - goto L60; + if (!(sq2 < zero)) { + goto L50; + } + /* GO ZERO-H-D-AND-SX1.. */ + goto L60; L50: - sflag = one; - sh11 = sp1 / sp2; - sh22 = *sx1 / *sy1; - su = one + sh11 * sh22; - stemp = *sd2 / su; - *sd2 = *sd1 / su; - *sd1 = stemp; - *sx1 = *sy1 * su; -/* GO SCALE-CHECK */ - goto L100; + sflag = one; + sh11 = sp1 / sp2; + sh22 = *sx1 / *sy1; + su = one + sh11 * sh22; + stemp = *sd2 / su; + *sd2 = *sd1 / su; + *sd1 = stemp; + *sx1 = *sy1 * su; + /* GO SCALE-CHECK */ + goto L100; /* PROCEDURE..ZERO-H-D-AND-SX1.. */ L60: - sflag = -one; - sh11 = zero; - sh12 = zero; - sh21 = zero; - sh22 = zero; + sflag = -one; + sh11 = zero; + sh12 = zero; + sh21 = zero; + sh22 = zero; - *sd1 = zero; - *sd2 = zero; - *sx1 = zero; -/* RETURN.. */ - goto L220; + *sd1 = zero; + *sd2 = zero; + *sx1 = zero; + /* RETURN.. */ + goto L220; /* PROCEDURE..FIX-H.. */ L70: - if (! (sflag >= zero)) { - goto L90; - } - - if (! (sflag == zero)) { - goto L80; - } - sh11 = one; - sh22 = one; - sflag = -one; + if (!(sflag >= zero)) { goto L90; + } + + if (!(sflag == zero)) { + goto L80; + } + sh11 = one; + sh22 = one; + sflag = -one; + goto L90; L80: - sh21 = -one; - sh12 = one; - sflag = -one; + sh21 = -one; + sh12 = one; + sflag = -one; L90: - switch (igo) { - case 0: goto L120; - case 1: goto L150; - case 2: goto L180; - case 3: goto L210; - } + switch (igo) { + case 0: + goto L120; + case 1: + goto L150; + case 2: + goto L180; + case 3: + goto L210; + } /* PROCEDURE..SCALE-CHECK */ L100: L110: - if (! (*sd1 <= rgamsq)) { - goto L130; - } - if (*sd1 == zero) { - goto L160; - } - igo = 0; - igo_fmt = fmt_120; -/* FIX-H.. */ - goto L70; + if (!(*sd1 <= rgamsq)) { + goto L130; + } + if (*sd1 == zero) { + goto L160; + } + igo = 0; + igo_fmt = fmt_120; + /* FIX-H.. */ + goto L70; L120: -/* Computing 2nd power */ - r__1 = gam; - *sd1 *= r__1 * r__1; - *sx1 /= gam; - sh11 /= gam; - sh12 /= gam; - goto L110; + /* Computing 2nd power */ + r__1 = gam; + *sd1 *= r__1 * r__1; + *sx1 /= gam; + sh11 /= gam; + sh12 /= gam; + goto L110; L130: L140: - if (! (*sd1 >= gamsq)) { - goto L160; - } - igo = 1; - igo_fmt = fmt_150; -/* FIX-H.. */ - goto L70; + if (!(*sd1 >= gamsq)) { + goto L160; + } + igo = 1; + igo_fmt = fmt_150; + /* FIX-H.. */ + goto L70; L150: -/* Computing 2nd power */ - r__1 = gam; - *sd1 /= r__1 * r__1; - *sx1 *= gam; - sh11 *= gam; - sh12 *= gam; - goto L140; + /* Computing 2nd power */ + r__1 = gam; + *sd1 /= r__1 * r__1; + *sx1 *= gam; + sh11 *= gam; + sh12 *= gam; + goto L140; L160: L170: - if (! (dabs(*sd2) <= rgamsq)) { - goto L190; - } - if (*sd2 == zero) { - goto L220; - } - igo = 2; - igo_fmt = fmt_180; -/* FIX-H.. */ - goto L70; + if (!(dabs(*sd2) <= rgamsq)) { + goto L190; + } + if (*sd2 == zero) { + goto L220; + } + igo = 2; + igo_fmt = fmt_180; + /* FIX-H.. */ + goto L70; L180: -/* Computing 2nd power */ - r__1 = gam; - *sd2 *= r__1 * r__1; - sh21 /= gam; - sh22 /= gam; - goto L170; + /* Computing 2nd power */ + r__1 = gam; + *sd2 *= r__1 * r__1; + sh21 /= gam; + sh22 /= gam; + goto L170; L190: L200: - if (! (dabs(*sd2) >= gamsq)) { - goto L220; - } - igo = 3; - igo_fmt = fmt_210; -/* FIX-H.. */ - goto L70; + if (!(dabs(*sd2) >= gamsq)) { + goto L220; + } + igo = 3; + igo_fmt = fmt_210; + /* FIX-H.. */ + goto L70; L210: -/* Computing 2nd power */ - r__1 = gam; - *sd2 /= r__1 * r__1; - sh21 *= gam; - sh22 *= gam; - goto L200; + /* Computing 2nd power */ + r__1 = gam; + *sd2 /= r__1 * r__1; + sh21 *= gam; + sh22 *= gam; + goto L200; L220: - if (sflag < 0.f) { - goto L250; - } else if (sflag == 0) { - goto L230; - } else { - goto L240; - } + if (sflag < 0.f) { + goto L250; + } else if (sflag == 0) { + goto L230; + } else { + goto L240; + } L230: - sparam[3] = sh21; - sparam[4] = sh12; - goto L260; + sparam[3] = sh21; + sparam[4] = sh12; + goto L260; L240: - sparam[2] = sh11; - sparam[5] = sh22; - goto L260; + sparam[2] = sh11; + sparam[5] = sh22; + goto L260; L250: - sparam[2] = sh11; - sparam[3] = sh21; - sparam[4] = sh12; - sparam[5] = sh22; + sparam[2] = sh11; + sparam[3] = sh21; + sparam[4] = sh12; + sparam[5] = sh22; L260: - sparam[1] = sflag; - return 0; + sparam[1] = sflag; } /* srotmg_ */ -
diff --git a/blas/f2c/ssbmv.c b/blas/f2c/ssbmv.c index 8599325..ac1d70a 100644 --- a/blas/f2c/ssbmv.c +++ b/blas/f2c/ssbmv.c
@@ -1,368 +1,359 @@ /* ssbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int ssbmv_(char *uplo, integer *n, integer *k, real *alpha, - real *a, integer *lda, real *x, integer *incx, real *beta, real *y, - integer *incy, ftnlen uplo_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; +/* Subroutine */ void ssbmv_(char *uplo, integer *n, integer *k, real *alpha, real *a, integer *lda, real *x, + integer *incx, real *beta, real *y, integer *incy) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - real temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + real temp1, temp2; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* SSBMV performs the matrix-vector operation */ + /* SSBMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n symmetric band matrix, with k super-diagonals. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n symmetric band matrix, with k super-diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the band matrix A is being supplied as */ -/* follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the band matrix A is being supplied as */ + /* follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* being supplied. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* being supplied. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* being supplied. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* being supplied. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry, K specifies the number of super-diagonals of the */ -/* matrix A. K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry, K specifies the number of super-diagonals of the */ + /* matrix A. K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* ALPHA - REAL . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - REAL . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* A - REAL array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the symmetric matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer the upper */ -/* triangular part of a symmetric band matrix from conventional */ -/* full matrix storage to band storage: */ + /* A - REAL array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the symmetric matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer the upper */ + /* triangular part of a symmetric band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the symmetric matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer the lower */ -/* triangular part of a symmetric band matrix from conventional */ -/* full matrix storage to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the symmetric matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer the lower */ + /* triangular part of a symmetric band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - REAL array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the */ -/* vector x. */ -/* Unchanged on exit. */ + /* X - REAL array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the */ + /* vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - REAL . */ -/* On entry, BETA specifies the scalar beta. */ -/* Unchanged on exit. */ + /* BETA - REAL . */ + /* On entry, BETA specifies the scalar beta. */ + /* Unchanged on exit. */ -/* Y - REAL array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the */ -/* vector y. On exit, Y is overwritten by the updated vector y. */ + /* Y - REAL array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the */ + /* vector y. On exit, Y is overwritten by the updated vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("SSBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SSBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { - return 0; - } + if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array A */ + /* are accessed sequentially with one pass through A. */ + + /* First form y := beta*y. */ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (*alpha == 0.f) { + return; + } + if (lsame_(uplo, "U")) { + /* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; + /* L50: */ + } + y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; + /* L70: */ + } + y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } + /* L80: */ + } } + } else { + /* Form y when lower triangle of A is stored. */ -/* Start the operations. In this version the elements of the array A */ -/* are accessed sequentially with one pass through A. */ - -/* First form y := beta*y. */ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; -/* L50: */ - } - y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - ix = kx; - iy = ky; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * - temp2; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; + /* L90: */ + } + y[j] += *alpha * temp2; + /* L100: */ + } } else { - -/* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - y[j] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[j] += *alpha * temp2; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - y[jy] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; - ix = jx; - iy = jy; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + ix = jx; + iy = jy; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + /* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + /* L120: */ + } } + } - return 0; - -/* End of SSBMV . */ + /* End of SSBMV . */ } /* ssbmv_ */ -
diff --git a/blas/f2c/sspmv.c b/blas/f2c/sspmv.c index 47858ec..ea9db33 100644 --- a/blas/f2c/sspmv.c +++ b/blas/f2c/sspmv.c
@@ -1,316 +1,308 @@ /* sspmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int sspmv_(char *uplo, integer *n, real *alpha, real *ap, - real *x, integer *incx, real *beta, real *y, integer *incy, ftnlen - uplo_len) -{ - /* System generated locals */ - integer i__1, i__2; +/* Subroutine */ void sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x, integer *incx, real *beta, real *y, + integer *incy) { + /* System generated locals */ + integer i__1, i__2; - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - real temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* SSPMV performs the matrix-vector operation */ + /* SSPMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n symmetric matrix, supplied in packed form. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n symmetric matrix, supplied in packed form. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the matrix A is supplied in the packed */ -/* array AP as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the matrix A is supplied in the packed */ + /* array AP as follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* supplied in AP. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* supplied in AP. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* ALPHA - REAL . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - REAL . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* AP - REAL array of DIMENSION at least */ -/* ( ( n*( n + 1 ) )/2 ). */ -/* Before entry with UPLO = 'U' or 'u', the array AP must */ -/* contain the upper triangular part of the symmetric matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ -/* and a( 2, 2 ) respectively, and so on. */ -/* Before entry with UPLO = 'L' or 'l', the array AP must */ -/* contain the lower triangular part of the symmetric matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ -/* and a( 3, 1 ) respectively, and so on. */ -/* Unchanged on exit. */ + /* AP - REAL array of DIMENSION at least */ + /* ( ( n*( n + 1 ) )/2 ). */ + /* Before entry with UPLO = 'U' or 'u', the array AP must */ + /* contain the upper triangular part of the symmetric matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ + /* and a( 2, 2 ) respectively, and so on. */ + /* Before entry with UPLO = 'L' or 'l', the array AP must */ + /* contain the lower triangular part of the symmetric matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ + /* and a( 3, 1 ) respectively, and so on. */ + /* Unchanged on exit. */ -/* X - REAL array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. */ -/* Unchanged on exit. */ + /* X - REAL array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - REAL . */ -/* On entry, BETA specifies the scalar beta. When BETA is */ -/* supplied as zero then Y need not be set on input. */ -/* Unchanged on exit. */ + /* BETA - REAL . */ + /* On entry, BETA specifies the scalar beta. When BETA is */ + /* supplied as zero then Y need not be set on input. */ + /* Unchanged on exit. */ -/* Y - REAL array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the n */ -/* element vector y. On exit, Y is overwritten by the updated */ -/* vector y. */ + /* Y - REAL array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the n */ + /* element vector y. On exit, Y is overwritten by the updated */ + /* vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - --y; - --x; - --ap; + /* Parameter adjustments */ + --y; + --x; + --ap; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("SSPMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("SSPMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { - return 0; - } + if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array AP */ + /* are accessed sequentially with one pass through AP. */ + + /* First form y := beta*y. */ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (*alpha == 0.f) { + return; + } + kk = 1; + if (lsame_(uplo, "U")) { + /* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; + /* L50: */ + } + y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; + kk += j; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + ix += *incx; + iy += *incy; + /* L70: */ + } + y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; + jx += *incx; + jy += *incy; + kk += j; + /* L80: */ + } } + } else { + /* Form y when AP contains the lower triangle. */ -/* Start the operations. In this version the elements of the array AP */ -/* are accessed sequentially with one pass through AP. */ - -/* First form y := beta*y. */ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - kk = 1; - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; -/* L50: */ - } - y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; - kk += j; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; - jx += *incx; - jy += *incy; - kk += j; -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * ap[kk]; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; + /* L90: */ + } + y[j] += *alpha * temp2; + kk += *n - j + 1; + /* L100: */ + } } else { - -/* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - y[j] += temp1 * ap[kk]; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; -/* L90: */ - } - y[j] += *alpha * temp2; - kk += *n - j + 1; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - y[jy] += temp1 * ap[kk]; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; - kk += *n - j + 1; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * ap[kk]; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + /* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j + 1; + /* L120: */ + } } + } - return 0; - -/* End of SSPMV . */ + /* End of SSPMV . */ } /* sspmv_ */ -
diff --git a/blas/f2c/stbmv.c b/blas/f2c/stbmv.c index b5a68b5..43329f6 100644 --- a/blas/f2c/stbmv.c +++ b/blas/f2c/stbmv.c
@@ -1,428 +1,417 @@ /* stbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, - integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen - uplo_len, ftnlen trans_len, ftnlen diag_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; +/* Subroutine */ void stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, + real *x, integer *incx) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - real temp; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); - logical nounit; + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + real temp; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); + logical nounit; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* STBMV performs one of the matrix-vector operations */ + /* STBMV performs one of the matrix-vector operations */ -/* x := A*x, or x := A'*x, */ + /* x := A*x, or x := A'*x, */ -/* where x is an n element vector and A is an n by n unit, or non-unit, */ -/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + /* where x is an n element vector and A is an n by n unit, or non-unit, */ + /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the matrix is an upper or */ -/* lower triangular matrix as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the matrix is an upper or */ + /* lower triangular matrix as follows: */ -/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ -/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* TRANS - CHARACTER*1. */ -/* On entry, TRANS specifies the operation to be performed as */ -/* follows: */ + /* TRANS - CHARACTER*1. */ + /* On entry, TRANS specifies the operation to be performed as */ + /* follows: */ -/* TRANS = 'N' or 'n' x := A*x. */ + /* TRANS = 'N' or 'n' x := A*x. */ -/* TRANS = 'T' or 't' x := A'*x. */ + /* TRANS = 'T' or 't' x := A'*x. */ -/* TRANS = 'C' or 'c' x := A'*x. */ + /* TRANS = 'C' or 'c' x := A'*x. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* DIAG - CHARACTER*1. */ -/* On entry, DIAG specifies whether or not A is unit */ -/* triangular as follows: */ + /* DIAG - CHARACTER*1. */ + /* On entry, DIAG specifies whether or not A is unit */ + /* triangular as follows: */ -/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ -/* DIAG = 'N' or 'n' A is not assumed to be unit */ -/* triangular. */ + /* DIAG = 'N' or 'n' A is not assumed to be unit */ + /* triangular. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry with UPLO = 'U' or 'u', K specifies the number of */ -/* super-diagonals of the matrix A. */ -/* On entry with UPLO = 'L' or 'l', K specifies the number of */ -/* sub-diagonals of the matrix A. */ -/* K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry with UPLO = 'U' or 'u', K specifies the number of */ + /* super-diagonals of the matrix A. */ + /* On entry with UPLO = 'L' or 'l', K specifies the number of */ + /* sub-diagonals of the matrix A. */ + /* K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* A - REAL array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer an upper */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* A - REAL array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer an upper */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer a lower */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer a lower */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Note that when DIAG = 'U' or 'u' the elements of the array A */ -/* corresponding to the diagonal elements of the matrix are not */ -/* referenced, but are assumed to be unity. */ -/* Unchanged on exit. */ + /* Note that when DIAG = 'U' or 'u' the elements of the array A */ + /* corresponding to the diagonal elements of the matrix are not */ + /* referenced, but are assumed to be unity. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - REAL array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. On exit, X is overwritten with the */ -/* transformed vector x. */ + /* X - REAL array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. On exit, X is overwritten with the */ + /* transformed vector x. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, - "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( - ftnlen)1)) { - info = 2; - } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, - "N", (ftnlen)1, (ftnlen)1)) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("STBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { + info = 2; + } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("STBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0) { - return 0; - } + if (*n == 0) { + return; + } - nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, "N"); -/* Set up the start point in X if the increment is not unity. This */ -/* will be ( N - 1 )*INCX too small for descending loops. */ + /* Set up the start point in X if the increment is not unity. This */ + /* will be ( N - 1 )*INCX too small for descending loops. */ - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ - if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(trans, "N")) { + /* Form x := A*x. */ -/* Form x := A*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.f) { - temp = x[j]; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; -/* L10: */ - } - if (nounit) { - x[j] *= a[kplus1 + j * a_dim1]; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix += *incx; -/* L30: */ - } - if (nounit) { - x[jx] *= a[kplus1 + j * a_dim1]; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.f) { - temp = x[j]; - l = 1 - j; -/* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j * a_dim1 + 1]; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix -= *incx; -/* L70: */ - } - if (nounit) { - x[jx] *= a[j * a_dim1 + 1]; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } -/* L80: */ - } - } - } + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f) { + temp = x[j]; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; + /* L10: */ + } + if (nounit) { + x[j] *= a[kplus1 + j * a_dim1]; + } + } + /* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix += *incx; + /* L30: */ + } + if (nounit) { + x[jx] *= a[kplus1 + j * a_dim1]; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } + /* L40: */ + } + } } else { - -/* Form x := A'*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - x[j] = temp; -/* L100: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix -= *incx; -/* L110: */ - } - x[jx] = temp; - jx -= *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[j]; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; -/* L130: */ - } - x[j] = temp; -/* L140: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[jx]; - kx += *incx; - ix = kx; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L150: */ - } - x[jx] = temp; - jx += *incx; -/* L160: */ - } - } - } + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.f) { + temp = x[j]; + l = 1 - j; + /* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; + /* L50: */ + } + if (nounit) { + x[j] *= a[j * a_dim1 + 1]; + } + } + /* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix -= *incx; + /* L70: */ + } + if (nounit) { + x[jx] *= a[j * a_dim1 + 1]; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } + /* L80: */ + } + } } + } else { + /* Form x := A'*x. */ - return 0; + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; + /* L90: */ + } + x[j] = temp; + /* L100: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix -= *incx; + /* L110: */ + } + x[jx] = temp; + jx -= *incx; + /* L120: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[j]; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; + /* L130: */ + } + x[j] = temp; + /* L140: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[jx]; + kx += *incx; + ix = kx; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + /* L150: */ + } + x[jx] = temp; + jx += *incx; + /* L160: */ + } + } + } + } -/* End of STBMV . */ + /* End of STBMV . */ } /* stbmv_ */ -
diff --git a/blas/f2c/zhbmv.c b/blas/f2c/zhbmv.c index 42da13d..cef1117 100644 --- a/blas/f2c/zhbmv.c +++ b/blas/f2c/zhbmv.c
@@ -1,488 +1,456 @@ /* zhbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex - *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer * - incx, doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen - uplo_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; +static inline void d_cnjg(doublecomplex *r, doublecomplex *z) { + r->r = z->r; + r->i = -(z->i); +} - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); +/* Subroutine */ void zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - doublecomplex temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* ZHBMV performs the matrix-vector operation */ + /* ZHBMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n hermitian band matrix, with k super-diagonals. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n hermitian band matrix, with k super-diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the band matrix A is being supplied as */ -/* follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the band matrix A is being supplied as */ + /* follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* being supplied. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* being supplied. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* being supplied. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* being supplied. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry, K specifies the number of super-diagonals of the */ -/* matrix A. K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry, K specifies the number of super-diagonals of the */ + /* matrix A. K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* ALPHA - COMPLEX*16 . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - COMPLEX*16 . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the hermitian matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer the upper */ -/* triangular part of a hermitian band matrix from conventional */ -/* full matrix storage to band storage: */ + /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the hermitian matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer the upper */ + /* triangular part of a hermitian band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the hermitian matrix, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer the lower */ -/* triangular part of a hermitian band matrix from conventional */ -/* full matrix storage to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the hermitian matrix, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer the lower */ + /* triangular part of a hermitian band matrix from conventional */ + /* full matrix storage to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Note that the imaginary parts of the diagonal elements need */ -/* not be set and are assumed to be zero. */ -/* Unchanged on exit. */ + /* Note that the imaginary parts of the diagonal elements need */ + /* not be set and are assumed to be zero. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - COMPLEX*16 array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the */ -/* vector x. */ -/* Unchanged on exit. */ + /* X - COMPLEX*16 array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the */ + /* vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - COMPLEX*16 . */ -/* On entry, BETA specifies the scalar beta. */ -/* Unchanged on exit. */ + /* BETA - COMPLEX*16 . */ + /* On entry, BETA specifies the scalar beta. */ + /* Unchanged on exit. */ -/* Y - COMPLEX*16 array of DIMENSION at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the */ -/* vector y. On exit, Y is overwritten by the updated vector y. */ + /* Y - COMPLEX*16 array of DIMENSION at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the */ + /* vector y. On exit, Y is overwritten by the updated vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("ZHBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("ZHBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && - beta->i == 0.))) { - return 0; - } + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array A */ + /* are accessed sequentially with one pass through A. */ + + /* First form y := beta*y. */ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (alpha->r == 0. && alpha->i == 0.) { + return; + } + if (lsame_(uplo, "U")) { + /* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + /* L50: */ + } + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + d__1 = a[i__3].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; + /* L70: */ + } + i__3 = jy; + i__4 = jy; + i__2 = kplus1 + j * a_dim1; + d__1 = a[i__2].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } + /* L80: */ + } } + } else { + /* Form y when lower triangle of A is stored. */ -/* Start the operations. In this version the elements of the array A */ -/* are accessed sequentially with one pass through A. */ - -/* First form y := beta*y. */ - - if (beta->r != 1. || beta->i != 0.) { - if (*incy == 1) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0., y[i__2].i = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0., y[i__2].i = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0. && alpha->i == 0.) { - return 0; - } - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__2 = i__; - z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i = - z__3.r * x[i__2].i + z__3.i * x[i__2].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L50: */ - } - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - d__1 = a[i__3].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i = - alpha->r * x[i__4].i + alpha->i * x[i__4].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - ix = kx; - iy = ky; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = - z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ix += *incx; - iy += *incy; -/* L70: */ - } - i__3 = jy; - i__4 = jy; - i__2 = kplus1 + j * a_dim1; - d__1 = a[i__2].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = j; + z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = j; + i__4 = j; + i__2 = j * a_dim1 + 1; + d__1 = a[i__2].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__; + i__2 = i__; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + /* L90: */ + } + i__3 = j; + i__4 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + /* L100: */ + } } else { - -/* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = j; - z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = - alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__3 = j; - i__4 = j; - i__2 = j * a_dim1 + 1; - d__1 = a[i__2].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__; - i__2 = i__; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = - z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L90: */ - } - i__3 = j; - i__4 = j; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = jx; - z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = - alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__3 = jy; - i__4 = jy; - i__2 = j * a_dim1 + 1; - d__1 = a[i__2].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - l = 1 - j; - ix = jx; - iy = jy; -/* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4,i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = - z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L110: */ - } - i__3 = jy; - i__4 = jy; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - jx += *incx; - jy += *incy; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = jx; + z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = jy; + i__4 = jy; + i__2 = j * a_dim1 + 1; + d__1 = a[i__2].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + l = 1 - j; + ix = jx; + iy = jy; + /* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4, i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + /* L110: */ + } + i__3 = jy; + i__4 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + jx += *incx; + jy += *incy; + /* L120: */ + } } + } - return 0; - -/* End of ZHBMV . */ + /* End of ZHBMV . */ } /* zhbmv_ */ -
diff --git a/blas/f2c/zhpmv.c b/blas/f2c/zhpmv.c index fbe2f42..6b9a000 100644 --- a/blas/f2c/zhpmv.c +++ b/blas/f2c/zhpmv.c
@@ -1,438 +1,407 @@ /* zhpmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int zhpmv_(char *uplo, integer *n, doublecomplex *alpha, - doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * - beta, doublecomplex *y, integer *incy, ftnlen uplo_len) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; +static inline void d_cnjg(doublecomplex *r, doublecomplex *z) { + r->r = z->r; + r->i = -(z->i); +} - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); +/* Subroutine */ void zhpmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, + integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) { + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - doublecomplex temp1, temp2; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ void xerbla_(const char *, integer *); -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* ZHPMV performs the matrix-vector operation */ + /* ZHPMV performs the matrix-vector operation */ -/* y := alpha*A*x + beta*y, */ + /* y := alpha*A*x + beta*y, */ -/* where alpha and beta are scalars, x and y are n element vectors and */ -/* A is an n by n hermitian matrix, supplied in packed form. */ + /* where alpha and beta are scalars, x and y are n element vectors and */ + /* A is an n by n hermitian matrix, supplied in packed form. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the upper or lower */ -/* triangular part of the matrix A is supplied in the packed */ -/* array AP as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the upper or lower */ + /* triangular part of the matrix A is supplied in the packed */ + /* array AP as follows: */ -/* UPLO = 'U' or 'u' The upper triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'U' or 'u' The upper triangular part of A is */ + /* supplied in AP. */ -/* UPLO = 'L' or 'l' The lower triangular part of A is */ -/* supplied in AP. */ + /* UPLO = 'L' or 'l' The lower triangular part of A is */ + /* supplied in AP. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* ALPHA - COMPLEX*16 . */ -/* On entry, ALPHA specifies the scalar alpha. */ -/* Unchanged on exit. */ + /* ALPHA - COMPLEX*16 . */ + /* On entry, ALPHA specifies the scalar alpha. */ + /* Unchanged on exit. */ -/* AP - COMPLEX*16 array of DIMENSION at least */ -/* ( ( n*( n + 1 ) )/2 ). */ -/* Before entry with UPLO = 'U' or 'u', the array AP must */ -/* contain the upper triangular part of the hermitian matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ -/* and a( 2, 2 ) respectively, and so on. */ -/* Before entry with UPLO = 'L' or 'l', the array AP must */ -/* contain the lower triangular part of the hermitian matrix */ -/* packed sequentially, column by column, so that AP( 1 ) */ -/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ -/* and a( 3, 1 ) respectively, and so on. */ -/* Note that the imaginary parts of the diagonal elements need */ -/* not be set and are assumed to be zero. */ -/* Unchanged on exit. */ + /* AP - COMPLEX*16 array of DIMENSION at least */ + /* ( ( n*( n + 1 ) )/2 ). */ + /* Before entry with UPLO = 'U' or 'u', the array AP must */ + /* contain the upper triangular part of the hermitian matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ + /* and a( 2, 2 ) respectively, and so on. */ + /* Before entry with UPLO = 'L' or 'l', the array AP must */ + /* contain the lower triangular part of the hermitian matrix */ + /* packed sequentially, column by column, so that AP( 1 ) */ + /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ + /* and a( 3, 1 ) respectively, and so on. */ + /* Note that the imaginary parts of the diagonal elements need */ + /* not be set and are assumed to be zero. */ + /* Unchanged on exit. */ -/* X - COMPLEX*16 array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. */ -/* Unchanged on exit. */ + /* X - COMPLEX*16 array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. */ + /* Unchanged on exit. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* BETA - COMPLEX*16 . */ -/* On entry, BETA specifies the scalar beta. When BETA is */ -/* supplied as zero then Y need not be set on input. */ -/* Unchanged on exit. */ + /* BETA - COMPLEX*16 . */ + /* On entry, BETA specifies the scalar beta. When BETA is */ + /* supplied as zero then Y need not be set on input. */ + /* Unchanged on exit. */ -/* Y - COMPLEX*16 array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCY ) ). */ -/* Before entry, the incremented array Y must contain the n */ -/* element vector y. On exit, Y is overwritten by the updated */ -/* vector y. */ + /* Y - COMPLEX*16 array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCY ) ). */ + /* Before entry, the incremented array Y must contain the n */ + /* element vector y. On exit, Y is overwritten by the updated */ + /* vector y. */ -/* INCY - INTEGER. */ -/* On entry, INCY specifies the increment for the elements of */ -/* Y. INCY must not be zero. */ -/* Unchanged on exit. */ + /* INCY - INTEGER. */ + /* On entry, INCY specifies the increment for the elements of */ + /* Y. INCY must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - --y; - --x; - --ap; + /* Parameter adjustments */ + --y; + --x; + --ap; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("ZHPMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("ZHPMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && - beta->i == 0.))) { - return 0; - } + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) { + return; + } -/* Set up the start points in X and Y. */ + /* Set up the start points in X and Y. */ - if (*incx > 0) { - kx = 1; + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + + /* Start the operations. In this version the elements of the array AP */ + /* are accessed sequentially with one pass through AP. */ + + /* First form y := beta*y. */ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; + /* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + /* L20: */ + } + } } else { - kx = 1 - (*n - 1) * *incx; + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; + /* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; + /* L40: */ + } + } } - if (*incy > 0) { - ky = 1; + } + if (alpha->r == 0. && alpha->i == 0.) { + return; + } + kk = 1; + if (lsame_(uplo, "U")) { + /* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ++k; + /* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = kk + j - 1; + d__1 = ap[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + kk += j; + /* L60: */ + } } else { - ky = 1 - (*n - 1) * *incy; + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = iy; + i__4 = iy; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; + /* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = kk + j - 1; + d__1 = ap[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + kk += j; + /* L80: */ + } } + } else { + /* Form y when AP contains the lower triangle. */ -/* Start the operations. In this version the elements of the array AP */ -/* are accessed sequentially with one pass through AP. */ - -/* First form y := beta*y. */ - - if (beta->r != 1. || beta->i != 0.) { - if (*incy == 1) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0., y[i__2].i = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0., y[i__2].i = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0. && alpha->i == 0.) { - return 0; - } - kk = 1; - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - -/* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ++k; -/* L50: */ - } - i__2 = j; - i__3 = j; - i__4 = kk + j - 1; - d__1 = ap[i__4].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - kk += j; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - i__3 = iy; - i__4 = iy; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ix += *incx; - iy += *incy; -/* L70: */ - } - i__2 = jy; - i__3 = jy; - i__4 = kk + j - 1; - d__1 = ap[i__4].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jx += *incx; - jy += *incy; - kk += j; -/* L80: */ - } - } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = kk; + d__1 = ap[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ++k; + /* L90: */ + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + kk += *n - j + 1; + /* L100: */ + } } else { - -/* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = j; - i__3 = j; - i__4 = kk; - d__1 = ap[i__4].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ++k; -/* L90: */ - } - i__2 = j; - i__3 = j; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - kk += *n - j + 1; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = jy; - i__3 = jy; - i__4 = kk; - d__1 = ap[i__4].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, - z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L110: */ - } - i__2 = jy; - i__3 = jy; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jx += *incx; - jy += *incy; - kk += *n - j + 1; -/* L120: */ - } - } + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = kk; + d__1 = ap[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + /* L110: */ + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + kk += *n - j + 1; + /* L120: */ + } } + } - return 0; - -/* End of ZHPMV . */ + /* End of ZHPMV . */ } /* zhpmv_ */ -
diff --git a/blas/f2c/ztbmv.c b/blas/f2c/ztbmv.c index 3bf0beb..2fbfa00 100644 --- a/blas/f2c/ztbmv.c +++ b/blas/f2c/ztbmv.c
@@ -1,647 +1,586 @@ /* ztbmv.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - http://www.netlib.org/f2c/libf2c.zip + http://www.netlib.org/f2c/libf2c.zip */ #include "datatypes.h" -/* Subroutine */ int ztbmv_(char *uplo, char *trans, char *diag, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer - *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2, z__3; +static inline void d_cnjg(doublecomplex *r, doublecomplex *z) { + r->r = z->r; + r->i = -(z->i); +} - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); +/* Subroutine */ void ztbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a, + integer *lda, doublecomplex *x, integer *incx) { + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - doublecomplex temp; - extern logical lsame_(char *, char *, ftnlen, ftnlen); - integer kplus1; - extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); - logical noconj, nounit; + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *); + integer kplus1; + extern /* Subroutine */ void xerbla_(const char *, integer *); + logical noconj, nounit; -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ -/* Purpose */ -/* ======= */ + /* Purpose */ + /* ======= */ -/* ZTBMV performs one of the matrix-vector operations */ + /* ZTBMV performs one of the matrix-vector operations */ -/* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ + /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ -/* where x is an n element vector and A is an n by n unit, or non-unit, */ -/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + /* where x is an n element vector and A is an n by n unit, or non-unit, */ + /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -/* Arguments */ -/* ========== */ + /* Arguments */ + /* ========== */ -/* UPLO - CHARACTER*1. */ -/* On entry, UPLO specifies whether the matrix is an upper or */ -/* lower triangular matrix as follows: */ + /* UPLO - CHARACTER*1. */ + /* On entry, UPLO specifies whether the matrix is an upper or */ + /* lower triangular matrix as follows: */ -/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ -/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* TRANS - CHARACTER*1. */ -/* On entry, TRANS specifies the operation to be performed as */ -/* follows: */ + /* TRANS - CHARACTER*1. */ + /* On entry, TRANS specifies the operation to be performed as */ + /* follows: */ -/* TRANS = 'N' or 'n' x := A*x. */ + /* TRANS = 'N' or 'n' x := A*x. */ -/* TRANS = 'T' or 't' x := A'*x. */ + /* TRANS = 'T' or 't' x := A'*x. */ -/* TRANS = 'C' or 'c' x := conjg( A' )*x. */ + /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* DIAG - CHARACTER*1. */ -/* On entry, DIAG specifies whether or not A is unit */ -/* triangular as follows: */ + /* DIAG - CHARACTER*1. */ + /* On entry, DIAG specifies whether or not A is unit */ + /* triangular as follows: */ -/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ -/* DIAG = 'N' or 'n' A is not assumed to be unit */ -/* triangular. */ + /* DIAG = 'N' or 'n' A is not assumed to be unit */ + /* triangular. */ -/* Unchanged on exit. */ + /* Unchanged on exit. */ -/* N - INTEGER. */ -/* On entry, N specifies the order of the matrix A. */ -/* N must be at least zero. */ -/* Unchanged on exit. */ + /* N - INTEGER. */ + /* On entry, N specifies the order of the matrix A. */ + /* N must be at least zero. */ + /* Unchanged on exit. */ -/* K - INTEGER. */ -/* On entry with UPLO = 'U' or 'u', K specifies the number of */ -/* super-diagonals of the matrix A. */ -/* On entry with UPLO = 'L' or 'l', K specifies the number of */ -/* sub-diagonals of the matrix A. */ -/* K must satisfy 0 .le. K. */ -/* Unchanged on exit. */ + /* K - INTEGER. */ + /* On entry with UPLO = 'U' or 'u', K specifies the number of */ + /* super-diagonals of the matrix A. */ + /* On entry with UPLO = 'L' or 'l', K specifies the number of */ + /* sub-diagonals of the matrix A. */ + /* K must satisfy 0 .le. K. */ + /* Unchanged on exit. */ -/* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ -/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ -/* by n part of the array A must contain the upper triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row */ -/* ( k + 1 ) of the array, the first super-diagonal starting at */ -/* position 2 in row k, and so on. The top left k by k triangle */ -/* of the array A is not referenced. */ -/* The following program segment will transfer an upper */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ + /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ + /* by n part of the array A must contain the upper triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row */ + /* ( k + 1 ) of the array, the first super-diagonal starting at */ + /* position 2 in row k, and so on. The top left k by k triangle */ + /* of the array A is not referenced. */ + /* The following program segment will transfer an upper */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = K + 1 - J */ -/* DO 10, I = MAX( 1, J - K ), J */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = K + 1 - J */ + /* DO 10, I = MAX( 1, J - K ), J */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ -/* by n part of the array A must contain the lower triangular */ -/* band part of the matrix of coefficients, supplied column by */ -/* column, with the leading diagonal of the matrix in row 1 of */ -/* the array, the first sub-diagonal starting at position 1 in */ -/* row 2, and so on. The bottom right k by k triangle of the */ -/* array A is not referenced. */ -/* The following program segment will transfer a lower */ -/* triangular band matrix from conventional full matrix storage */ -/* to band storage: */ + /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ + /* by n part of the array A must contain the lower triangular */ + /* band part of the matrix of coefficients, supplied column by */ + /* column, with the leading diagonal of the matrix in row 1 of */ + /* the array, the first sub-diagonal starting at position 1 in */ + /* row 2, and so on. The bottom right k by k triangle of the */ + /* array A is not referenced. */ + /* The following program segment will transfer a lower */ + /* triangular band matrix from conventional full matrix storage */ + /* to band storage: */ -/* DO 20, J = 1, N */ -/* M = 1 - J */ -/* DO 10, I = J, MIN( N, J + K ) */ -/* A( M + I, J ) = matrix( I, J ) */ -/* 10 CONTINUE */ -/* 20 CONTINUE */ + /* DO 20, J = 1, N */ + /* M = 1 - J */ + /* DO 10, I = J, MIN( N, J + K ) */ + /* A( M + I, J ) = matrix( I, J ) */ + /* 10 CONTINUE */ + /* 20 CONTINUE */ -/* Note that when DIAG = 'U' or 'u' the elements of the array A */ -/* corresponding to the diagonal elements of the matrix are not */ -/* referenced, but are assumed to be unity. */ -/* Unchanged on exit. */ + /* Note that when DIAG = 'U' or 'u' the elements of the array A */ + /* corresponding to the diagonal elements of the matrix are not */ + /* referenced, but are assumed to be unity. */ + /* Unchanged on exit. */ -/* LDA - INTEGER. */ -/* On entry, LDA specifies the first dimension of A as declared */ -/* in the calling (sub) program. LDA must be at least */ -/* ( k + 1 ). */ -/* Unchanged on exit. */ + /* LDA - INTEGER. */ + /* On entry, LDA specifies the first dimension of A as declared */ + /* in the calling (sub) program. LDA must be at least */ + /* ( k + 1 ). */ + /* Unchanged on exit. */ -/* X - COMPLEX*16 array of dimension at least */ -/* ( 1 + ( n - 1 )*abs( INCX ) ). */ -/* Before entry, the incremented array X must contain the n */ -/* element vector x. On exit, X is overwritten with the */ -/* transformed vector x. */ + /* X - COMPLEX*16 array of dimension at least */ + /* ( 1 + ( n - 1 )*abs( INCX ) ). */ + /* Before entry, the incremented array X must contain the n */ + /* element vector x. On exit, X is overwritten with the */ + /* transformed vector x. */ -/* INCX - INTEGER. */ -/* On entry, INCX specifies the increment for the elements of */ -/* X. INCX must not be zero. */ -/* Unchanged on exit. */ + /* INCX - INTEGER. */ + /* On entry, INCX specifies the increment for the elements of */ + /* X. INCX must not be zero. */ + /* Unchanged on exit. */ -/* Further Details */ -/* =============== */ + /* Further Details */ + /* =============== */ -/* Level 2 Blas routine. */ + /* Level 2 Blas routine. */ -/* -- Written on 22-October-1986. */ -/* Jack Dongarra, Argonne National Lab. */ -/* Jeremy Du Croz, Nag Central Office. */ -/* Sven Hammarling, Nag Central Office. */ -/* Richard Hanson, Sandia National Labs. */ + /* -- Written on 22-October-1986. */ + /* Jack Dongarra, Argonne National Lab. */ + /* Jeremy Du Croz, Nag Central Office. */ + /* Sven Hammarling, Nag Central Office. */ + /* Richard Hanson, Sandia National Labs. */ -/* ===================================================================== */ + /* ===================================================================== */ -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ -/* Test the input parameters. */ + /* Test the input parameters. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( - ftnlen)1, (ftnlen)1)) { - info = 1; - } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, - "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( - ftnlen)1)) { - info = 2; - } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, - "N", (ftnlen)1, (ftnlen)1)) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("ZTBMV ", &info, (ftnlen)6); - return 0; - } + /* Function Body */ + info = 0; + if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { + info = 1; + } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { + info = 2; + } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("ZTBMV ", &info); + return; + } -/* Quick return if possible. */ + /* Quick return if possible. */ - if (*n == 0) { - return 0; - } + if (*n == 0) { + return; + } - noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1); - nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); -/* Set up the start point in X if the increment is not unity. This */ -/* will be ( N - 1 )*INCX too small for descending loops. */ + /* Set up the start point in X if the increment is not unity. This */ + /* will be ( N - 1 )*INCX too small for descending loops. */ - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } -/* Start the operations. In this version the elements of A are */ -/* accessed sequentially with one pass through A. */ + /* Start the operations. In this version the elements of A are */ + /* accessed sequentially with one pass through A. */ - if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(trans, "N")) { + /* Form x := A*x. */ -/* Form x := A*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - l = kplus1 - j; -/* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + - z__2.i; - x[i__2].r = z__1.r, x[i__2].i = z__1.i; -/* L10: */ - } - if (nounit) { - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, z__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - if (x[i__4].r != 0. || x[i__4].i != 0.) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = kplus1 - j; -/* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { - i__4 = ix; - i__2 = ix; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + - z__2.i; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - ix += *incx; -/* L30: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__2 = kplus1 + j * a_dim1; - z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[ - i__2].i, z__1.i = x[i__4].r * a[i__2].i + - x[i__4].i * a[i__2].r; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0. || x[i__1].i != 0.) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - l = 1 - j; -/* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { - i__1 = i__; - i__3 = i__; - i__2 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - z__2.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + - z__2.i; - x[i__1].r = z__1.r, x[i__1].i = z__1.i; -/* L50: */ - } - if (nounit) { - i__4 = j; - i__1 = j; - i__3 = j * a_dim1 + 1; - z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[ - i__3].i, z__1.i = x[i__1].r * a[i__3].i + - x[i__1].i * a[i__3].r; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__4 = jx; - if (x[i__4].r != 0. || x[i__4].i != 0.) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = 1 - j; -/* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { - i__4 = ix; - i__1 = ix; - i__2 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - z__2.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + - z__2.i; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - ix -= *incx; -/* L70: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__1 = j * a_dim1 + 1; - z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[ - i__1].i, z__1.i = x[i__4].r * a[i__1].i + - x[i__4].i * a[i__1].r; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } -/* L80: */ - } - } - } + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + l = kplus1 - j; + /* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + /* L10: */ + } + if (nounit) { + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i, + z__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + } + } + /* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + if (x[i__4].r != 0. || x[i__4].i != 0.) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = kplus1 - j; + /* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { + i__4 = ix; + i__2 = ix; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + z__2.i; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + ix += *incx; + /* L30: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__2 = kplus1 + j * a_dim1; + z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i, + z__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } + /* L40: */ + } + } } else { - -/* Form x := A'*x or x := conjg( A' )*x. */ - - if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__3 = j; - temp.r = x[i__3].r, temp.i = x[i__3].i; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - z__1.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = i__; - z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ - i__1].i, z__2.i = a[i__4].r * x[i__1].i + - a[i__4].i * x[i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L90: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, - z__2.i = z__3.r * x[i__4].i + z__3.i * x[ - i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L100: */ - } - } - i__3 = j; - x[i__3].r = temp.r, x[i__3].i = temp.i; -/* L110: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__3 = jx; - temp.r = x[i__3].r, temp.i = x[i__3].i; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - z__1.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = ix; - z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ - i__1].i, z__2.i = a[i__4].r * x[i__1].i + - a[i__4].i * x[i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix -= *incx; -/* L120: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4,i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, - z__2.i = z__3.r * x[i__4].i + z__3.i * x[ - i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix -= *incx; -/* L130: */ - } - } - i__3 = jx; - x[i__3].r = temp.r, x[i__3].i = temp.i; - jx -= *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = j; - temp.r = x[i__4].r, temp.i = x[i__4].i; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - z__1.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = i__; - z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, z__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L150: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j * a_dim1 + 1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__1 = i__; - z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, - z__2.i = z__3.r * x[i__1].i + z__3.i * x[ - i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L160: */ - } - } - i__4 = j; - x[i__4].r = temp.r, x[i__4].i = temp.i; -/* L170: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - kx += *incx; - ix = kx; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - z__1.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = ix; - z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, z__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; -/* L180: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j * a_dim1 + 1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } -/* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1,i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__1 = ix; - z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, - z__2.i = z__3.r * x[i__1].i + z__3.i * x[ - i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; -/* L190: */ - } - } - i__4 = jx; - x[i__4].r = temp.r, x[i__4].i = temp.i; - jx += *incx; -/* L200: */ - } - } - } + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + l = 1 - j; + /* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { + i__1 = i__; + i__3 = i__; + i__2 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + /* L50: */ + } + if (nounit) { + i__4 = j; + i__1 = j; + i__3 = j * a_dim1 + 1; + z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i, + z__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + } + } + /* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__4 = jx; + if (x[i__4].r != 0. || x[i__4].i != 0.) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = 1 - j; + /* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { + i__4 = ix; + i__1 = ix; + i__2 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; + z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + z__2.i; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + ix -= *incx; + /* L70: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__1 = j * a_dim1 + 1; + z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i, + z__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } + /* L80: */ + } + } } + } else { + /* Form x := A'*x or x := conjg( A' )*x. */ - return 0; + if (lsame_(uplo, "U")) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__3 = j; + temp.r = x[i__3].r, temp.i = x[i__3].i; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = i__; + z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, + z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + /* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + /* L100: */ + } + } + i__3 = j; + x[i__3].r = temp.r, x[i__3].i = temp.i; + /* L110: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__3 = jx; + temp.r = x[i__3].r, temp.i = x[i__3].i; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = ix; + z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, + z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + /* L120: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4, i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + /* L130: */ + } + } + i__3 = jx; + x[i__3].r = temp.r, x[i__3].i = temp.i; + jx -= *incx; + /* L140: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = j; + temp.r = x[i__4].r, temp.i = x[i__4].i; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = i__; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, + z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + /* L150: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j * a_dim1 + 1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__1 = i__; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + /* L160: */ + } + } + i__4 = j; + x[i__4].r = temp.r, x[i__4].i = temp.i; + /* L170: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + kx += *incx; + ix = kx; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = ix; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, + z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + /* L180: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j * a_dim1 + 1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + /* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1, i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__1 = ix; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + /* L190: */ + } + } + i__4 = jx; + x[i__4].r = temp.r, x[i__4].i = temp.i; + jx += *incx; + /* L200: */ + } + } + } + } -/* End of ZTBMV . */ + /* End of ZTBMV . */ } /* ztbmv_ */ -
diff --git a/blas/level1_cplx_impl.h b/blas/level1_cplx_impl.h index 552b384..be88b92 100644 --- a/blas/level1_cplx_impl.h +++ b/blas/level1_cplx_impl.h
@@ -24,7 +24,7 @@ // computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum // res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n -RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC(asum))(int *n, RealScalar *px, int *incx) { +extern "C" RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC_NAME(asum))(int *n, RealScalar *px, int *incx) { // std::cerr << "__asum " << *n << " " << *incx << "\n"; Complex *x = reinterpret_cast<Complex *>(px); @@ -36,7 +36,7 @@ return make_vector(x, *n, std::abs(*incx)).unaryExpr<scalar_norm1_op>().sum(); } -int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amax))(int *n, RealScalar *px, int *incx) { +extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amax))(int *n, RealScalar *px, int *incx) { if (*n <= 0) return 0; Scalar *x = reinterpret_cast<Scalar *>(px); @@ -48,7 +48,7 @@ return int(ret) + 1; } -int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amin))(int *n, RealScalar *px, int *incx) { +extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amin))(int *n, RealScalar *px, int *incx) { if (*n <= 0) return 0; Scalar *x = reinterpret_cast<Scalar *>(px); @@ -61,13 +61,13 @@ } // computes a dot product of a conjugated vector with another vector. -int EIGEN_BLAS_FUNC(dotcw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) { +EIGEN_BLAS_FUNC(dotcw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) { // std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n"; Scalar *res = reinterpret_cast<Scalar *>(pres); if (*n <= 0) { *res = Scalar(0); - return 0; + return; } Scalar *x = reinterpret_cast<Scalar *>(px); @@ -83,16 +83,15 @@ *res = (make_vector(x, *n, *incx).dot(make_vector(y, *n, -*incy).reverse())); else if (*incx < 0 && *incy < 0) *res = (make_vector(x, *n, -*incx).reverse().dot(make_vector(y, *n, -*incy).reverse())); - return 0; } // computes a vector-vector dot product without complex conjugation. -int EIGEN_BLAS_FUNC(dotuw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) { +EIGEN_BLAS_FUNC(dotuw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pres) { Scalar *res = reinterpret_cast<Scalar *>(pres); if (*n <= 0) { *res = Scalar(0); - return 0; + return; } Scalar *x = reinterpret_cast<Scalar *>(px); @@ -108,10 +107,9 @@ *res = (make_vector(x, *n, *incx).cwiseProduct(make_vector(y, *n, -*incy).reverse())).sum(); else if (*incx < 0 && *incy < 0) *res = (make_vector(x, *n, -*incx).reverse().cwiseProduct(make_vector(y, *n, -*incy).reverse())).sum(); - return 0; } -RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC(nrm2))(int *n, RealScalar *px, int *incx) { +extern "C" RealScalar EIGEN_CAT(REAL_SCALAR_SUFFIX, EIGEN_BLAS_FUNC_NAME(nrm2))(int *n, RealScalar *px, int *incx) { // std::cerr << "__nrm2 " << *n << " " << *incx << "\n"; if (*n <= 0) return 0; @@ -122,9 +120,9 @@ return make_vector(x, *n, *incx).stableNorm(); } -int EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, rot))(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, - RealScalar *pc, RealScalar *ps) { - if (*n <= 0) return 0; +EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, rot)) +(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) { + if (*n <= 0) return; Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); @@ -144,12 +142,10 @@ internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s)); else internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s)); - - return 0; } -int EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, scal))(int *n, RealScalar *palpha, RealScalar *px, int *incx) { - if (*n <= 0) return 0; +EIGEN_BLAS_FUNC(EIGEN_CAT(REAL_SCALAR_SUFFIX, scal))(int *n, RealScalar *palpha, RealScalar *px, int *incx) { + if (*n <= 0) return; Scalar *x = reinterpret_cast<Scalar *>(px); RealScalar alpha = *palpha; @@ -160,6 +156,4 @@ make_vector(x, *n) *= alpha; else make_vector(x, *n, std::abs(*incx)) *= alpha; - - return 0; }
diff --git a/blas/level1_impl.h b/blas/level1_impl.h index f1ff3cc..2422d10 100644 --- a/blas/level1_impl.h +++ b/blas/level1_impl.h
@@ -9,13 +9,13 @@ #include "common.h" -int EIGEN_BLAS_FUNC(axpy)(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py, - const int *incy) { +EIGEN_BLAS_FUNC(axpy) +(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py, const int *incy) { const Scalar *x = reinterpret_cast<const Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); Scalar alpha = *reinterpret_cast<const Scalar *>(palpha); - if (*n <= 0) return 0; + if (*n <= 0) return; if (*incx == 1 && *incy == 1) make_vector(y, *n) += alpha * make_vector(x, *n); @@ -27,12 +27,10 @@ make_vector(y, *n, *incy) += alpha * make_vector(x, *n, -*incx).reverse(); else if (*incx < 0 && *incy < 0) make_vector(y, *n, -*incy).reverse() += alpha * make_vector(x, *n, -*incx).reverse(); - - return 0; } -int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { - if (*n <= 0) return 0; +EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { + if (*n <= 0) return; Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); @@ -49,11 +47,9 @@ y += *incy; } } - - return 0; } -int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) { +EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) { using std::abs; using std::sqrt; @@ -104,12 +100,10 @@ // r.makeGivens(a,b); // *c = r.c(); // *s = r.s(); - - return 0; } -int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) { - if (*n <= 0) return 0; +EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) { + if (*n <= 0) return; Scalar *x = reinterpret_cast<Scalar *>(px); Scalar alpha = *reinterpret_cast<Scalar *>(palpha); @@ -118,12 +112,10 @@ make_vector(x, *n) *= alpha; else make_vector(x, *n, std::abs(*incx)) *= alpha; - - return 0; } -int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { - if (*n <= 0) return 0; +EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { + if (*n <= 0) return; Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); @@ -138,6 +130,4 @@ make_vector(y, *n, *incy).swap(make_vector(x, *n, -*incx).reverse()); else if (*incx < 0 && *incy < 0) make_vector(y, *n, -*incy).reverse().swap(make_vector(x, *n, -*incx).reverse()); - - return 1; }
diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h index d5cb573..cd9c189 100644 --- a/blas/level1_real_impl.h +++ b/blas/level1_real_impl.h
@@ -11,7 +11,7 @@ // computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum // res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n -RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) { +extern "C" RealScalar EIGEN_BLAS_FUNC_NAME(asum)(int *n, Scalar *px, int *incx) { // std::cerr << "_asum " << *n << " " << *incx << "\n"; Scalar *x = reinterpret_cast<Scalar *>(px); @@ -24,7 +24,7 @@ return make_vector(x, *n, std::abs(*incx)).cwiseAbs().sum(); } -int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amax))(int *n, RealScalar *px, int *incx) { +extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amax))(int *n, Scalar *px, int *incx) { if (*n <= 0) return 0; Scalar *x = reinterpret_cast<Scalar *>(px); @@ -36,7 +36,7 @@ return int(ret) + 1; } -int EIGEN_CAT(i, EIGEN_BLAS_FUNC(amin))(int *n, RealScalar *px, int *incx) { +extern "C" int EIGEN_CAT(i, EIGEN_BLAS_FUNC_NAME(amin))(int *n, Scalar *px, int *incx) { if (*n <= 0) return 0; Scalar *x = reinterpret_cast<Scalar *>(px); @@ -49,7 +49,7 @@ } // computes a vector-vector dot product. -Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { +extern "C" Scalar EIGEN_BLAS_FUNC_NAME(dot)(int *n, Scalar *px, int *incx, Scalar *py, int *incy) { // std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n"; if (*n <= 0) return 0; @@ -73,7 +73,7 @@ // computes the Euclidean norm of a vector. // FIXME -Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) { +extern "C" Scalar EIGEN_BLAS_FUNC_NAME(nrm2)(int *n, Scalar *px, int *incx) { // std::cerr << "_nrm2 " << *n << " " << *incx << "\n"; if (*n <= 0) return 0; @@ -85,9 +85,9 @@ return make_vector(x, *n, std::abs(*incx)).stableNorm(); } -int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) { +EIGEN_BLAS_FUNC(rot)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pc, Scalar *ps) { // std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n"; - if (*n <= 0) return 0; + if (*n <= 0) return; Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); @@ -106,13 +106,11 @@ internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c, s)); else internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c, s)); - - return 0; } /* // performs rotation of points in the modified plane. -int EIGEN_BLAS_FUNC(rotm)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *param) +EIGEN_BLAS_FUNC(rotm)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *param) { Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* y = reinterpret_cast<Scalar*>(py); @@ -123,7 +121,7 @@ } // computes the modified parameters for a Givens rotation. -int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param) +EIGEN_BLAS_FUNC(rotmg)(Scalar *d1, Scalar *d2, Scalar *x1, Scalar *x2, Scalar *param) { // TODO
diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index 8c56a6f..f04dda1 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h
@@ -16,9 +16,9 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian matrix. */ -int EIGEN_BLAS_FUNC(hemv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, - const int *lda, const RealScalar *px, const int *incx, const RealScalar *pbeta, - RealScalar *py, const int *incy) { +EIGEN_BLAS_FUNC(hemv) +(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *px, + const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -45,9 +45,9 @@ info = 7; else if (*incy == 0) info = 10; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMV ", &info); - if (*n == 0) return 1; + if (*n == 0) return; const Scalar *actual_x = get_compact_vector(x, *n, *incx); Scalar *actual_y = get_compact_vector(y, *n, *incy); @@ -61,15 +61,13 @@ if (alpha != Scalar(0)) { int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, a, *lda, actual_x, actual_y, alpha); } if (actual_x != x) delete[] actual_x; if (actual_y != y) delete[] copy_back(actual_y, y, *n, *incy); - - return 1; } /** ZHBMV performs the matrix-vector operation @@ -79,7 +77,7 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian band matrix, with k super-diagonals. */ -// int EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, +// EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, // RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) // { // return 1; @@ -92,7 +90,7 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian matrix, supplied in packed form. */ -// int EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar +// EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar // *beta, RealScalar *y, int *incy) // { // return 1; @@ -105,7 +103,7 @@ * where alpha is a real scalar, x is an n element vector and A is an * n by n hermitian matrix, supplied in packed form. */ -int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) { +EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) { typedef void (*functype)(int, Scalar *, const Scalar *, RealScalar); static const functype func[2] = { // array index: UP @@ -125,20 +123,18 @@ info = 2; else if (*incx == 0) info = 5; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *n, *incx); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, ap, x_cpy, alpha); if (x_cpy != x) delete[] x_cpy; - - return 1; } /** ZHPR2 performs the hermitian rank 2 operation @@ -148,8 +144,8 @@ * where alpha is a scalar, x and y are n element vectors and A is an * n by n hermitian matrix, supplied in packed form. */ -int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, - RealScalar *pap) { +EIGEN_BLAS_FUNC(hpr2) +(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -172,22 +168,20 @@ info = 5; else if (*incy == 0) info = 7; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR2 ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HPR2 ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *n, *incx); Scalar *y_cpy = get_compact_vector(y, *n, *incy); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, ap, x_cpy, y_cpy, alpha); if (x_cpy != x) delete[] x_cpy; if (y_cpy != y) delete[] y_cpy; - - return 1; } /** ZHER performs the hermitian rank 1 operation @@ -197,7 +191,7 @@ * where alpha is a real scalar, x is an n element vector and A is an * n by n hermitian matrix. */ -int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) { +EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &); static const functype func[2] = { // array index: UP @@ -219,22 +213,20 @@ info = 5; else if (*lda < std::max(1, *n)) info = 7; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HER ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HER ", &info); - if (alpha == RealScalar(0)) return 1; + if (alpha == RealScalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *n, *incx); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, a, *lda, x_cpy, x_cpy, alpha); matrix(a, *n, *n, *lda).diagonal().imag().setZero(); if (x_cpy != x) delete[] x_cpy; - - return 1; } /** ZHER2 performs the hermitian rank 2 operation @@ -244,8 +236,9 @@ * where alpha is a scalar, x and y are n element vectors and A is an n * by n hermitian matrix. */ -int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, - RealScalar *pa, int *lda) { +EIGEN_BLAS_FUNC(her2) +(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, + int *lda) { typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -270,15 +263,15 @@ info = 7; else if (*lda < std::max(1, *n)) info = 9; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2 ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2 ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *n, *incx); Scalar *y_cpy = get_compact_vector(y, *n, *incy); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, a, *lda, x_cpy, y_cpy, alpha); @@ -286,8 +279,6 @@ if (x_cpy != x) delete[] x_cpy; if (y_cpy != y) delete[] y_cpy; - - return 1; } /** ZGERU performs the rank 1 operation @@ -297,8 +288,8 @@ * where alpha is a scalar, x is an m element vector, y is an n element * vector and A is an m by n matrix. */ -int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, - RealScalar *pa, int *lda) { +EIGEN_BLAS_FUNC(geru) +(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) { Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); Scalar *a = reinterpret_cast<Scalar *>(pa); @@ -315,9 +306,9 @@ info = 7; else if (*lda < std::max(1, *m)) info = 9; - if (info) return xerbla_(SCALAR_SUFFIX_UP "GERU ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "GERU ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *m, *incx); Scalar *y_cpy = get_compact_vector(y, *n, *incy); @@ -326,8 +317,6 @@ if (x_cpy != x) delete[] x_cpy; if (y_cpy != y) delete[] y_cpy; - - return 1; } /** ZGERC performs the rank 1 operation @@ -337,8 +326,8 @@ * where alpha is a scalar, x is an m element vector, y is an n element * vector and A is an m by n matrix. */ -int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, - RealScalar *pa, int *lda) { +EIGEN_BLAS_FUNC(gerc) +(int *m, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) { Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); Scalar *a = reinterpret_cast<Scalar *>(pa); @@ -355,9 +344,9 @@ info = 7; else if (*lda < std::max(1, *m)) info = 9; - if (info) return xerbla_(SCALAR_SUFFIX_UP "GERC ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "GERC ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *m, *incx); Scalar *y_cpy = get_compact_vector(y, *n, *incy); @@ -366,6 +355,4 @@ if (x_cpy != x) delete[] x_cpy; if (y_cpy != y) delete[] y_cpy; - - return 1; }
diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 0138a9c..5721ee6 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h
@@ -22,9 +22,9 @@ } }; -int EIGEN_BLAS_FUNC(gemv)(const char *opa, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, - const int *lda, const RealScalar *pb, const int *incb, const RealScalar *pbeta, - RealScalar *pc, const int *incc) { +EIGEN_BLAS_FUNC(gemv) +(const char *opa, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, + const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); static const functype func[4] = {// array index: NOTR (general_matrix_vector_product_wrapper<int, Scalar, ColMajor, false, false>::run), @@ -53,9 +53,9 @@ info = 8; else if (*incc == 0) info = 11; - if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMV ", &info); - if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return 0; + if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return; int actual_m = *m; int actual_n = *n; @@ -72,18 +72,17 @@ make_vector(actual_c, actual_m) *= beta; } - if (code >= 4 || func[code] == 0) return 0; + if (code >= 4 || func[code] == 0) return; func[code](actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha); if (actual_b != b) delete[] actual_b; if (actual_c != c) delete[] copy_back(actual_c, c, actual_m, *incc); - - return 1; } -int EIGEN_BLAS_FUNC(trsv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, - const int *lda, RealScalar *pb, const int *incb) { +EIGEN_BLAS_FUNC(trsv) +(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, + RealScalar *pb, const int *incb) { typedef void (*functype)(int, const Scalar *, int, Scalar *); static const functype func[16] = { // array index: NOTR | (UP << 2) | (NUNIT << 3) @@ -127,7 +126,7 @@ info = 6; else if (*incb == 0) info = 8; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSV ", &info); Scalar *actual_b = get_compact_vector(b, *n, *incb); @@ -135,12 +134,11 @@ func[code](*n, a, *lda, actual_b); if (actual_b != b) delete[] copy_back(actual_b, b, *n, *incb); - - return 0; } -int EIGEN_BLAS_FUNC(trmv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, - const int *lda, RealScalar *pb, const int *incb) { +EIGEN_BLAS_FUNC(trmv) +(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, + RealScalar *pb, const int *incb) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar &); static const functype func[16] = { // array index: NOTR | (UP << 2) | (NUNIT << 3) @@ -186,23 +184,21 @@ info = 6; else if (*incb == 0) info = 8; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMV ", &info); - if (*n == 0) return 1; + if (*n == 0) return; Scalar *actual_b = get_compact_vector(b, *n, *incb); Matrix<Scalar, Dynamic, 1> res(*n); res.setZero(); int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); - if (code >= 16 || func[code] == 0) return 0; + if (code >= 16 || func[code] == 0) return; func[code](*n, *n, a, *lda, actual_b, 1, res.data(), 1, Scalar(1)); copy_back(res.data(), b, *n, *incb); if (actual_b != b) delete[] actual_b; - - return 1; } /** GBMV performs one of the matrix-vector operations @@ -212,8 +208,9 @@ * where alpha and beta are scalars, x and y are vectors and A is an * m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ -int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda, - RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { +EIGEN_BLAS_FUNC(gbmv) +(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, + RealScalar *pbeta, RealScalar *py, int *incy) { const Scalar *a = reinterpret_cast<const Scalar *>(pa); const Scalar *x = reinterpret_cast<const Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); @@ -238,9 +235,9 @@ info = 10; else if (*incy == 0) info = 13; - if (info) return xerbla_(SCALAR_SUFFIX_UP "GBMV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "GBMV ", &info); - if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return 0; + if (*m == 0 || *n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return; int actual_m = *m; int actual_n = *n; @@ -276,8 +273,6 @@ if (actual_x != x) delete[] actual_x; if (actual_y != y) delete[] copy_back(actual_y, y, actual_m, *incy); - - return 0; } #if 0 @@ -288,7 +283,7 @@ * where x is an n element vector and A is an n by n unit, or non-unit, * upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ -int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) +EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) { Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* x = reinterpret_cast<Scalar*>(px); @@ -305,8 +300,7 @@ if(info) return xerbla_(SCALAR_SUFFIX_UP"TBMV ",&info,6); - if(*n==0) - return 0; + if(*n==0) return; int actual_n = *n; @@ -334,8 +328,6 @@ if(actual_x!=x) delete[] actual_x; if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy); - - return 0; } #endif @@ -350,8 +342,8 @@ * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. */ -int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, - int *incx) { +EIGEN_BLAS_FUNC(tbsv) +(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) { typedef void (*functype)(int, int, const Scalar *, int, Scalar *); static const functype func[16] = { // array index: NOTR | (UP << 2) | (NUNIT << 3) @@ -403,22 +395,20 @@ info = 7; else if (*incx == 0) info = 9; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TBSV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TBSV ", &info); - if (*n == 0 || (*k == 0 && DIAG(*diag) == UNIT)) return 0; + if (*n == 0 || (*k == 0 && DIAG(*diag) == UNIT)) return; int actual_n = *n; Scalar *actual_x = get_compact_vector(x, actual_n, *incx); int code = OP(*op) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); - if (code >= 16 || func[code] == 0) return 0; + if (code >= 16 || func[code] == 0) return; func[code](*n, *k, a, *lda, actual_x); if (actual_x != x) delete[] copy_back(actual_x, x, actual_n, *incx); - - return 0; } /** DTPMV performs one of the matrix-vector operations @@ -428,7 +418,7 @@ * where x is an n element vector and A is an n by n unit, or non-unit, * upper or lower triangular matrix, supplied in packed form. */ -int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { +EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar *, const Scalar *, Scalar *, Scalar); static const functype func[16] = { // array index: NOTR | (UP << 2) | (NUNIT << 3) @@ -480,23 +470,21 @@ info = 4; else if (*incx == 0) info = 7; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TPMV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TPMV ", &info); - if (*n == 0) return 1; + if (*n == 0) return; Scalar *actual_x = get_compact_vector(x, *n, *incx); Matrix<Scalar, Dynamic, 1> res(*n); res.setZero(); int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); - if (code >= 16 || func[code] == 0) return 0; + if (code >= 16 || func[code] == 0) return; func[code](*n, ap, actual_x, res.data(), Scalar(1)); copy_back(res.data(), x, *n, *incx); if (actual_x != x) delete[] actual_x; - - return 1; } /** DTPSV solves one of the systems of equations @@ -509,7 +497,7 @@ * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. */ -int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { +EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar *, Scalar *); static const functype func[16] = { // array index: NOTR | (UP << 2) | (NUNIT << 3) @@ -557,7 +545,7 @@ info = 4; else if (*incx == 0) info = 7; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TPSV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TPSV ", &info); Scalar *actual_x = get_compact_vector(x, *n, *incx); @@ -565,6 +553,4 @@ func[code](*n, ap, actual_x); if (actual_x != x) delete[] copy_back(actual_x, x, *n, *incx); - - return 1; }
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 29f5a56..5653767 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h
@@ -10,9 +10,9 @@ #include "common.h" // y = alpha*A*x + beta*y -int EIGEN_BLAS_FUNC(symv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, - const int *lda, const RealScalar *px, const int *incx, const RealScalar *pbeta, - RealScalar *py, const int *incy) { +EIGEN_BLAS_FUNC(symv) +(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *px, + const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { typedef void (*functype)(int, const Scalar *, int, const Scalar *, Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -39,9 +39,9 @@ info = 7; else if (*incy == 0) info = 10; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMV ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMV ", &info); - if (*n == 0) return 0; + if (*n == 0) return; const Scalar *actual_x = get_compact_vector(x, *n, *incx); Scalar *actual_y = get_compact_vector(y, *n, *incy); @@ -54,19 +54,18 @@ } int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, a, *lda, actual_x, actual_y, alpha); if (actual_x != x) delete[] actual_x; if (actual_y != y) delete[] copy_back(actual_y, y, *n, *incy); - - return 1; } // C := alpha*x*x' + C -int EIGEN_BLAS_FUNC(syr)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, - const int *incx, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(syr) +(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc, + const int *ldc) { typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, const Scalar &); static const functype func[2] = { // array index: UP @@ -88,26 +87,25 @@ info = 5; else if (*ldc < std::max(1, *n)) info = 7; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR ", &info); - if (*n == 0 || alpha == Scalar(0)) return 1; + if (*n == 0 || alpha == Scalar(0)) return; // if the increment is not 1, let's copy it to a temporary vector to enable vectorization const Scalar *x_cpy = get_compact_vector(x, *n, *incx); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, c, *ldc, x_cpy, x_cpy, alpha); if (x_cpy != x) delete[] x_cpy; - - return 1; } // C := alpha*x*y' + alpha*y*x' + C -int EIGEN_BLAS_FUNC(syr2)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, - const int *incx, const RealScalar *py, const int *incy, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(syr2) +(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, const RealScalar *py, + const int *incy, RealScalar *pc, const int *ldc) { typedef void (*functype)(int, Scalar *, int, const Scalar *, const Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -132,15 +130,15 @@ info = 7; else if (*ldc < std::max(1, *n)) info = 9; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2 ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2 ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; const Scalar *x_cpy = get_compact_vector(x, *n, *incx); const Scalar *y_cpy = get_compact_vector(y, *n, *incy); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, c, *ldc, x_cpy, y_cpy, alpha); @@ -152,7 +150,6 @@ // return 0; // func[code](*n, a, *inca, b, *incb, c, *ldc, alpha); - return 1; } /** DSBMV performs the matrix-vector operation @@ -162,7 +159,7 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n symmetric band matrix, with k super-diagonals. */ -// int EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, +// EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, // RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) // { // return 1; @@ -176,7 +173,7 @@ * A is an n by n symmetric matrix, supplied in packed form. * */ -// int EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar +// EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar // *beta, RealScalar *y, int *incy) // { // return 1; @@ -189,7 +186,7 @@ * where alpha is a real scalar, x is an n element vector and A is an * n by n symmetric matrix, supplied in packed form. */ -int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) { +EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) { typedef void (*functype)(int, Scalar *, const Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -209,20 +206,18 @@ info = 2; else if (*incx == 0) info = 5; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *n, *incx); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, ap, x_cpy, alpha); if (x_cpy != x) delete[] x_cpy; - - return 1; } /** DSPR2 performs the symmetric rank 2 operation @@ -232,8 +227,8 @@ * where alpha is a scalar, x and y are n element vectors and A is an * n by n symmetric matrix, supplied in packed form. */ -int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, - RealScalar *pap) { +EIGEN_BLAS_FUNC(spr2) +(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar *, const Scalar *, const Scalar *, Scalar); static const functype func[2] = { // array index: UP @@ -256,22 +251,20 @@ info = 5; else if (*incy == 0) info = 7; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR2 ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SPR2 ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *n, *incx); Scalar *y_cpy = get_compact_vector(y, *n, *incy); int code = UPLO(*uplo); - if (code >= 2 || func[code] == 0) return 0; + if (code >= 2 || func[code] == 0) return; func[code](*n, ap, x_cpy, y_cpy, alpha); if (x_cpy != x) delete[] x_cpy; if (y_cpy != y) delete[] y_cpy; - - return 1; } /** DGER performs the rank 1 operation @@ -281,8 +274,8 @@ * where alpha is a scalar, x is an m element vector, y is an n element * vector and A is an m by n matrix. */ -int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, - int *lda) { +EIGEN_BLAS_FUNC(ger) +(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, int *lda) { Scalar *x = reinterpret_cast<Scalar *>(px); Scalar *y = reinterpret_cast<Scalar *>(py); Scalar *a = reinterpret_cast<Scalar *>(pa); @@ -299,9 +292,9 @@ info = 7; else if (*lda < std::max(1, *m)) info = 9; - if (info) return xerbla_(SCALAR_SUFFIX_UP "GER ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "GER ", &info); - if (alpha == Scalar(0)) return 1; + if (alpha == Scalar(0)) return; Scalar *x_cpy = get_compact_vector(x, *m, *incx); Scalar *y_cpy = get_compact_vector(y, *n, *incy); @@ -310,6 +303,4 @@ if (x_cpy != x) delete[] x_cpy; if (y_cpy != y) delete[] y_cpy; - - return 1; }
diff --git a/blas/level3_impl.h b/blas/level3_impl.h index 5af5027..a6ddf26 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h
@@ -9,13 +9,14 @@ #include <iostream> #include "common.h" -int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const int *n, const int *k, - const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pb, - const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(gemm) +(const char *opa, const char *opb, const int *m, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, + const int *ldc) { // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << // *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, - Scalar *, DenseIndex, DenseIndex, Scalar, internal::level3_blocking<Scalar, Scalar> &, + Scalar *, DenseIndex, DenseIndex, Scalar, Eigen::internal::level3_blocking<Scalar, Scalar> &, Eigen::internal::GemmParallelInfo<DenseIndex> *); static const functype func[12] = { // array index: NOTR | (NOTR << 2) @@ -72,9 +73,9 @@ info = 10; else if (*ldc < std::max(1, *m)) info = 13; - if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMM ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "GEMM ", &info); - if (*m == 0 || *n == 0) return 0; + if (*m == 0 || *n == 0) return; if (beta != Scalar(1)) { if (beta == Scalar(0)) @@ -83,22 +84,21 @@ matrix(c, *m, *n, *ldc) *= beta; } - if (*k == 0) return 0; + if (*k == 0) return; internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, *k, 1, true); int code = OP(*opa) | (OP(*opb) << 2); func[code](*m, *n, *k, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking, 0); - return 0; } -int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, - const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, - const int *ldb) { +EIGEN_BLAS_FUNC(trsm) +(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " // << *palpha << " " << *lda << " " << *ldb<< "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, - internal::level3_blocking<Scalar, Scalar> &); + Eigen::internal::level3_blocking<Scalar, Scalar> &); static const functype func[32] = { // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) (internal::triangular_solve_matrix<Scalar, DenseIndex, OnTheLeft, Upper | 0, false, ColMajor, ColMajor, 1>::run), @@ -190,9 +190,9 @@ info = 9; else if (*ldb < std::max(1, *m)) info = 11; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSM ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TRSM ", &info); - if (*m == 0 || *n == 0) return 0; + if (*m == 0 || *n == 0) return; int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); @@ -207,15 +207,13 @@ } if (alpha != Scalar(1)) matrix(b, *m, *n, *ldb) *= alpha; - - return 0; } // b = alpha*op(a)*b for side = 'L'or'l' // b = alpha*b*op(a) for side = 'R'or'r' -int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, - const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, - const int *ldb) { +EIGEN_BLAS_FUNC(trmm) +(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " // << *lda << " " << *ldb << " " << *palpha << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, @@ -324,11 +322,11 @@ info = 9; else if (*ldb < std::max(1, *m)) info = 11; - if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMM ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "TRMM ", &info); int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); - if (*m == 0 || *n == 0) return 1; + if (*m == 0 || *n == 0) return; // FIXME find a way to avoid this copy Matrix<Scalar, Dynamic, Dynamic, ColMajor> tmp = matrix(b, *m, *n, *ldb); @@ -343,14 +341,13 @@ false); func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking); } - return 1; } // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, - const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, - const RealScalar *pbeta, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(symm) +(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, + const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb // << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n"; const Scalar *a = reinterpret_cast<const Scalar *>(pa); @@ -374,7 +371,7 @@ info = 9; else if (*ldc < std::max(1, *m)) info = 12; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMM ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SYMM ", &info); if (beta != Scalar(1)) { if (beta == Scalar(0)) @@ -383,9 +380,7 @@ matrix(c, *m, *n, *ldc) *= beta; } - if (*m == 0 || *n == 0) { - return 1; - } + if (*m == 0 || *n == 0) return; int size = (SIDE(*side) == LEFT) ? (*m) : (*n); #if ISCOMPLEX @@ -413,7 +408,7 @@ internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking); else - return 0; + return; else if (SIDE(*side) == RIGHT) if (UPLO(*uplo) == UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, RowMajor, true, false, ColMajor, @@ -422,19 +417,17 @@ internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking); else - return 0; + return; else - return 0; + return; #endif - - return 0; } // c = alpha*a*a' + beta*c for op = 'N'or'n' // c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c' -int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, - const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, - const int *ldc) { +EIGEN_BLAS_FUNC(syrk) +(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa, + const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " // << *pbeta << " " << *ldc << "\n"; #if !ISCOMPLEX @@ -481,7 +474,7 @@ info = 7; else if (*ldc < std::max(1, *n)) info = 10; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SYRK ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SYRK ", &info); if (beta != Scalar(1)) { if (UPLO(*uplo) == UP) @@ -495,7 +488,7 @@ matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta; } - if (*n == 0 || *k == 0) return 0; + if (*n == 0 || *k == 0) return; #if ISCOMPLEX // FIXME add support for symmetric complex matrix @@ -520,15 +513,13 @@ int code = OP(*op) | (UPLO(*uplo) << 2); func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking); #endif - - return 0; } // c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n' // c = alpha*a'*b + alpha*b'*a + beta*c for op = 'T'or't' -int EIGEN_BLAS_FUNC(syr2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, - const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, - const RealScalar *pbeta, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(syr2k) +(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa, + const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { const Scalar *a = reinterpret_cast<const Scalar *>(pa); const Scalar *b = reinterpret_cast<const Scalar *>(pb); Scalar *c = reinterpret_cast<Scalar *>(pc); @@ -553,7 +544,7 @@ info = 9; else if (*ldc < std::max(1, *n)) info = 12; - if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2K", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "SYR2K", &info); if (beta != Scalar(1)) { if (UPLO(*uplo) == UP) @@ -567,7 +558,7 @@ matrix(c, *n, *n, *ldc).triangularView<Lower>() *= beta; } - if (*k == 0) return 1; + if (*k == 0) return; if (OP(*op) == NOTR) { if (UPLO(*uplo) == UP) { @@ -588,17 +579,15 @@ alpha * matrix(a, *k, *n, *lda).transpose() * matrix(b, *k, *n, *ldb) + alpha * matrix(b, *k, *n, *ldb).transpose() * matrix(a, *k, *n, *lda); } - - return 0; } #if ISCOMPLEX // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, - const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, - const RealScalar *pbeta, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(hemm) +(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, const RealScalar *pa, + const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { const Scalar *a = reinterpret_cast<const Scalar *>(pa); const Scalar *b = reinterpret_cast<const Scalar *>(pb); Scalar *c = reinterpret_cast<Scalar *>(pc); @@ -623,16 +612,14 @@ info = 9; else if (*ldc < std::max(1, *m)) info = 12; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMM ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HEMM ", &info); if (beta == Scalar(0)) matrix(c, *m, *n, *ldc).setZero(); else if (beta != Scalar(1)) matrix(c, *m, *n, *ldc) *= beta; - if (*m == 0 || *n == 0) { - return 1; - } + if (*m == 0 || *n == 0) return; int size = (SIDE(*side) == LEFT) ? (*m) : (*n); internal::gemm_blocking_space<ColMajor, Scalar, Scalar, Dynamic, Dynamic, Dynamic> blocking(*m, *n, size, 1, false); @@ -645,7 +632,7 @@ internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, true, false, ColMajor, false, false, ColMajor, 1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking); else - return 0; + return; } else if (SIDE(*side) == RIGHT) { if (UPLO(*uplo) == UP) matrix(c, *m, *n, *ldc) += @@ -658,24 +645,22 @@ internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor, false, false, ColMajor, true, false, ColMajor, 1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking); else - return 0; + return; } else { - return 0; + return; } - - return 0; } // c = alpha*a*conj(a') + beta*c for op = 'N'or'n' // c = alpha*conj(a')*a + beta*c for op = 'C'or'c' -int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, - const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, - const int *ldc) { +EIGEN_BLAS_FUNC(herk) +(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa, + const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " // << *pbeta << " " << *ldc << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, - DenseIndex, DenseIndex, const Scalar &, internal::level3_blocking<Scalar, Scalar> &); + DenseIndex, DenseIndex, const Scalar &, Eigen::internal::level3_blocking<Scalar, Scalar> &); static const functype func[8] = { // array index: NOTR | (UP << 2) (internal::general_matrix_matrix_triangular_product<DenseIndex, Scalar, ColMajor, false, Scalar, RowMajor, Conj, @@ -715,7 +700,7 @@ info = 7; else if (*ldc < std::max(1, *n)) info = 10; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HERK ", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HERK ", &info); int code = OP(*op) | (UPLO(*uplo) << 2); @@ -741,14 +726,13 @@ func[code](*n, *k, a, *lda, a, *lda, c, 1, *ldc, alpha, blocking); matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); } - return 0; } // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n' // c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c, for op = 'C'or'c' -int EIGEN_BLAS_FUNC(her2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, - const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, - const RealScalar *pbeta, RealScalar *pc, const int *ldc) { +EIGEN_BLAS_FUNC(her2k) +(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, const RealScalar *pa, + const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { const Scalar *a = reinterpret_cast<const Scalar *>(pa); const Scalar *b = reinterpret_cast<const Scalar *>(pb); Scalar *c = reinterpret_cast<Scalar *>(pc); @@ -773,7 +757,7 @@ info = 9; else if (*ldc < std::max(1, *n)) info = 12; - if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2K", &info, 6); + if (info) return xerbla_(SCALAR_SUFFIX_UP "HER2K", &info); if (beta != RealScalar(1)) { if (UPLO(*uplo) == UP) @@ -793,7 +777,7 @@ } else if (*k > 0 && alpha != Scalar(0)) matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); - if (*k == 0) return 1; + if (*k == 0) return; if (OP(*op) == NOTR) { if (UPLO(*uplo) == UP) { @@ -814,8 +798,6 @@ alpha * matrix(a, *k, *n, *lda).adjoint() * matrix(b, *k, *n, *ldb) + numext::conj(alpha) * matrix(b, *k, *n, *ldb).adjoint() * matrix(a, *k, *n, *lda); } - - return 1; } #endif // ISCOMPLEX
diff --git a/blas/single.cpp b/blas/single.cpp index 9afea35..fefb695 100644 --- a/blas/single.cpp +++ b/blas/single.cpp
@@ -18,6 +18,6 @@ #include "level2_real_impl.h" #include "level3_impl.h" -float EIGEN_BLAS_FUNC(dsdot)(int* n, float* alpha, float* x, int* incx, float* y, int* incy) { +float EIGEN_BLAS_FUNC_NAME(dsdot)(int* n, float* alpha, float* x, int* incx, float* y, int* incy) { return double(*alpha) + BLASFUNC(dsdot)(n, x, incx, y, incy); }
diff --git a/blas/xerbla.cpp b/blas/xerbla.cpp index 698e89f..0b59c4a 100644 --- a/blas/xerbla.cpp +++ b/blas/xerbla.cpp
@@ -11,10 +11,7 @@ extern "C" { #endif -EIGEN_WEAK_LINKING int xerbla_(const char *msg, int *info, int) { - printf("Eigen BLAS ERROR #%i: %s\n", *info, msg); - return 0; -} +EIGEN_WEAK_LINKING void xerbla_(const char *msg, int *info) { printf("Eigen BLAS ERROR #%i: %s\n", *info, msg); } #ifdef __cplusplus }
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis index 22cf346..da89857 100644 --- a/debug/msvc/eigen.natvis +++ b/debug/msvc/eigen.natvis
@@ -1,235 +1,235 @@ -<?xml version="1.0" encoding="utf-8"?> - -<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010"> - - <!-- Fixed x Fixed Matrix --> - <Type Name="Eigen::Matrix<*,*,*,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> - <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : $T3</Size> - <ValuePointer>m_storage.m_data.array</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : $T3</Size> - <ValuePointer>m_storage.m_data.array</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- 2 x 2 Matrix --> - <Type Name="Eigen::Matrix<*,2,2,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/> - <DisplayString>[2, 2] (fixed matrix)</DisplayString> - <Expand> - <Synthetic Name="[row 0]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 0]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString> - </Synthetic> - </Expand> - </Type> - - <!-- 3 x 3 Matrix --> - <Type Name="Eigen::Matrix<*,3,3,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/> - <DisplayString>[3, 3] (fixed matrix)</DisplayString> - <Expand> - <Synthetic Name="[row 0]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 0]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString> - </Synthetic> - </Expand> - </Type> - - <!-- 4 x 4 Matrix --> - <Type Name="Eigen::Matrix<*,4,4,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/> - <DisplayString>[4, 4] (fixed matrix)</DisplayString> - <Expand> - <Synthetic Name="[row 0]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 0]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 1]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 2]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 3]" Condition="Flags%2"> - <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString> - </Synthetic> - <Synthetic Name="[row 3]" Condition="!(Flags%2)"> - <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString> - </Synthetic> - </Expand> - </Type> - - <!-- Dynamic x Dynamic Matrix --> - <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Fixed x Dynamic Matrix --> - <Type Name="Eigen::Matrix<*,*,-1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? $T2 : m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Dynamic x Fixed Matrix --> - <Type Name="Eigen::Matrix<*,-1,*,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString> - <Expand> - <ArrayItems Condition="Flags%2"> <!-- row major layout --> - <Rank>2</Rank> - <Size>$i==0 ? m_storage.m_rows : $T2</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> - <Direction>Backward</Direction> - <Rank>2</Rank> - <Size>$i==0 ? m_storage.m_rows : $T2</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Dynamic Column Vector --> - <Type Name="Eigen::Matrix<*,1,-1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString> - <Expand> - <Item Name="[size]">m_storage.m_cols</Item> - <ArrayItems> - <Size>m_storage.m_cols</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Dynamic Row Vector --> - <Type Name="Eigen::Matrix<*,-1,1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/> - <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> - <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString> - <Expand> - <Item Name="[size]">m_storage.m_rows</Item> - <ArrayItems> - <Size>m_storage.m_rows</Size> - <ValuePointer>m_storage.m_data</ValuePointer> - </ArrayItems> - </Expand> - </Type> - - <!-- Fixed Vector --> - <Type Name="Eigen::Matrix<*,1,1,*,*,*>"> - <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/> - <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - </Expand> - </Type> - - <Type Name="Eigen::Matrix<*,2,1,*,*,*>"> - <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/> - <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - <Item Name="[y]">m_storage.m_data.array[1]</Item> - </Expand> - </Type> - - <Type Name="Eigen::Matrix<*,3,1,*,*,*>"> - <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/> - <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - <Item Name="[y]">m_storage.m_data.array[1]</Item> - <Item Name="[z]">m_storage.m_data.array[2]</Item> - </Expand> - </Type> - - <Type Name="Eigen::Matrix<*,4,1,*,*,*>"> - <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/> - <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/> - <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> - <Expand> - <Item Name="[x]">m_storage.m_data.array[0]</Item> - <Item Name="[y]">m_storage.m_data.array[1]</Item> - <Item Name="[z]">m_storage.m_data.array[2]</Item> - <Item Name="[w]">m_storage.m_data.array[3]</Item> - </Expand> - </Type> - -</AutoVisualizer> +<?xml version="1.0" encoding="utf-8"?> + +<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010"> + + <!-- Fixed x Fixed Matrix --> + <Type Name="Eigen::Matrix<*,*,*,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> + <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? $T2 : $T3</Size> + <ValuePointer>m_storage.m_data.array</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? $T2 : $T3</Size> + <ValuePointer>m_storage.m_data.array</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- 2 x 2 Matrix --> + <Type Name="Eigen::Matrix<*,2,2,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,2,2,*,*,*>"/> + <DisplayString>[2, 2] (fixed matrix)</DisplayString> + <Expand> + <Synthetic Name="[row 0]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 0]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString> + </Synthetic> + </Expand> + </Type> + + <!-- 3 x 3 Matrix --> + <Type Name="Eigen::Matrix<*,3,3,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,3,3,*,*,*>"/> + <DisplayString>[3, 3] (fixed matrix)</DisplayString> + <Expand> + <Synthetic Name="[row 0]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 0]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString> + </Synthetic> + </Expand> + </Type> + + <!-- 4 x 4 Matrix --> + <Type Name="Eigen::Matrix<*,4,4,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,4,4,*,*,*>"/> + <DisplayString>[4, 4] (fixed matrix)</DisplayString> + <Expand> + <Synthetic Name="[row 0]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 0]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 1]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 2]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 3]" Condition="Flags%2"> + <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString> + </Synthetic> + <Synthetic Name="[row 3]" Condition="!(Flags%2)"> + <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString> + </Synthetic> + </Expand> + </Type> + + <!-- Dynamic x Dynamic Matrix --> + <Type Name="Eigen::Matrix<*,-1,-1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,-1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Fixed x Dynamic Matrix --> + <Type Name="Eigen::Matrix<*,*,-1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,*,-1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? $T2 : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? $T2 : m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Dynamic x Fixed Matrix --> + <Type Name="Eigen::Matrix<*,-1,*,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,*,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString> + <Expand> + <ArrayItems Condition="Flags%2"> <!-- row major layout --> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : $T2</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + <ArrayItems Condition="!(Flags%2)"> <!-- column major layout --> + <Direction>Backward</Direction> + <Rank>2</Rank> + <Size>$i==0 ? m_storage.m_rows : $T2</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Dynamic Column Vector --> + <Type Name="Eigen::Matrix<*,1,-1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,1,-1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString> + <Expand> + <Item Name="[size]">m_storage.m_cols</Item> + <ArrayItems> + <Size>m_storage.m_cols</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Dynamic Row Vector --> + <Type Name="Eigen::Matrix<*,-1,1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,-1,1,*,*,*>"/> + <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString> + <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString> + <Expand> + <Item Name="[size]">m_storage.m_rows</Item> + <ArrayItems> + <Size>m_storage.m_rows</Size> + <ValuePointer>m_storage.m_data</ValuePointer> + </ArrayItems> + </Expand> + </Type> + + <!-- Fixed Vector --> + <Type Name="Eigen::Matrix<*,1,1,*,*,*>"> + <AlternativeType Name="Eigen::Array<*,1,1,*,*,*>"/> + <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + </Expand> + </Type> + + <Type Name="Eigen::Matrix<*,2,1,*,*,*>"> + <AlternativeType Name="Eigen::Matrix<*,1,2,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,2,1,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,1,2,*,*,*>"/> + <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + <Item Name="[y]">m_storage.m_data.array[1]</Item> + </Expand> + </Type> + + <Type Name="Eigen::Matrix<*,3,1,*,*,*>"> + <AlternativeType Name="Eigen::Matrix<*,1,3,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,3,1,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,1,3,*,*,*>"/> + <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + <Item Name="[y]">m_storage.m_data.array[1]</Item> + <Item Name="[z]">m_storage.m_data.array[2]</Item> + </Expand> + </Type> + + <Type Name="Eigen::Matrix<*,4,1,*,*,*>"> + <AlternativeType Name="Eigen::Matrix<*,1,4,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,4,1,*,*,*>"/> + <AlternativeType Name="Eigen::Array<*,1,4,*,*,*>"/> + <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString> + <Expand> + <Item Name="[x]">m_storage.m_data.array[0]</Item> + <Item Name="[y]">m_storage.m_data.array[1]</Item> + <Item Name="[z]">m_storage.m_data.array[2]</Item> + <Item Name="[w]">m_storage.m_data.array[3]</Item> + </Expand> + </Type> + +</AutoVisualizer>
diff --git a/debug/msvc/eigen_autoexp_part.dat b/debug/msvc/eigen_autoexp_part.dat index 273c10d..35ef580 100644 --- a/debug/msvc/eigen_autoexp_part.dat +++ b/debug/msvc/eigen_autoexp_part.dat
@@ -1,295 +1,295 @@ -; *************************************************************** -; * Eigen Visualizer -; * -; * Author: Hauke Heibel <hauke.heibel@gmail.com> -; * -; * Support the enhanced debugging of the following Eigen -; * types (*: any, +:fixed dimension) : -; * -; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*> -; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*> -; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*> -; * - Eigen::Matrix<*,-1,-1,*,*,*> -; * - Eigen::Matrix<*,+,-1,*,*,*> -; * - Eigen::Matrix<*,-1,+,*,*,*> -; * - Eigen::Matrix<*,+,+,*,*,*> -; * -; * Matrices are displayed properly independently of the memory -; * alignment (RowMajor vs. ColMajor). -; * -; * This file is distributed WITHOUT ANY WARRANTY. Please ensure -; * that your original autoexp.dat file is copied to a safe -; * place before proceeding with its modification. -; *************************************************************** - -[Visualizer] - -; Fixed size 4-vectors -Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1], - z : ($c.m_storage.m_data.array)[2], - w : ($c.m_storage.m_data.array)[3] - ) - ) - - preview - ( - #( - "[", - 4, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 4), - ")" - ) - ) -} - -; Fixed size 3-vectors -Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1], - z : ($c.m_storage.m_data.array)[2] - ) - ) - - preview - ( - #( - "[", - 3, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 3), - ")" - ) - ) -} - -; Fixed size 2-vectors -Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1] - ) - ) - - preview - ( - #( - "[", - 2, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 2), - ")" - ) - ) -} - -; Fixed size 1-vectors -Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0] - ) - ) - - preview - ( - #( - "[", - 1, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 1), - ")" - ) - ) -} - -; Dynamic matrices (ColMajor and RowMajor support) -Eigen::Matrix<*,-1,-1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.m_storage.m_rows, - cols: $c.m_storage.m_cols, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.m_storage.m_rows, - ",", - $c.m_storage.m_cols, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.m_storage.m_rows*$c.m_storage.m_cols - ), - ")" - ) - ) -} - -; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,*,-1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.RowsAtCompileTime, - cols: $c.m_storage.m_cols, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.RowsAtCompileTime, - ",", - $c.m_storage.m_cols, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.RowsAtCompileTime*$c.m_storage.m_cols - ), - ")" - ) - ) -} - -; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,-1,*,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.m_storage.m_rows, - cols: $c.ColsAtCompileTime, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.m_storage.m_rows, - ",", - $c.ColsAtCompileTime, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.m_storage.m_rows*$c.ColsAtCompileTime - ), - ")" - ) - ) -} - -; Fixed size matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,*,*,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.RowsAtCompileTime, - cols: $c.ColsAtCompileTime, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data.array)[$i], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.RowsAtCompileTime, - ",", - $c.ColsAtCompileTime, - "](", - #array( - expr : [($c.m_storage.m_data.array)[$i],g], - size : $c.RowsAtCompileTime*$c.ColsAtCompileTime - ), - ")" - ) - ) -} +; *************************************************************** +; * Eigen Visualizer +; * +; * Author: Hauke Heibel <hauke.heibel@gmail.com> +; * +; * Support the enhanced debugging of the following Eigen +; * types (*: any, +:fixed dimension) : +; * +; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*> +; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*> +; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*> +; * - Eigen::Matrix<*,-1,-1,*,*,*> +; * - Eigen::Matrix<*,+,-1,*,*,*> +; * - Eigen::Matrix<*,-1,+,*,*,*> +; * - Eigen::Matrix<*,+,+,*,*,*> +; * +; * Matrices are displayed properly independently of the memory +; * alignment (RowMajor vs. ColMajor). +; * +; * This file is distributed WITHOUT ANY WARRANTY. Please ensure +; * that your original autoexp.dat file is copied to a safe +; * place before proceeding with its modification. +; *************************************************************** + +[Visualizer] + +; Fixed size 4-vectors +Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1], + z : ($c.m_storage.m_data.array)[2], + w : ($c.m_storage.m_data.array)[3] + ) + ) + + preview + ( + #( + "[", + 4, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 4), + ")" + ) + ) +} + +; Fixed size 3-vectors +Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1], + z : ($c.m_storage.m_data.array)[2] + ) + ) + + preview + ( + #( + "[", + 3, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 3), + ")" + ) + ) +} + +; Fixed size 2-vectors +Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1] + ) + ) + + preview + ( + #( + "[", + 2, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 2), + ")" + ) + ) +} + +; Fixed size 1-vectors +Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0] + ) + ) + + preview + ( + #( + "[", + 1, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 1), + ")" + ) + ) +} + +; Dynamic matrices (ColMajor and RowMajor support) +Eigen::Matrix<*,-1,-1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.m_storage.m_rows, + cols: $c.m_storage.m_cols, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.m_storage.m_rows, + ",", + $c.m_storage.m_cols, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.m_storage.m_rows*$c.m_storage.m_cols + ), + ")" + ) + ) +} + +; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,*,-1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.RowsAtCompileTime, + cols: $c.m_storage.m_cols, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.RowsAtCompileTime, + ",", + $c.m_storage.m_cols, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.RowsAtCompileTime*$c.m_storage.m_cols + ), + ")" + ) + ) +} + +; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,-1,*,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.m_storage.m_rows, + cols: $c.ColsAtCompileTime, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.m_storage.m_rows, + ",", + $c.ColsAtCompileTime, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.m_storage.m_rows*$c.ColsAtCompileTime + ), + ")" + ) + ) +} + +; Fixed size matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,*,*,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.RowsAtCompileTime, + cols: $c.ColsAtCompileTime, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data.array)[$i], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.RowsAtCompileTime, + ",", + $c.ColsAtCompileTime, + "](", + #array( + expr : [($c.m_storage.m_data.array)[$i],g], + size : $c.RowsAtCompileTime*$c.ColsAtCompileTime + ), + ")" + ) + ) +}
diff --git a/lapack/cholesky.inc b/lapack/cholesky.inc index ea3bc12..dea5bf6 100644 --- a/lapack/cholesky.inc +++ b/lapack/cholesky.inc
@@ -11,62 +11,60 @@ #include <Eigen/Cholesky> // POTRF computes the Cholesky factorization of a real symmetric positive definite matrix A. -EIGEN_LAPACK_FUNC(potrf,(char* uplo, int *n, RealScalar *pa, int *lda, int *info)) -{ +EIGEN_LAPACK_FUNC(potrf)(char *uplo, int *n, RealScalar *pa, int *lda, int *info) { *info = 0; - if(UPLO(*uplo)==INVALID) *info = -1; - else if(*n<0) *info = -2; - else if(*lda<std::max(1,*n)) *info = -4; - if(*info!=0) - { + if (UPLO(*uplo) == INVALID) + *info = -1; + else if (*n < 0) + *info = -2; + else if (*lda < std::max(1, *n)) + *info = -4; + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"POTRF", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "POTRF", &e); } - Scalar* a = reinterpret_cast<Scalar*>(pa); - MatrixType A(a,*n,*n,*lda); + Scalar *a = reinterpret_cast<Scalar *>(pa); + MatrixType A(a, *n, *n, *lda); int ret; - if(UPLO(*uplo)==UP) ret = int(internal::llt_inplace<Scalar, Upper>::blocked(A)); - else ret = int(internal::llt_inplace<Scalar, Lower>::blocked(A)); + if (UPLO(*uplo) == UP) + ret = int(internal::llt_inplace<Scalar, Upper>::blocked(A)); + else + ret = int(internal::llt_inplace<Scalar, Lower>::blocked(A)); - if(ret>=0) - *info = ret+1; - - return 0; + if (ret >= 0) *info = ret + 1; } // POTRS solves a system of linear equations A*X = B with a symmetric // positive definite matrix A using the Cholesky factorization // A = U**T*U or A = L*L**T computed by DPOTRF. -EIGEN_LAPACK_FUNC(potrs,(char* uplo, int *n, int *nrhs, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, int *info)) -{ +EIGEN_LAPACK_FUNC(potrs)(char *uplo, int *n, int *nrhs, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, int *info) { *info = 0; - if(UPLO(*uplo)==INVALID) *info = -1; - else if(*n<0) *info = -2; - else if(*nrhs<0) *info = -3; - else if(*lda<std::max(1,*n)) *info = -5; - else if(*ldb<std::max(1,*n)) *info = -7; - if(*info!=0) - { + if (UPLO(*uplo) == INVALID) + *info = -1; + else if (*n < 0) + *info = -2; + else if (*nrhs < 0) + *info = -3; + else if (*lda < std::max(1, *n)) + *info = -5; + else if (*ldb < std::max(1, *n)) + *info = -7; + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"POTRS", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "POTRS", &e); } - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); - MatrixType A(a,*n,*n,*lda); - MatrixType B(b,*n,*nrhs,*ldb); + Scalar *a = reinterpret_cast<Scalar *>(pa); + Scalar *b = reinterpret_cast<Scalar *>(pb); + MatrixType A(a, *n, *n, *lda); + MatrixType B(b, *n, *nrhs, *ldb); - if(UPLO(*uplo)==UP) - { + if (UPLO(*uplo) == UP) { A.triangularView<Upper>().adjoint().solveInPlace(B); A.triangularView<Upper>().solveInPlace(B); - } - else - { + } else { A.triangularView<Lower>().solveInPlace(B); A.triangularView<Lower>().adjoint().solveInPlace(B); } - - return 0; }
diff --git a/lapack/dsecnd.cpp b/lapack/dsecnd.cpp deleted file mode 100644 index b046955..0000000 --- a/lapack/dsecnd.cpp +++ /dev/null
@@ -1,11 +0,0 @@ -#include <ctime> - -extern "C" { -double dsecnd_(); -} - -// Elapsed CPU Time in seconds. -double dsecnd_() { - return static_cast<double>(std::clock()) / - static_cast<double>(CLOCKS_PER_SEC); -}
diff --git a/lapack/eigenvalues.inc b/lapack/eigenvalues.inc index 921c515..6f168de 100644 --- a/lapack/eigenvalues.inc +++ b/lapack/eigenvalues.inc
@@ -11,52 +11,51 @@ #include <Eigen/Eigenvalues> // computes eigen values and vectors of a general N-by-N matrix A -EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info)) -{ +EIGEN_LAPACK_FUNC(syev) +(char* jobz, char* uplo, int* n, Scalar* a, int* lda, Scalar* w, Scalar* /*work*/, int* lwork, int* info) { // TODO exploit the work buffer - bool query_size = *lwork==-1; - + bool query_size = *lwork == -1; + *info = 0; - if(*jobz!='N' && *jobz!='V') *info = -1; - else if(UPLO(*uplo)==INVALID) *info = -2; - else if(*n<0) *info = -3; - else if(*lda<std::max(1,*n)) *info = -5; - else if((!query_size) && *lwork<std::max(1,3**n-1)) *info = -8; - - if(*info!=0) - { + if (*jobz != 'N' && *jobz != 'V') + *info = -1; + else if (UPLO(*uplo) == INVALID) + *info = -2; + else if (*n < 0) + *info = -3; + else if (*lda < std::max(1, *n)) + *info = -5; + else if ((!query_size) && *lwork < std::max(1, 3 * *n - 1)) + *info = -8; + + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"SYEV ", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "SYEV ", &e); } - - if(query_size) - { + + if (query_size) { *lwork = 0; - return 0; + return; } - - if(*n==0) - return 0; - - PlainMatrixType mat(*n,*n); - if(UPLO(*uplo)==UP) mat = matrix(a,*n,*n,*lda).adjoint(); - else mat = matrix(a,*n,*n,*lda); - - bool computeVectors = *jobz=='V' || *jobz=='v'; - SelfAdjointEigenSolver<PlainMatrixType> eig(mat,computeVectors?ComputeEigenvectors:EigenvaluesOnly); - - if(eig.info()==NoConvergence) - { - make_vector(w,*n).setZero(); - if(computeVectors) - matrix(a,*n,*n,*lda).setIdentity(); + + if (*n == 0) return; + + PlainMatrixType mat(*n, *n); + if (UPLO(*uplo) == UP) + mat = matrix(a, *n, *n, *lda).adjoint(); + else + mat = matrix(a, *n, *n, *lda); + + bool computeVectors = *jobz == 'V' || *jobz == 'v'; + SelfAdjointEigenSolver<PlainMatrixType> eig(mat, computeVectors ? ComputeEigenvectors : EigenvaluesOnly); + + if (eig.info() == NoConvergence) { + make_vector(w, *n).setZero(); + if (computeVectors) matrix(a, *n, *n, *lda).setIdentity(); //*info = 1; - return 0; + return; } - - make_vector(w,*n) = eig.eigenvalues(); - if(computeVectors) - matrix(a,*n,*n,*lda) = eig.eigenvectors(); - - return 0; + + make_vector(w, *n) = eig.eigenvalues(); + if (computeVectors) matrix(a, *n, *n, *lda) = eig.eigenvectors(); }
diff --git a/lapack/lapack.h b/lapack/lapack.h index 9bb222b..88fdb98 100644 --- a/lapack/lapack.h +++ b/lapack/lapack.h
@@ -7,127 +7,127 @@ extern "C" { #endif -int BLASFUNC(csymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, - const float *, float *, const int *); -int BLASFUNC(zsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); -int BLASFUNC(xsymv)(const char *, const int *, const double *, const double *, const int *, const double *, const int *, - const double *, double *, const int *); +void BLASFUNC(csymv)(const char *, const int *, const float *, const float *, const int *, const float *, const int *, + const float *, float *, const int *); +void BLASFUNC(zsymv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); +void BLASFUNC(xsymv)(const char *, const int *, const double *, const double *, const int *, const double *, + const int *, const double *, double *, const int *); -int BLASFUNC(cspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *); -int BLASFUNC(zspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(xspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); +void BLASFUNC(cspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *); +void BLASFUNC(zspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); +void BLASFUNC(xspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *); -int BLASFUNC(csyr)(char *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zsyr)(char *, int *, double *, double *, int *, double *, int *); -int BLASFUNC(xsyr)(char *, int *, double *, double *, int *, double *, int *); +void BLASFUNC(csyr)(char *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(zsyr)(char *, int *, double *, double *, int *, double *, int *); +void BLASFUNC(xsyr)(char *, int *, double *, double *, int *, double *, int *); -int BLASFUNC(cspr)(char *, int *, float *, float *, int *, float *); -int BLASFUNC(zspr)(char *, int *, double *, double *, int *, double *); -int BLASFUNC(xspr)(char *, int *, double *, double *, int *, double *); +void BLASFUNC(cspr)(char *, int *, float *, float *, int *, float *); +void BLASFUNC(zspr)(char *, int *, double *, double *, int *, double *); +void BLASFUNC(xspr)(char *, int *, double *, double *, int *, double *); -int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, double *, int *); -int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, double *, int *); +void BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, double *, int *); +void BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, double *, int *); -int BLASFUNC(sgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, - int *); -int BLASFUNC(cgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, - int *); +void BLASFUNC(sgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(dgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, + int *); +void BLASFUNC(cgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(zgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, + int *); -int BLASFUNC(sgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, - int *); -int BLASFUNC(cgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, - int *); +void BLASFUNC(sgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(dgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, + int *); +void BLASFUNC(cgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *); +void BLASFUNC(zgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *, + int *); -int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); +void BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); +void BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); +void BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); +void BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); +void BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); -int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); -int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); +void BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); +void BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); +void BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); +void BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); +void BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); +void BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +void BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +void BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +void BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +void BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double *, int *, int *); +void BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(spotf2)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); +void BLASFUNC(spotf2)(char *, int *, float *, int *, int *); +void BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); +void BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); +void BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); +void BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); +void BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(spotrf)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); +void BLASFUNC(spotrf)(char *, int *, float *, int *, int *); +void BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); +void BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); +void BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); +void BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); +void BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(slauu2)(char *, int *, float *, int *, int *); -int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(clauu2)(char *, int *, float *, int *, int *); -int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); +void BLASFUNC(slauu2)(char *, int *, float *, int *, int *); +void BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); +void BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); +void BLASFUNC(clauu2)(char *, int *, float *, int *, int *); +void BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); +void BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(slauum)(char *, int *, float *, int *, int *); -int BLASFUNC(dlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(qlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(clauum)(char *, int *, float *, int *, int *); -int BLASFUNC(zlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(xlauum)(char *, int *, double *, int *, int *); +void BLASFUNC(slauum)(char *, int *, float *, int *, int *); +void BLASFUNC(dlauum)(char *, int *, double *, int *, int *); +void BLASFUNC(qlauum)(char *, int *, double *, int *, int *); +void BLASFUNC(clauum)(char *, int *, float *, int *, int *); +void BLASFUNC(zlauum)(char *, int *, double *, int *, int *); +void BLASFUNC(xlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); +void BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *); +void BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); +void BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); +void BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); +void BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(spotri)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); +void BLASFUNC(spotri)(char *, int *, float *, int *, int *); +void BLASFUNC(dpotri)(char *, int *, double *, int *, int *); +void BLASFUNC(qpotri)(char *, int *, double *, int *, int *); +void BLASFUNC(cpotri)(char *, int *, float *, int *, int *); +void BLASFUNC(zpotri)(char *, int *, double *, int *, int *); +void BLASFUNC(xpotri)(char *, int *, double *, int *, int *); #ifdef __cplusplus }
diff --git a/lapack/lapack_common.h b/lapack/lapack_common.h index 37d1f14..726fabf 100644 --- a/lapack/lapack_common.h +++ b/lapack/lapack_common.h
@@ -13,19 +13,8 @@ #include "../blas/common.h" #include "lapack.h" -#define EIGEN_LAPACK_FUNC(FUNC, ARGLIST) \ - extern "C" { \ - int EIGEN_BLAS_FUNC(FUNC) ARGLIST; \ - } \ - int EIGEN_BLAS_FUNC(FUNC) \ - ARGLIST +#define EIGEN_LAPACK_FUNC(FUNC) EIGEN_BLAS_FUNC(FUNC) typedef Eigen::Map<Eigen::Transpositions<Eigen::Dynamic, Eigen::Dynamic, int> > PivotsType; -#if ISCOMPLEX -#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X, -#else -#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) -#endif - #endif // EIGEN_LAPACK_COMMON_H
diff --git a/lapack/lu.inc b/lapack/lu.inc index 90cebe0..d30c8ce 100644 --- a/lapack/lu.inc +++ b/lapack/lu.inc
@@ -11,79 +11,70 @@ #include <Eigen/LU> // computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges -EIGEN_LAPACK_FUNC(getrf,(int *m, int *n, RealScalar *pa, int *lda, int *ipiv, int *info)) -{ +EIGEN_LAPACK_FUNC(getrf)(int *m, int *n, RealScalar *pa, int *lda, int *ipiv, int *info) { *info = 0; - if(*m<0) *info = -1; - else if(*n<0) *info = -2; - else if(*lda<std::max(1,*m)) *info = -4; - if(*info!=0) - { + if (*m < 0) + *info = -1; + else if (*n < 0) + *info = -2; + else if (*lda < std::max(1, *m)) + *info = -4; + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"GETRF", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "GETRF", &e); } - if(*m==0 || *n==0) - return 0; + if (*m == 0 || *n == 0) return; - Scalar* a = reinterpret_cast<Scalar*>(pa); + Scalar *a = reinterpret_cast<Scalar *>(pa); int nb_transpositions; - int ret = int(Eigen::internal::partial_lu_impl<Scalar,ColMajor,int> - ::blocked_lu(*m, *n, a, *lda, ipiv, nb_transpositions)); + int ret = int(Eigen::internal::partial_lu_impl<Scalar, Eigen::ColMajor, int>::blocked_lu(*m, *n, a, *lda, ipiv, + nb_transpositions)); - for(int i=0; i<std::min(*m,*n); ++i) - ipiv[i]++; + for (int i = 0; i < std::min(*m, *n); ++i) ipiv[i]++; - if(ret>=0) - *info = ret+1; - - return 0; + if (ret >= 0) *info = ret + 1; } -//GETRS solves a system of linear equations -// A * X = B or A' * X = B -// with a general N-by-N matrix A using the LU factorization computed by GETRF -EIGEN_LAPACK_FUNC(getrs,(char *trans, int *n, int *nrhs, RealScalar *pa, int *lda, int *ipiv, RealScalar *pb, int *ldb, int *info)) -{ +// GETRS solves a system of linear equations +// A * X = B or A' * X = B +// with a general N-by-N matrix A using the LU factorization computed by GETRF +EIGEN_LAPACK_FUNC(getrs) +(char *trans, int *n, int *nrhs, RealScalar *pa, int *lda, int *ipiv, RealScalar *pb, int *ldb, int *info) { *info = 0; - if(OP(*trans)==INVALID) *info = -1; - else if(*n<0) *info = -2; - else if(*nrhs<0) *info = -3; - else if(*lda<std::max(1,*n)) *info = -5; - else if(*ldb<std::max(1,*n)) *info = -8; - if(*info!=0) - { + if (OP(*trans) == INVALID) + *info = -1; + else if (*n < 0) + *info = -2; + else if (*nrhs < 0) + *info = -3; + else if (*lda < std::max(1, *n)) + *info = -5; + else if (*ldb < std::max(1, *n)) + *info = -8; + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"GETRS", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "GETRS", &e); } - Scalar* a = reinterpret_cast<Scalar*>(pa); - Scalar* b = reinterpret_cast<Scalar*>(pb); - MatrixType lu(a,*n,*n,*lda); - MatrixType B(b,*n,*nrhs,*ldb); + Scalar *a = reinterpret_cast<Scalar *>(pa); + Scalar *b = reinterpret_cast<Scalar *>(pb); + MatrixType lu(a, *n, *n, *lda); + MatrixType B(b, *n, *nrhs, *ldb); - for(int i=0; i<*n; ++i) - ipiv[i]--; - if(OP(*trans)==NOTR) - { - B = PivotsType(ipiv,*n) * B; + for (int i = 0; i < *n; ++i) ipiv[i]--; + if (OP(*trans) == NOTR) { + B = PivotsType(ipiv, *n) * B; lu.triangularView<UnitLower>().solveInPlace(B); lu.triangularView<Upper>().solveInPlace(B); - } - else if(OP(*trans)==TR) - { + } else if (OP(*trans) == TR) { lu.triangularView<Upper>().transpose().solveInPlace(B); lu.triangularView<UnitLower>().transpose().solveInPlace(B); - B = PivotsType(ipiv,*n).transpose() * B; - } - else if(OP(*trans)==ADJ) - { + B = PivotsType(ipiv, *n).transpose() * B; + } else if (OP(*trans) == ADJ) { lu.triangularView<Upper>().adjoint().solveInPlace(B); lu.triangularView<UnitLower>().adjoint().solveInPlace(B); - B = PivotsType(ipiv,*n).transpose() * B; + B = PivotsType(ipiv, *n).transpose() * B; } - for(int i=0; i<*n; ++i) - ipiv[i]++; - - return 0; + for (int i = 0; i < *n; ++i) ipiv[i]++; }
diff --git a/lapack/second.cpp b/lapack/second.cpp deleted file mode 100644 index faa6187..0000000 --- a/lapack/second.cpp +++ /dev/null
@@ -1,11 +0,0 @@ -#include <ctime> - -extern "C" { -double second_(); -} - -// Elapsed CPU Time in seconds. -double second_() { - return static_cast<double>(std::clock()) / - static_cast<double>(CLOCKS_PER_SEC); -} \ No newline at end of file
diff --git a/lapack/svd.inc b/lapack/svd.inc index 83544cf..8e45310 100644 --- a/lapack/svd.inc +++ b/lapack/svd.inc
@@ -10,129 +10,141 @@ #include "lapack_common.h" #include <Eigen/SVD> +#if ISCOMPLEX +#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X, +#else +#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) +#endif + // computes the singular values/vectors a general M-by-N matrix A using divide-and-conquer -EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork, - EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info)) -{ +EIGEN_LAPACK_FUNC(gesdd) +(char *jobz, int *m, int *n, Scalar *a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, + Scalar * /*work*/, int *lwork, EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar * /*rwork*/) int * /*iwork*/, int *info) { // TODO exploit the work buffer - bool query_size = *lwork==-1; - int diag_size = (std::min)(*m,*n); - + bool query_size = *lwork == -1; + int diag_size = (std::min)(*m, *n); + *info = 0; - if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N') *info = -1; - else if(*m<0) *info = -2; - else if(*n<0) *info = -3; - else if(*lda<std::max(1,*m)) *info = -5; - else if(*lda<std::max(1,*m)) *info = -8; - else if(*ldu <1 || (*jobz=='A' && *ldu <*m) - || (*jobz=='O' && *m<*n && *ldu<*m)) *info = -8; - else if(*ldvt<1 || (*jobz=='A' && *ldvt<*n) - || (*jobz=='S' && *ldvt<diag_size) - || (*jobz=='O' && *m>=*n && *ldvt<*n)) *info = -10; - - if(*info!=0) - { + if (*jobz != 'A' && *jobz != 'S' && *jobz != 'O' && *jobz != 'N') + *info = -1; + else if (*m < 0) + *info = -2; + else if (*n < 0) + *info = -3; + else if (*lda < std::max(1, *m)) + *info = -5; + else if (*lda < std::max(1, *m)) + *info = -8; + else if (*ldu < 1 || (*jobz == 'A' && *ldu < *m) || (*jobz == 'O' && *m < *n && *ldu < *m)) + *info = -8; + else if (*ldvt < 1 || (*jobz == 'A' && *ldvt < *n) || (*jobz == 'S' && *ldvt < diag_size) || + (*jobz == 'O' && *m >= *n && *ldvt < *n)) + *info = -10; + + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"GESDD ", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "GESDD ", &e); } - - if(query_size) - { + + if (query_size) { *lwork = 0; - return 0; + return; } - - if(*n==0 || *m==0) - return 0; - - PlainMatrixType mat(*m,*n); - mat = matrix(a,*m,*n,*lda); - - int option = *jobz=='A' ? ComputeFullU|ComputeFullV - : *jobz=='S' ? ComputeThinU|ComputeThinV - : *jobz=='O' ? ComputeThinU|ComputeThinV - : 0; - BDCSVD<PlainMatrixType> svd(mat,option); - - make_vector(s,diag_size) = svd.singularValues().head(diag_size); + if (*n == 0 || *m == 0) return; - if(*jobz=='A') - { - matrix(u,*m,*m,*ldu) = svd.matrixU(); - matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + PlainMatrixType mat(*m, *n); + mat = matrix(a, *m, *n, *lda); + + int option = *jobz == 'A' ? ComputeFullU | ComputeFullV + : *jobz == 'S' ? ComputeThinU | ComputeThinV + : *jobz == 'O' ? ComputeThinU | ComputeThinV + : 0; + + BDCSVD<PlainMatrixType> svd(mat, option); + + make_vector(s, diag_size) = svd.singularValues().head(diag_size); + + if (*jobz == 'A') { + matrix(u, *m, *m, *ldu) = svd.matrixU(); + matrix(vt, *n, *n, *ldvt) = svd.matrixV().adjoint(); + } else if (*jobz == 'S') { + matrix(u, *m, diag_size, *ldu) = svd.matrixU(); + matrix(vt, diag_size, *n, *ldvt) = svd.matrixV().adjoint(); + } else if (*jobz == 'O' && *m >= *n) { + matrix(a, *m, *n, *lda) = svd.matrixU(); + matrix(vt, *n, *n, *ldvt) = svd.matrixV().adjoint(); + } else if (*jobz == 'O') { + matrix(u, *m, *m, *ldu) = svd.matrixU(); + matrix(a, diag_size, *n, *lda) = svd.matrixV().adjoint(); } - else if(*jobz=='S') - { - matrix(u,*m,diag_size,*ldu) = svd.matrixU(); - matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint(); - } - else if(*jobz=='O' && *m>=*n) - { - matrix(a,*m,*n,*lda) = svd.matrixU(); - matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); - } - else if(*jobz=='O') - { - matrix(u,*m,*m,*ldu) = svd.matrixU(); - matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); - } - - return 0; } // computes the singular values/vectors a general M-by-N matrix A using two sided jacobi algorithm -EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork, - EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info)) -{ +EIGEN_LAPACK_FUNC(gesvd) +(char *jobu, char *jobv, int *m, int *n, Scalar *a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, + Scalar * /*work*/, int *lwork, EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar * /*rwork*/) int *info) { // TODO exploit the work buffer - bool query_size = *lwork==-1; - int diag_size = (std::min)(*m,*n); - + bool query_size = *lwork == -1; + int diag_size = (std::min)(*m, *n); + *info = 0; - if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1; - else if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N') - || (*jobu=='O' && *jobv=='O')) *info = -2; - else if(*m<0) *info = -3; - else if(*n<0) *info = -4; - else if(*lda<std::max(1,*m)) *info = -6; - else if(*ldu <1 || ((*jobu=='A' || *jobu=='S') && *ldu<*m)) *info = -9; - else if(*ldvt<1 || (*jobv=='A' && *ldvt<*n) - || (*jobv=='S' && *ldvt<diag_size)) *info = -11; - - if(*info!=0) - { + if (*jobu != 'A' && *jobu != 'S' && *jobu != 'O' && *jobu != 'N') + *info = -1; + else if ((*jobv != 'A' && *jobv != 'S' && *jobv != 'O' && *jobv != 'N') || (*jobu == 'O' && *jobv == 'O')) + *info = -2; + else if (*m < 0) + *info = -3; + else if (*n < 0) + *info = -4; + else if (*lda < std::max(1, *m)) + *info = -6; + else if (*ldu < 1 || ((*jobu == 'A' || *jobu == 'S') && *ldu < *m)) + *info = -9; + else if (*ldvt < 1 || (*jobv == 'A' && *ldvt < *n) || (*jobv == 'S' && *ldvt < diag_size)) + *info = -11; + + if (*info != 0) { int e = -*info; - return xerbla_(SCALAR_SUFFIX_UP"GESVD ", &e, 6); + return xerbla_(SCALAR_SUFFIX_UP "GESVD ", &e); } - - if(query_size) - { + + if (query_size) { *lwork = 0; - return 0; + return; } - - if(*n==0 || *m==0) - return 0; - - PlainMatrixType mat(*m,*n); - mat = matrix(a,*m,*n,*lda); - - int option = (*jobu=='A' ? ComputeFullU : *jobu=='S' || *jobu=='O' ? ComputeThinU : 0) - | (*jobv=='A' ? ComputeFullV : *jobv=='S' || *jobv=='O' ? ComputeThinV : 0); - - JacobiSVD<PlainMatrixType> svd(mat,option); - - make_vector(s,diag_size) = svd.singularValues().head(diag_size); + + if (*n == 0 || *m == 0) return; + + PlainMatrixType mat(*m, *n); + mat = matrix(a, *m, *n, *lda); + + int option = (*jobu == 'A' ? ComputeFullU + : *jobu == 'S' || *jobu == 'O' ? ComputeThinU + : 0) | + (*jobv == 'A' ? ComputeFullV + : *jobv == 'S' || *jobv == 'O' ? ComputeThinV + : 0); + + JacobiSVD<PlainMatrixType> svd(mat, option); + + make_vector(s, diag_size) = svd.singularValues().head(diag_size); { - if(*jobu=='A') matrix(u,*m,*m,*ldu) = svd.matrixU(); - else if(*jobu=='S') matrix(u,*m,diag_size,*ldu) = svd.matrixU(); - else if(*jobu=='O') matrix(a,*m,diag_size,*lda) = svd.matrixU(); + if (*jobu == 'A') + matrix(u, *m, *m, *ldu) = svd.matrixU(); + else if (*jobu == 'S') + matrix(u, *m, diag_size, *ldu) = svd.matrixU(); + else if (*jobu == 'O') + matrix(a, *m, diag_size, *lda) = svd.matrixU(); } { - if(*jobv=='A') matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); - else if(*jobv=='S') matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint(); - else if(*jobv=='O') matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); + if (*jobv == 'A') + matrix(vt, *n, *n, *ldvt) = svd.matrixV().adjoint(); + else if (*jobv == 'S') + matrix(vt, diag_size, *n, *ldvt) = svd.matrixV().adjoint(); + else if (*jobv == 'O') + matrix(a, diag_size, *n, *lda) = svd.matrixV().adjoint(); } - return 0; -} \ No newline at end of file +} + +#undef EIGEN_LAPACK_ARG_IF_COMPLEX \ No newline at end of file
diff --git a/test/AnnoyingScalar.h b/test/AnnoyingScalar.h index b4f77ad..637fdbf 100644 --- a/test/AnnoyingScalar.h +++ b/test/AnnoyingScalar.h
@@ -183,6 +183,20 @@ EIGEN_STRONG_INLINE float cast(const AnnoyingScalar& x) { return *x.v; } + +template <> +struct random_impl<AnnoyingScalar> { + using Impl = random_impl<float>; + static EIGEN_DEVICE_FUNC inline AnnoyingScalar run(const AnnoyingScalar& x, const AnnoyingScalar& y) { + float result = Impl::run(*x.v, *y.v); + return AnnoyingScalar(result); + } + static EIGEN_DEVICE_FUNC inline AnnoyingScalar run() { + float result = Impl::run(); + return AnnoyingScalar(result); + } +}; + } // namespace internal } // namespace Eigen
diff --git a/test/MovableScalar.h b/test/MovableScalar.h index 52591bc..56a873e 100644 --- a/test/MovableScalar.h +++ b/test/MovableScalar.h
@@ -28,6 +28,23 @@ template <> struct NumTraits<MovableScalar<float>> : GenericNumTraits<float> {}; + +namespace internal { +template <typename T> +struct random_impl<MovableScalar<T>> { + using MoveableT = MovableScalar<T>; + using Impl = random_impl<T>; + static EIGEN_DEVICE_FUNC inline MoveableT run(const MoveableT& x, const MoveableT& y) { + T result = Impl::run(x, y); + return MoveableT(result); + } + static EIGEN_DEVICE_FUNC inline MoveableT run() { + T result = Impl::run(); + return MoveableT(result); + } +}; +} // namespace internal + } // namespace Eigen #endif
diff --git a/test/SafeScalar.h b/test/SafeScalar.h index 2079af4..4f4da56 100644 --- a/test/SafeScalar.h +++ b/test/SafeScalar.h
@@ -26,3 +26,21 @@ T val_; bool initialized_; }; + +namespace Eigen { +namespace internal { +template <typename T> +struct random_impl<SafeScalar<T>> { + using SafeT = SafeScalar<T>; + using Impl = random_impl<T>; + static EIGEN_DEVICE_FUNC inline SafeT run(const SafeT& x, const SafeT& y) { + T result = Impl::run(x, y); + return SafeT(result); + } + static EIGEN_DEVICE_FUNC inline SafeT run() { + T result = Impl::run(); + return SafeT(result); + } +}; +} // namespace internal +} // namespace Eigen
diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index afe6894..237ac67 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp
@@ -19,12 +19,18 @@ Index cols = m.cols(); MatrixType m1 = MatrixType::Random(rows, cols), m2 = MatrixType::Random(rows, cols), m3(rows, cols); - ColVectorType cv1 = ColVectorType::Random(rows); RowVectorType rv1 = RowVectorType::Random(cols); Scalar s1 = internal::random<Scalar>(), s2 = internal::random<Scalar>(); + // Prevent overflows for integer types. + if (Eigen::NumTraits<Scalar>::IsInteger) { + Scalar kMaxVal = Scalar(10000); + m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal); + m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal); + } + // scalar addition VERIFY_IS_APPROX(m1.array() + s1, s1 + m1.array()); VERIFY_IS_APPROX((m1.array() + s1).matrix(), MatrixType::Constant(rows, cols, s1) + m1);
diff --git a/test/bdcsvd.cpp b/test/bdcsvd.cpp index c51f354..3ba4cb7 100644 --- a/test/bdcsvd.cpp +++ b/test/bdcsvd.cpp
@@ -77,6 +77,14 @@ svd_verify_constructor_options_assert<BDCSVD<MatrixType>>(input); } +template <typename MatrixType> +void bdcsvd_check_convergence(const MatrixType& input) { + BDCSVD<MatrixType, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(input); + VERIFY(svd.info() == Eigen::Success); + MatrixType D = svd.matrixU() * svd.singularValues().asDiagonal() * svd.matrixV().transpose(); + VERIFY_IS_APPROX(input, D); +} + EIGEN_DECLARE_TEST(bdcsvd) { CALL_SUBTEST_1((bdcsvd_verify_assert<Matrix3f>())); CALL_SUBTEST_2((bdcsvd_verify_assert<Matrix4d>())); @@ -163,4 +171,7 @@ // With total deflation issues before, when it shouldn't be triggered. CALL_SUBTEST_47((compare_bdc_jacobi_instance(true, 3))); CALL_SUBTEST_48((compare_bdc_jacobi_instance(false, 3))); + + // Convergence for large constant matrix (https://gitlab.com/libeigen/eigen/-/issues/2491) + CALL_SUBTEST_49(bdcsvd_check_convergence<MatrixXf>(MatrixXf::Constant(500, 500, 1))); }
diff --git a/test/cholmod_support.cpp b/test/cholmod_support.cpp index 24126a0..0666749 100644 --- a/test/cholmod_support.cpp +++ b/test/cholmod_support.cpp
@@ -54,6 +54,13 @@ check_sparse_spd_determinant(llt_colmajor_upper); check_sparse_spd_determinant(ldlt_colmajor_lower); check_sparse_spd_determinant(ldlt_colmajor_upper); + + check_sparse_zero_matrix(chol_colmajor_lower); + check_sparse_zero_matrix(chol_colmajor_upper); + check_sparse_zero_matrix(llt_colmajor_lower); + check_sparse_zero_matrix(llt_colmajor_upper); + check_sparse_zero_matrix(ldlt_colmajor_lower); + check_sparse_zero_matrix(ldlt_colmajor_upper); } template <typename T, int flags, typename IdxType>
diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index 266e58b..f335b34 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp
@@ -36,10 +36,10 @@ const Index dim = box.dim(); - VectorType p0 = VectorType::Random(dim); - VectorType p1 = VectorType::Random(dim); + VectorType p0 = VectorType::Random(dim) / Scalar(2); + VectorType p1 = VectorType::Random(dim) / Scalar(2); while (p1 == p0) { - p1 = VectorType::Random(dim); + p1 = VectorType::Random(dim) / Scalar(2); } RealScalar s1 = internal::random<RealScalar>(0, 1); @@ -216,7 +216,7 @@ Matrix<Scalar, Dim, (1 << Dim)> boxGetCorners(const Matrix<Scalar, Dim, 1>& min_, const Matrix<Scalar, Dim, 1>& max_) { Matrix<Scalar, Dim, (1 << Dim)> result; for (Index i = 0; i < (1 << Dim); ++i) { - for (Index j = 0; j < Dim; ++j) result(j, i) = (i & (1 << j)) ? min_(j) : max_(j); + for (Index j = 0; j < Dim; ++j) result(j, i) = (i & (Index(1) << j)) ? min_(j) : max_(j); } return result; }
diff --git a/test/indexed_view.cpp b/test/indexed_view.cpp index 494e0d6..4040448 100644 --- a/test/indexed_view.cpp +++ b/test/indexed_view.cpp
@@ -7,8 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#include <valarray> #include <vector> + #include "main.h" using Eigen::placeholders::all; @@ -17,11 +17,13 @@ using Eigen::placeholders::lastp1; #include <array> +namespace test { typedef std::pair<Index, Index> IndexPair; +} int encode(Index i, Index j) { return int(i * 100 + j); } -IndexPair decode(Index ij) { return IndexPair(ij / 100, ij % 100); } +test::IndexPair decode(Index ij) { return test::IndexPair(ij / 100, ij % 100); } template <typename T> bool match(const T& xpr, std::string ref, std::string str_xpr = "") { @@ -69,12 +71,10 @@ ArrayXXi A = ArrayXXi::NullaryExpr(n, n, std::ref(encode)); for (Index i = 0; i < n; ++i) - for (Index j = 0; j < n; ++j) VERIFY(decode(A(i, j)) == IndexPair(i, j)); + for (Index j = 0; j < n; ++j) VERIFY(decode(A(i, j)) == test::IndexPair(i, j)); Array4i eii(4); eii << 3, 1, 6, 5; - std::valarray<int> vali(4); - Map<ArrayXi>(&vali[0], 4) = eii; std::vector<int> veci(4); Map<ArrayXi>(veci.data(), 4) = eii;
diff --git a/test/main.h b/test/main.h index 771725f..bce6736 100644 --- a/test/main.h +++ b/test/main.h
@@ -176,11 +176,6 @@ #define DEBUG #endif -// bounds integer values for AltiVec -#if defined(__ALTIVEC__) || defined(__VSX__) -#define EIGEN_MAKING_DOCS -#endif - #define DEFAULT_REPEAT 10 namespace Eigen {
diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b2cca73..bf2970c 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp
@@ -382,12 +382,6 @@ } }; -// Packet16b representing bool does not support ptrue, pandnot or pcmp_eq, since -// the scalar path (for some compilers) compute the bitwise and with 0x1 of the -// results to keep the value in [0,1]. -template <> -void packetmath_boolean_mask_ops<bool, internal::packet_traits<bool>::type>() {} - template <typename Scalar, typename Packet, typename EnableIf = void> struct packetmath_minus_zero_add_test { static void run() {} @@ -538,7 +532,7 @@ CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); - CHECK_CWISE1_IF(PacketTraits::HasNegate, internal::negate, internal::pnegate); + CHECK_CWISE1_IF(PacketTraits::HasNegate, test::negate, internal::pnegate); CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal); CHECK_CWISE1(numext::conj, internal::pconj); CHECK_CWISE1_IF(PacketTraits::HasSign, numext::sign, internal::psign); @@ -662,11 +656,11 @@ { for (int i = 0; i < PacketSize; ++i) { // "if" mask - unsigned char v = internal::random<bool>() ? 0xff : 0; - char* bytes = (char*)(data1 + i); - for (int k = 0; k < int(sizeof(Scalar)); ++k) { - bytes[k] = v; - } + // Note: it's UB to load 0xFF directly into a `bool`. + uint8_t v = + internal::random<bool>() ? (std::is_same<Scalar, bool>::value ? static_cast<uint8_t>(true) : 0xff) : 0; + // Avoid strict aliasing violation by using memset. + memset(static_cast<void*>(data1 + i), v, sizeof(Scalar)); // "then" packet data1[i + PacketSize] = internal::random<Scalar>(); // "else" packet @@ -1141,7 +1135,7 @@ data1[0] = -Scalar(0.); h.store(data2, internal::psin(h.load(data1))); - VERIFY(internal::biteq(data2[0], data1[0])); + VERIFY(test::biteq(data2[0], data1[0])); h.store(data2, internal::pcos(h.load(data1))); VERIFY_IS_EQUAL(data2[0], Scalar(1)); } @@ -1220,6 +1214,15 @@ CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_number_min, internal::pmin<PropagateNumbers>); CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_number_max, internal::pmax<PropagateNumbers>); CHECK_CWISE1(numext::abs, internal::pabs); + // Vectorized versions may give a different result in the case of signed int overflow, + // which is undefined behavior (e.g. NEON). + // Also note that unsigned integers with size < sizeof(int) may be implicitly converted to a signed + // int, which can also trigger UB. + if (Eigen::NumTraits<Scalar>::IsInteger) { + for (int i = 0; i < 2 * PacketSize; ++i) { + data1[i] = data1[i] / Scalar(2); + } + } CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff); ref[0] = data1[0];
diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h index a4bb347..86a01fb 100644 --- a/test/packetmath_test_shared.h +++ b/test/packetmath_test_shared.h
@@ -19,7 +19,8 @@ bool g_first_pass = true; namespace Eigen { -namespace internal { + +namespace test { template <typename T> T negate(const T& x) { @@ -31,50 +32,11 @@ return Map<const Array<unsigned char, sizeof(T), 1> >(reinterpret_cast<const unsigned char*>(&x)); } -// The following implement bitwise operations on floating point types -template <typename T, typename Bits, typename Func> -T apply_bit_op(Bits a, Bits b, Func f) { - Array<unsigned char, sizeof(T), 1> data; - T res; - for (Index i = 0; i < data.size(); ++i) data[i] = f(a[i], b[i]); - // Note: The reinterpret_cast works around GCC's class-memaccess warnings: - std::memcpy(reinterpret_cast<unsigned char*>(&res), data.data(), sizeof(T)); - return res; -} - -#define EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, T) \ - template <> \ - T EIGEN_CAT(p, OP)(const T& a, const T& b) { \ - return apply_bit_op<T>(bits(a), bits(b), FUNC); \ - } - -#define EIGEN_TEST_MAKE_BITWISE(OP, FUNC) \ - EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, float) \ - EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, double) \ - EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, half) \ - EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, bfloat16) \ - EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, std::complex<float>) \ - EIGEN_TEST_MAKE_BITWISE2(OP, FUNC, std::complex<double>) - -EIGEN_TEST_MAKE_BITWISE(xor, std::bit_xor<unsigned char>()) -EIGEN_TEST_MAKE_BITWISE(and, std::bit_and<unsigned char>()) -EIGEN_TEST_MAKE_BITWISE(or, std::bit_or<unsigned char>()) -struct bit_andnot { - template <typename T> - T operator()(T a, T b) const { - return a & (~b); - } -}; -EIGEN_TEST_MAKE_BITWISE(andnot, bit_andnot()) template <typename T> bool biteq(T a, T b) { return (bits(a) == bits(b)).all(); } -} // namespace internal - -namespace test { - // NOTE: we disable inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU. template <typename Scalar> EIGEN_DONT_INLINE bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits<Scalar>::Real& refvalue) {
diff --git a/test/product.h b/test/product.h index 8d46846..74f01b0 100644 --- a/test/product.h +++ b/test/product.h
@@ -53,7 +53,7 @@ MatrixType::Flags & RowMajorBit ? ColMajor : RowMajor> OtherMajorMatrixType; - // Wwe want a tighter epsilon for not-approx tests. Otherwise, for certain + // We want a tighter epsilon for not-approx tests. Otherwise, for certain // low-precision types (e.g. bfloat16), the bound ends up being relatively large // (e.g. 0.12), causing flaky tests. RealScalar not_approx_epsilon = RealScalar(0.1) * NumTraits<RealScalar>::dummy_precision(); @@ -69,6 +69,15 @@ ColSquareMatrixType square2 = ColSquareMatrixType::Random(cols, cols), res2 = ColSquareMatrixType::Random(cols, cols); RowVectorType v1 = RowVectorType::Random(rows); ColVectorType vc2 = ColVectorType::Random(cols), vcres(cols); + + // Prevent overflows for integer types. + if (Eigen::NumTraits<Scalar>::IsInteger) { + Scalar kMaxVal = Scalar(10000); + m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal); + m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal); + v1.array() = v1.array() - kMaxVal * (v1.array() / kMaxVal); + } + OtherMajorMatrixType tm1 = m1; Scalar s1 = internal::random<Scalar>();
diff --git a/test/rand.cpp b/test/rand.cpp index 2b193ff..b5cf801 100644 --- a/test/rand.cpp +++ b/test/rand.cpp
@@ -7,12 +7,14 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#include <cstdlib> #include "main.h" -typedef long long int64; - +// For GCC-6, if this function is inlined then there seems to be an optimization +// bug that triggers a failure. This failure goes away if you access `r` in +// in any way, and for any other compiler. template <typename Scalar> -Scalar check_in_range(Scalar x, Scalar y) { +EIGEN_DONT_INLINE Scalar check_in_range(Scalar x, Scalar y) { Scalar r = internal::random<Scalar>(x, y); VERIFY(r >= x); if (y >= x) { @@ -25,8 +27,8 @@ void check_all_in_range(Scalar x, Scalar y) { Array<int, 1, Dynamic> mask(y - x + 1); mask.fill(0); - long n = (y - x + 1) * 32; - for (long k = 0; k < n; ++k) { + int64_t n = (y - x + 1) * 32; + for (int64_t k = 0; k < n; ++k) { mask(check_in_range(x, y) - x)++; } for (Index i = 0; i < mask.size(); ++i) @@ -34,82 +36,203 @@ VERIFY((mask > 0).all()); } +template <typename Scalar, typename EnableIf = void> +class HistogramHelper { + public: + HistogramHelper(int nbins) : HistogramHelper(Scalar(-1), Scalar(1), nbins) {} + HistogramHelper(Scalar lower, Scalar upper, int nbins) { + lower_ = static_cast<double>(lower); + upper_ = static_cast<double>(upper); + num_bins_ = nbins; + bin_width_ = (upper_ - lower_) / static_cast<double>(nbins); + } + int bin(Scalar v) { + double result = (static_cast<double>(v) - lower_) / bin_width_; + return std::min<int>(static_cast<int>(result), num_bins_ - 1); + } + + double uniform_bin_probability(int bin) { + double range = upper_ - lower_; + if (bin < num_bins_ - 1) { + return bin_width_ / range; + } + return (upper_ - (lower_ + double(bin) * bin_width_)) / range; + } + + private: + double lower_; + double upper_; + int num_bins_; + double bin_width_; +}; + +template <typename Scalar> +class HistogramHelper<Scalar, std::enable_if_t<Eigen::NumTraits<Scalar>::IsInteger>> { + public: + using RangeType = typename Eigen::internal::make_unsigned<Scalar>::type; + HistogramHelper(int nbins) + : HistogramHelper(Eigen::NumTraits<Scalar>::lowest(), Eigen::NumTraits<Scalar>::highest(), nbins) {} + HistogramHelper(Scalar lower, Scalar upper, int nbins) + : lower_{lower}, upper_{upper}, num_bins_{nbins}, bin_width_{bin_width(lower, upper, nbins)} {} + + int bin(Scalar v) { return static_cast<int>(RangeType(v - lower_) / bin_width_); } + + double uniform_bin_probability(int bin) { + // Avoid overflow in computing range. + double range = static_cast<double>(RangeType(upper_ - lower_)) + 1.0; + if (bin < num_bins_ - 1) { + return static_cast<double>(bin_width_) / range; + } + return static_cast<double>(RangeType(upper_) - RangeType((lower_ + bin * bin_width_)) + 1) / range; + } + + private: + static constexpr Scalar bin_width(Scalar lower, Scalar upper, int nbins) { + // Avoid overflow in computing the full range. + return RangeType(upper - nbins - lower + 1) / nbins + 1; + } + + Scalar lower_; + Scalar upper_; + int num_bins_; + Scalar bin_width_; +}; + template <typename Scalar> void check_histogram(Scalar x, Scalar y, int bins) { - Array<int, 1, Dynamic> hist(bins); - hist.fill(0); - int f = 100000; - int n = bins * f; - int64 range = int64(y) - int64(x); - int divisor = int((range + 1) / bins); - assert(((range + 1) % bins) == 0); - for (int k = 0; k < n; ++k) { + Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins); + HistogramHelper<Scalar> hist_helper(x, y, bins); + int64_t n = static_cast<int64_t>(bins) * 10000; // Approx 10000 per bin. + for (int64_t k = 0; k < n; ++k) { Scalar r = check_in_range(x, y); - hist(int((int64(r) - int64(x)) / divisor))++; + int bin = hist_helper.bin(r); + hist(bin)++; } - VERIFY((((hist.cast<double>() / double(f)) - 1.0).abs() < 0.03).all()); + // Normalize bins by probability. + for (int i = 0; i < bins; ++i) { + hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i); + } + VERIFY(((hist.array() - 1.0).abs() < 0.05).all()); +} + +template <typename Scalar> +void check_histogram(int bins) { + Eigen::VectorXd hist = Eigen::VectorXd::Zero(bins); + HistogramHelper<Scalar> hist_helper(bins); + int64_t n = static_cast<int64_t>(bins) * 10000; // Approx 10000 per bin. + for (int64_t k = 0; k < n; ++k) { + Scalar r = Eigen::internal::random<Scalar>(); + int bin = hist_helper.bin(r); + hist(bin)++; + } + // Normalize bins by probability. + for (int i = 0; i < bins; ++i) { + hist(i) = hist(i) / n / hist_helper.uniform_bin_probability(i); + } + VERIFY(((hist.array() - 1.0).abs() < 0.05).all()); } EIGEN_DECLARE_TEST(rand) { - long long_ref = NumTraits<long>::highest() / 10; + int64_t int64_ref = NumTraits<int64_t>::highest() / 10; // the minimum guarantees that these conversions are safe - auto char_offset = static_cast<signed char>((std::min)(g_repeat, 64)); - auto short_offset = static_cast<signed short>((std::min)(g_repeat, 8000)); + int8_t int8t_offset = static_cast<int8_t>((std::min)(g_repeat, 64)); + int16_t int16t_offset = static_cast<int16_t>((std::min)(g_repeat, 8000)); + EIGEN_UNUSED_VARIABLE(int64_ref); + EIGEN_UNUSED_VARIABLE(int8t_offset); + EIGEN_UNUSED_VARIABLE(int16t_offset); for (int i = 0; i < g_repeat * 10000; i++) { - CALL_SUBTEST(check_in_range<float>(10, 11)); - CALL_SUBTEST(check_in_range<float>(1.24234523f, 1.24234523f)); - CALL_SUBTEST(check_in_range<float>(-1, 1)); - CALL_SUBTEST(check_in_range<float>(-1432.2352f, -1432.2352f)); + CALL_SUBTEST_1(check_in_range<float>(10.0f, 11.0f)); + CALL_SUBTEST_1(check_in_range<float>(1.24234523f, 1.24234523f)); + CALL_SUBTEST_1(check_in_range<float>(-1.0f, 1.0f)); + CALL_SUBTEST_1(check_in_range<float>(-1432.2352f, -1432.2352f)); - CALL_SUBTEST(check_in_range<double>(10, 11)); - CALL_SUBTEST(check_in_range<double>(1.24234523, 1.24234523)); - CALL_SUBTEST(check_in_range<double>(-1, 1)); - CALL_SUBTEST(check_in_range<double>(-1432.2352, -1432.2352)); + CALL_SUBTEST_2(check_in_range<double>(10.0, 11.0)); + CALL_SUBTEST_2(check_in_range<double>(1.24234523, 1.24234523)); + CALL_SUBTEST_2(check_in_range<double>(-1.0, 1.0)); + CALL_SUBTEST_2(check_in_range<double>(-1432.2352, -1432.2352)); - CALL_SUBTEST(check_in_range<int>(0, -1)); - CALL_SUBTEST(check_in_range<short>(0, -1)); - CALL_SUBTEST(check_in_range<long>(0, -1)); - CALL_SUBTEST(check_in_range<int>(-673456, 673456)); - CALL_SUBTEST(check_in_range<int>(-RAND_MAX + 10, RAND_MAX - 10)); - CALL_SUBTEST(check_in_range<short>(-24345, 24345)); - CALL_SUBTEST(check_in_range<long>(-long_ref, long_ref)); + CALL_SUBTEST_3(check_in_range<long double>(10.0L, 11.0L)); + CALL_SUBTEST_3(check_in_range<long double>(1.24234523L, 1.24234523L)); + CALL_SUBTEST_3(check_in_range<long double>(-1.0L, 1.0L)); + CALL_SUBTEST_3(check_in_range<long double>(-1432.2352L, -1432.2352L)); + + CALL_SUBTEST_4(check_in_range<half>(half(10.0f), half(11.0f))); + CALL_SUBTEST_4(check_in_range<half>(half(1.24234523f), half(1.24234523f))); + CALL_SUBTEST_4(check_in_range<half>(half(-1.0f), half(1.0f))); + CALL_SUBTEST_4(check_in_range<half>(half(-1432.2352f), half(-1432.2352f))); + + CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(10.0f), bfloat16(11.0f))); + CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(1.24234523f), bfloat16(1.24234523f))); + CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(-1.0f), bfloat16(1.0f))); + CALL_SUBTEST_5(check_in_range<bfloat16>(bfloat16(-1432.2352f), bfloat16(-1432.2352f))); + + CALL_SUBTEST_6(check_in_range<int32_t>(0, -1)); + CALL_SUBTEST_6(check_in_range<int16_t>(0, -1)); + CALL_SUBTEST_6(check_in_range<int64_t>(0, -1)); + CALL_SUBTEST_6(check_in_range<int32_t>(-673456, 673456)); + CALL_SUBTEST_6(check_in_range<int32_t>(-RAND_MAX + 10, RAND_MAX - 10)); + CALL_SUBTEST_6(check_in_range<int16_t>(-24345, 24345)); + CALL_SUBTEST_6(check_in_range<int64_t>(-int64_ref, int64_ref)); } - CALL_SUBTEST(check_all_in_range<signed char>(11, 11)); - CALL_SUBTEST(check_all_in_range<signed char>(11, 11 + char_offset)); - CALL_SUBTEST(check_all_in_range<signed char>(-5, 5)); - CALL_SUBTEST(check_all_in_range<signed char>(-11 - char_offset, -11)); - CALL_SUBTEST(check_all_in_range<signed char>(-126, -126 + char_offset)); - CALL_SUBTEST(check_all_in_range<signed char>(126 - char_offset, 126)); - CALL_SUBTEST(check_all_in_range<signed char>(-126, 126)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(11, 11)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(11, 11 + int8t_offset)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(-5, 5)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(-11 - int8t_offset, -11)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, -126 + int8t_offset)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(126 - int8t_offset, 126)); + CALL_SUBTEST_7(check_all_in_range<int8_t>(-126, 126)); - CALL_SUBTEST(check_all_in_range<short>(11, 11)); - CALL_SUBTEST(check_all_in_range<short>(11, 11 + short_offset)); - CALL_SUBTEST(check_all_in_range<short>(-5, 5)); - CALL_SUBTEST(check_all_in_range<short>(-11 - short_offset, -11)); - CALL_SUBTEST(check_all_in_range<short>(-24345, -24345 + short_offset)); - CALL_SUBTEST(check_all_in_range<short>(24345, 24345 + short_offset)); + CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11)); + CALL_SUBTEST_8(check_all_in_range<int16_t>(11, 11 + int16t_offset)); + CALL_SUBTEST_8(check_all_in_range<int16_t>(-5, 5)); + CALL_SUBTEST_8(check_all_in_range<int16_t>(-11 - int16t_offset, -11)); + CALL_SUBTEST_8(check_all_in_range<int16_t>(-24345, -24345 + int16t_offset)); + CALL_SUBTEST_8(check_all_in_range<int16_t>(24345, 24345 + int16t_offset)); - CALL_SUBTEST(check_all_in_range<int>(11, 11)); - CALL_SUBTEST(check_all_in_range<int>(11, 11 + g_repeat)); - CALL_SUBTEST(check_all_in_range<int>(-5, 5)); - CALL_SUBTEST(check_all_in_range<int>(-11 - g_repeat, -11)); - CALL_SUBTEST(check_all_in_range<int>(-673456, -673456 + g_repeat)); - CALL_SUBTEST(check_all_in_range<int>(673456, 673456 + g_repeat)); + CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11)); + CALL_SUBTEST_9(check_all_in_range<int32_t>(11, 11 + g_repeat)); + CALL_SUBTEST_9(check_all_in_range<int32_t>(-5, 5)); + CALL_SUBTEST_9(check_all_in_range<int32_t>(-11 - g_repeat, -11)); + CALL_SUBTEST_9(check_all_in_range<int32_t>(-673456, -673456 + g_repeat)); + CALL_SUBTEST_9(check_all_in_range<int32_t>(673456, 673456 + g_repeat)); - CALL_SUBTEST(check_all_in_range<long>(11, 11)); - CALL_SUBTEST(check_all_in_range<long>(11, 11 + g_repeat)); - CALL_SUBTEST(check_all_in_range<long>(-5, 5)); - CALL_SUBTEST(check_all_in_range<long>(-11 - g_repeat, -11)); - CALL_SUBTEST(check_all_in_range<long>(-long_ref, -long_ref + g_repeat)); - CALL_SUBTEST(check_all_in_range<long>(long_ref, long_ref + g_repeat)); + CALL_SUBTEST_10(check_all_in_range<int64_t>(11, 11)); + CALL_SUBTEST_10(check_all_in_range<int64_t>(11, 11 + g_repeat)); + CALL_SUBTEST_10(check_all_in_range<int64_t>(-5, 5)); + CALL_SUBTEST_10(check_all_in_range<int64_t>(-11 - g_repeat, -11)); + CALL_SUBTEST_10(check_all_in_range<int64_t>(-int64_ref, -int64_ref + g_repeat)); + CALL_SUBTEST_10(check_all_in_range<int64_t>(int64_ref, int64_ref + g_repeat)); - CALL_SUBTEST(check_histogram<int>(-5, 5, 11)); + CALL_SUBTEST_11(check_histogram<int32_t>(-5, 5, 11)); int bins = 100; - CALL_SUBTEST(check_histogram<int>(-3333, -3333 + bins * (3333 / bins) - 1, bins)); + EIGEN_UNUSED_VARIABLE(bins) + CALL_SUBTEST_11(check_histogram<int32_t>(-3333, -3333 + bins * (3333 / bins) - 1, bins)); bins = 1000; - CALL_SUBTEST(check_histogram<int>(-RAND_MAX + 10, -RAND_MAX + 10 + bins * (RAND_MAX / bins) - 1, bins)); - CALL_SUBTEST( - check_histogram<int>(-RAND_MAX + 10, -int64(RAND_MAX) + 10 + bins * (2 * int64(RAND_MAX) / bins) - 1, bins)); + CALL_SUBTEST_11(check_histogram<int32_t>(-RAND_MAX + 10, -RAND_MAX + 10 + bins * (RAND_MAX / bins) - 1, bins)); + CALL_SUBTEST_11(check_histogram<int32_t>(-RAND_MAX + 10, + -int64_t(RAND_MAX) + 10 + bins * (2 * int64_t(RAND_MAX) / bins) - 1, bins)); + + CALL_SUBTEST_12(check_histogram<uint8_t>(/*bins=*/16)); + CALL_SUBTEST_12(check_histogram<uint16_t>(/*bins=*/1024)); + CALL_SUBTEST_12(check_histogram<uint32_t>(/*bins=*/1024)); + CALL_SUBTEST_12(check_histogram<uint64_t>(/*bins=*/1024)); + + CALL_SUBTEST_13(check_histogram<int8_t>(/*bins=*/16)); + CALL_SUBTEST_13(check_histogram<int16_t>(/*bins=*/1024)); + CALL_SUBTEST_13(check_histogram<int32_t>(/*bins=*/1024)); + CALL_SUBTEST_13(check_histogram<int64_t>(/*bins=*/1024)); + + CALL_SUBTEST_14(check_histogram<float>(-10.0f, 10.0f, /*bins=*/1024)); + CALL_SUBTEST_14(check_histogram<double>(-10.0, 10.0, /*bins=*/1024)); + CALL_SUBTEST_14(check_histogram<long double>(-10.0L, 10.0L, /*bins=*/1024)); + CALL_SUBTEST_14(check_histogram<half>(half(-10.0f), half(10.0f), /*bins=*/512)); + CALL_SUBTEST_14(check_histogram<bfloat16>(bfloat16(-10.0f), bfloat16(10.0f), /*bins=*/64)); + + CALL_SUBTEST_15(check_histogram<float>(/*bins=*/1024)); + CALL_SUBTEST_15(check_histogram<double>(/*bins=*/1024)); + CALL_SUBTEST_15(check_histogram<long double>(/*bins=*/1024)); + CALL_SUBTEST_15(check_histogram<half>(/*bins=*/512)); + CALL_SUBTEST_15(check_histogram<bfloat16>(/*bins=*/64)); }
diff --git a/test/redux.cpp b/test/redux.cpp index 8a8138d..42c269a 100644 --- a/test/redux.cpp +++ b/test/redux.cpp
@@ -24,12 +24,18 @@ MatrixType m1 = MatrixType::Random(rows, cols); - // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test + // The entries of m1 are uniformly distributed in [-1,1), so m1.prod() is very small. This may lead to test // failures if we underflow into denormals. Thus, we scale so that entries are close to 1. MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1; Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> m2(rows, rows); m2.setRandom(); + // Prevent overflows for integer types. + if (Eigen::NumTraits<Scalar>::IsInteger) { + Scalar kMaxVal = Scalar(10000); + m1.array() = m1.array() - kMaxVal * (m1.array() / kMaxVal); + m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal); + } VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); VERIFY_IS_APPROX(
diff --git a/test/skew_symmetric_matrix3.cpp b/test/skew_symmetric_matrix3.cpp index d45262f..48d490d 100644 --- a/test/skew_symmetric_matrix3.cpp +++ b/test/skew_symmetric_matrix3.cpp
@@ -177,7 +177,7 @@ // rotate around z-axis Vector v2; - v2 << 0, 0, EIGEN_PI; + v2 << 0, 0, Scalar(EIGEN_PI); const SquareMatrix r2 = v2.asSkewSymmetric().exponential(); VERIFY_IS_APPROX(r2 * (Vector() << 1, 0, 0).finished(), (Vector() << -1, 0, 0).finished()); VERIFY_IS_APPROX(r2 * (Vector() << 0, 1, 0).finished(), (Vector() << 0, -1, 0).finished()); @@ -196,8 +196,8 @@ CALL_SUBTEST_2(plusMinus<double>()); CALL_SUBTEST_2(multiplyScale<float>()); CALL_SUBTEST_2(multiplyScale<double>()); - CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXf(3, internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXd(3, internal::random<int>(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXf(3, internal::random<int>(3, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_2(skewSymmetricMultiplication(MatrixXd(3, internal::random<int>(3, EIGEN_TEST_MAX_SIZE)))); CALL_SUBTEST_2(traceAndDet<float>()); CALL_SUBTEST_2(traceAndDet<double>()); CALL_SUBTEST_2(transpose<float>());
diff --git a/test/sparse_solver.h b/test/sparse_solver.h index e7518e4..033df83 100644 --- a/test/sparse_solver.h +++ b/test/sparse_solver.h
@@ -484,6 +484,15 @@ } } +template <typename Solver> +void check_sparse_zero_matrix(Solver& solver) { + typedef typename Solver::MatrixType Mat; + + Mat A(1, 1); + solver.compute(A); + VERIFY_IS_EQUAL(solver.info(), NumericalIssue); +} + template <typename Solver, typename DenseMat> Index generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) {
diff --git a/test/stl_iterators.cpp b/test/stl_iterators.cpp index 7a62673..fee8ef5 100644 --- a/test/stl_iterators.cpp +++ b/test/stl_iterators.cpp
@@ -463,6 +463,13 @@ // check rows/cols iterators with STL algorithms { RowVectorType row = RowVectorType::Random(cols); + VectorType col = VectorType::Random(rows); + // Prevent overflows for integer types. + if (Eigen::NumTraits<Scalar>::IsInteger) { + Scalar kMaxVal = Scalar(1000); + row.array() = row.array() - kMaxVal * (row.array() / kMaxVal); + col.array() = col.array() - kMaxVal * (col.array() / kMaxVal); + } A.rowwise() = row; VERIFY(std::all_of(A.rowwise().begin(), A.rowwise().end(), [&row](typename ColMatrixType::RowXpr x) { return internal::isApprox(x.squaredNorm(), row.squaredNorm()); @@ -471,7 +478,6 @@ return internal::isApprox(x.squaredNorm(), row.squaredNorm()); })); - VectorType col = VectorType::Random(rows); A.colwise() = col; VERIFY(std::all_of(A.colwise().begin(), A.colwise().end(), [&col](typename ColMatrixType::ColXpr x) { return internal::isApprox(x.squaredNorm(), col.squaredNorm());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 9375398..f9f07d4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -1007,13 +1007,14 @@ #include EIGEN_READONLY_TENSORBASE_PLUGIN #endif + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } + protected: template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor; template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize; // the Eigen:: prefix is required to workaround a compilation issue with nvcc 9.0 template <typename OtherDerived, int AccessLevel> friend class Eigen::TensorBase; - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } }; template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value> @@ -1199,6 +1200,11 @@ return TensorAsyncDevice<Derived, DeviceType, DoneCallback>(dev, derived(), std::move(done)); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } + #ifdef EIGEN_TENSORBASE_PLUGIN #include EIGEN_TENSORBASE_PLUGIN #endif @@ -1215,10 +1221,6 @@ internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); return derived(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } }; #endif // EIGEN_PARSED_BY_DOXYGEN } // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h index 8d330b5..6a1240c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
@@ -23,9 +23,12 @@ template <typename ADerived, typename BDerived, typename XDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived, const BDerived, const XDerived> -betainc(const ADerived& a, const BDerived& b, const XDerived& x) { +betainc(const Eigen::TensorBase<ADerived, ReadOnlyAccessors>& a, + const Eigen::TensorBase<BDerived, ReadOnlyAccessors>& b, + const Eigen::TensorBase<XDerived, ReadOnlyAccessors>& x) { return TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived, const BDerived, - const XDerived>(a, b, x, internal::scalar_betainc_op<typename XDerived::Scalar>()); + const XDerived>(a.derived(), b.derived(), x.derived(), + internal::scalar_betainc_op<typename XDerived::Scalar>()); } } // end namespace Eigen