// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
#define EIGEN_TRIANGULARMATRIXVECTOR_H
// IWYU pragma: private
#include "../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,
int StorageOrder, int Version = Specialized>
struct triangular_matrix_vector_product;
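// The two specializations below (ColMajor and RowMajor) compute
//   res += alpha * T * rhs
// where T is the triangular part of the lhs selected by Mode (Lower or Upper,
// optionally with a unit or zero diagonal). For UnitDiag the contribution of
// the implicit unit diagonal is added explicitly inside the kernels.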
template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index, Mode, LhsScalar, ConjLhs, RhsScalar, ConjRhs, ColMajor, Version> {
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static constexpr bool IsLower = ((Mode & Lower) == Lower);
static constexpr bool HasUnitDiag = (Mode & UnitDiag) == UnitDiag;
static constexpr bool HasZeroDiag = (Mode & ZeroDiag) == ZeroDiag;
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* lhs_, Index lhsStride,
const RhsScalar* rhs_, Index rhsIncr, ResScalar* res_, Index resIncr,
const RhsScalar& alpha);
};
template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index, Mode, LhsScalar, ConjLhs, RhsScalar, ConjRhs, ColMajor,
Version>::run(Index _rows, Index _cols, const LhsScalar* lhs_,
Index lhsStride, const RhsScalar* rhs_,
Index rhsIncr, ResScalar* res_, Index resIncr,
const RhsScalar& alpha) {
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows, _cols);
Index rows = IsLower ? _rows : size;
Index cols = IsLower ? size : _cols;
typedef Map<const Matrix<LhsScalar, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(lhs_, rows, cols, OuterStride<>(lhsStride));
typename conj_expr_if<ConjLhs, LhsMap>::type cjLhs(lhs);
typedef Map<const Matrix<RhsScalar, Dynamic, 1>, 0, InnerStride<> > RhsMap;
const RhsMap rhs(rhs_, cols, InnerStride<>(rhsIncr));
typename conj_expr_if<ConjRhs, RhsMap>::type cjRhs(rhs);
typedef Map<Matrix<ResScalar, Dynamic, 1> > ResMap;
ResMap res(res_, rows);
typedef const_blas_data_mapper<LhsScalar, Index, ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar, Index, RowMajor> RhsMapper;
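// Blocked traversal of the triangular part: each panel covers at most
// PanelWidth columns. The triangular block sitting on the diagonal is applied
// column by column as a scaled axpy on a growing/shrinking segment of res,
// and the rectangular remainder of the panel is delegated to the general
// matrix-vector kernel below.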
for (Index pi = 0; pi < size; pi += PanelWidth) {
Index actualPanelWidth = (std::min)(PanelWidth, size - pi);
for (Index k = 0; k < actualPanelWidth; ++k) {
Index i = pi + k;
Index s = IsLower ? ((HasUnitDiag || HasZeroDiag) ? i + 1 : i) : pi;
Index r = IsLower ? actualPanelWidth - k : k + 1;
if ((!(HasUnitDiag || HasZeroDiag)) || (--r) > 0)
res.segment(s, r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s, r);
if (HasUnitDiag) res.coeffRef(i) += alpha * cjRhs.coeff(i);
}
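// Rectangular off-diagonal block of this panel: the rows below the diagonal
// block in the lower case, the rows above it in the upper case.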
Index r = IsLower ? rows - pi - actualPanelWidth : pi;
if (r > 0) {
Index s = IsLower ? pi + actualPanelWidth : 0;
general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, ConjLhs, RhsScalar, RhsMapper, ConjRhs,
BuiltIn>::run(r, actualPanelWidth, LhsMapper(&lhs.coeffRef(s, pi), lhsStride),
RhsMapper(&rhs.coeffRef(pi), rhsIncr), &res.coeffRef(s), resIncr,
alpha);
}
}
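// For an upper triangular lhs with more columns than rows, the trailing
// cols - size columns form a plain rectangular block handled in one shot.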
if ((!IsLower) && cols > size) {
general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, ConjLhs, RhsScalar, RhsMapper, ConjRhs>::run(
rows, cols - size, LhsMapper(&lhs.coeffRef(0, size), lhsStride), RhsMapper(&rhs.coeffRef(size), rhsIncr), res_,
resIncr, alpha);
}
}
template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index, Mode, LhsScalar, ConjLhs, RhsScalar, ConjRhs, RowMajor, Version> {
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static constexpr bool IsLower = ((Mode & Lower) == Lower);
static constexpr bool HasUnitDiag = (Mode & UnitDiag) == UnitDiag;
static constexpr bool HasZeroDiag = (Mode & ZeroDiag) == ZeroDiag;
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* lhs_, Index lhsStride,
const RhsScalar* rhs_, Index rhsIncr, ResScalar* res_, Index resIncr,
const ResScalar& alpha);
};
template <typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index, Mode, LhsScalar, ConjLhs, RhsScalar, ConjRhs, RowMajor,
Version>::run(Index _rows, Index _cols, const LhsScalar* lhs_,
Index lhsStride, const RhsScalar* rhs_,
Index rhsIncr, ResScalar* res_, Index resIncr,
const ResScalar& alpha) {
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows, _cols);
Index rows = IsLower ? _rows : diagSize;
Index cols = IsLower ? diagSize : _cols;
typedef Map<const Matrix<LhsScalar, Dynamic, Dynamic, RowMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(lhs_, rows, cols, OuterStride<>(lhsStride));
typename conj_expr_if<ConjLhs, LhsMap>::type cjLhs(lhs);
typedef Map<const Matrix<RhsScalar, Dynamic, 1> > RhsMap;
const RhsMap rhs(rhs_, cols);
typename conj_expr_if<ConjRhs, RhsMap>::type cjRhs(rhs);
typedef Map<Matrix<ResScalar, Dynamic, 1>, 0, InnerStride<> > ResMap;
ResMap res(res_, rows, InnerStride<>(resIncr));
typedef const_blas_data_mapper<LhsScalar, Index, RowMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar, Index, RowMajor> RhsMapper;
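// Same blocked traversal as in the ColMajor kernel, but row by row: the
// triangular diagonal block is applied as dot products between row segments
// of the lhs and the matching segment of the rhs, and the rectangular
// remainder of each panel goes through the general row-major matrix-vector
// kernel below.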
for (Index pi = 0; pi < diagSize; pi += PanelWidth) {
Index actualPanelWidth = (std::min)(PanelWidth, diagSize - pi);
for (Index k = 0; k < actualPanelWidth; ++k) {
Index i = pi + k;
Index s = IsLower ? pi : ((HasUnitDiag || HasZeroDiag) ? i + 1 : i);
Index r = IsLower ? k + 1 : actualPanelWidth - k;
if ((!(HasUnitDiag || HasZeroDiag)) || (--r) > 0)
res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s, r).cwiseProduct(cjRhs.segment(s, r).transpose())).sum();
if (HasUnitDiag) res.coeffRef(i) += alpha * cjRhs.coeff(i);
}
Index r = IsLower ? pi : cols - pi - actualPanelWidth;
if (r > 0) {
Index s = IsLower ? 0 : pi + actualPanelWidth;
general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjLhs, RhsScalar, RhsMapper, ConjRhs,
BuiltIn>::run(actualPanelWidth, r, LhsMapper(&lhs.coeffRef(pi, s), lhsStride),
RhsMapper(&rhs.coeffRef(s), rhsIncr), &res.coeffRef(pi), resIncr,
alpha);
}
}
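// For a lower triangular lhs with more rows than columns, the trailing
// rows - diagSize rows form a plain rectangular block handled in one shot.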
if (IsLower && rows > diagSize) {
general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjLhs, RhsScalar, RhsMapper, ConjRhs>::run(
rows - diagSize, cols, LhsMapper(&lhs.coeffRef(diagSize, 0), lhsStride), RhsMapper(&rhs.coeffRef(0), rhsIncr),
&res.coeffRef(diagSize), resIncr, alpha);
}
}
/***************************************************************************
* Wrappers to triangular_matrix_vector_product
***************************************************************************/
template <int Mode, int StorageOrder>
struct trmv_selector;
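// Illustrative usage (not part of this file): these kernels are reached when a
// triangular view is multiplied by a vector, e.g.
//
//   Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);
//   Eigen::VectorXd v = Eigen::VectorXd::Random(5);
//   Eigen::VectorXd y = A.triangularView<Eigen::Lower>() * v;
//   Eigen::RowVectorXd z = v.transpose() * A.triangularView<Eigen::Upper>();
//
// The first form is dispatched through the triangular_product_impl
// specialization for a triangular lhs below, the second one through the
// specialization for a triangular rhs, which reduces to the first by
// transposition.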
} // end namespace internal
namespace internal {
template <int Mode, typename Lhs, typename Rhs>
struct triangular_product_impl<Mode, true, Lhs, false, Rhs, true> {
template <typename Dest>
static void run(Dest& dst, const Lhs& lhs, const Rhs& rhs, const typename Dest::Scalar& alpha) {
eigen_assert(dst.rows() == lhs.rows() && dst.cols() == rhs.cols());
internal::trmv_selector<Mode, (int(internal::traits<Lhs>::Flags) & RowMajorBit) ? RowMajor : ColMajor>::run(
lhs, rhs, dst, alpha);
}
};
template <int Mode, typename Lhs, typename Rhs>
struct triangular_product_impl<Mode, false, Lhs, true, Rhs, false> {
template <typename Dest>
static void run(Dest& dst, const Lhs& lhs, const Rhs& rhs, const typename Dest::Scalar& alpha) {
eigen_assert(dst.rows() == lhs.rows() && dst.cols() == rhs.cols());
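// A (row) vector times a triangular matrix is evaluated as the transposed
// product T^T * v^T, which swaps Lower/Upper and the preferred storage order.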
Transpose<Dest> dstT(dst);
internal::trmv_selector<(Mode & (UnitDiag | ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),
(int(internal::traits<Rhs>::Flags) & RowMajorBit) ? ColMajor
: RowMajor>::run(rhs.transpose(),
lhs.transpose(), dstT,
alpha);
}
};
} // end namespace internal
namespace internal {
// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
template <int Mode>
struct trmv_selector<Mode, ColMajor> {
template <typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
constexpr int Alignment = (std::min)(int(AlignedMax), int(internal::packet_traits<ResScalar>::size));
typedef Map<Matrix<ResScalar, Dynamic, 1>, Alignment> MappedDest;
add_const_on_value_type_t<ActualLhsType> actualLhs = LhsBlasTraits::extract(lhs);
add_const_on_value_type_t<ActualRhsType> actualRhs = RhsBlasTraits::extract(rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1.
// On the other hand, it is good for the cache to pack the vector anyway...
constexpr bool EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime == 1;
constexpr bool ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex);
constexpr bool MightCannotUseDest = (Dest::InnerStrideAtCompileTime != 1) || ComplexByReal;
gemv_static_vector_if<ResScalar, Dest::SizeAtCompileTime, Dest::MaxSizeAtCompileTime, MightCannotUseDest>
static_dest;
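// When the lhs is complex and the rhs is real, the kernel accumulates with a
// real (RhsScalar) alpha, so a complex actualAlpha cannot be folded into the
// product. In that case the product is computed with alpha = 1 into a zeroed
// temporary and scaled while being added back to dest.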
bool alphaIsCompatible = (!ComplexByReal) || numext::is_exactly_zero(numext::imag(actualAlpha));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar, RhsScalar>::run(actualAlpha);
ei_declare_aligned_stack_constructed_variable(ResScalar, actualDestPtr, dest.size(),
evalToDest ? dest.data() : static_dest.data());
if (!evalToDest) {
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if (!alphaIsCompatible) {
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
} else
MappedDest(actualDestPtr, dest.size()) = dest;
}
internal::triangular_matrix_vector_product<Index, Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar,
RhsBlasTraits::NeedToConjugate, ColMajor>::run(actualLhs.rows(),
actualLhs.cols(),
actualLhs.data(),
actualLhs.outerStride(),
actualRhs.data(),
actualRhs.innerStride(),
actualDestPtr, 1,
compatibleAlpha);
if (!evalToDest) {
if (!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
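// If the matrix has a unit diagonal but the lhs expression carried a scalar
// factor, the kernel above applied that factor to the implicit unit diagonal
// as well; subtract the excess contribution.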
if (((Mode & UnitDiag) == UnitDiag) && !numext::is_exactly_one(lhs_alpha)) {
Index diagSize = (std::min)(lhs.rows(), lhs.cols());
dest.head(diagSize) -= (lhs_alpha - LhsScalar(1)) * rhs.head(diagSize);
}
}
};
template <int Mode>
struct trmv_selector<Mode, RowMajor> {
template <typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
std::add_const_t<ActualLhsType> actualLhs = LhsBlasTraits::extract(lhs);
std::add_const_t<ActualRhsType> actualRhs = RhsBlasTraits::extract(rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
constexpr bool DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1;
const RhsScalar* actualRhsPtr = actualRhs.data();
// Potentially create a temporary buffer to copy RHS to contiguous memory.
gemv_static_vector_if<RhsScalar, ActualRhsTypeCleaned::SizeAtCompileTime,
ActualRhsTypeCleaned::MaxSizeAtCompileTime, !DirectlyUseRhs>
static_rhs; // Fixed-sized array.
RhsScalar* buffer = nullptr;
if (!DirectlyUseRhs) {
// Use the fixed-sized buffer if available, otherwise allocate.
if (static_rhs.data() != nullptr) {
buffer = static_rhs.data();
} else {
// Allocate either with alloca or malloc.
Eigen::internal::check_size_for_overflow<RhsScalar>(actualRhs.size());
#ifdef EIGEN_ALLOCA
buffer = static_cast<RhsScalar*>((sizeof(RhsScalar) * actualRhs.size() <= EIGEN_STACK_ALLOCATION_LIMIT)
? EIGEN_ALIGNED_ALLOCA(sizeof(RhsScalar) * actualRhs.size())
: Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size()));
#else
buffer = static_cast<RhsScalar*>(Eigen::internal::aligned_malloc(sizeof(RhsScalar) * actualRhs.size()));
#endif
}
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename ActualRhsTypeCleaned::PlainObject, Eigen::AlignedMax>(buffer, actualRhs.size()) = actualRhs;
actualRhsPtr = buffer;
}
// Deallocate only if malloced.
Eigen::internal::aligned_stack_memory_handler<RhsScalar> buffer_stack_memory_destructor(
buffer, actualRhs.size(),
!DirectlyUseRhs && static_rhs.data() == nullptr && actualRhs.size() > EIGEN_STACK_ALLOCATION_LIMIT);
internal::triangular_matrix_vector_product<Index, Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar,
RhsBlasTraits::NeedToConjugate, RowMajor>::run(actualLhs.rows(),
actualLhs.cols(),
actualLhs.data(),
actualLhs.outerStride(),
actualRhsPtr, 1,
dest.data(),
dest.innerStride(),
actualAlpha);
if (((Mode & UnitDiag) == UnitDiag) && !numext::is_exactly_one(lhs_alpha)) {
Index diagSize = (std::min)(lhs.rows(), lhs.cols());
dest.head(diagSize) -= (lhs_alpha - LhsScalar(1)) * rhs.head(diagSize);
}
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULARMATRIXVECTOR_H