blob: 9db119b67f9ea7037336306d0f67bfa23e4462e2 [file] [log] [blame]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
namespace Eigen {
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
// make sure to call innerSize/outerSize since we fake the storage order.
Index rows = lhs.innerSize();
Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize());
ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);
std::memset(mask,0,sizeof(bool)*rows);
evaluator<Lhs> lhsEval(lhs);
evaluator<Rhs> rhsEval(rhs);
// estimate the number of non zero entries
// given a rhs column containing Y non zeros, we assume that the respective Y columns
// of the lhs differs in average of one non zeros, thus the number of non zeros for
// the product of a rhs column with the lhs is X+Y where X is the average number of non zero
// per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
res.setZero();
res.reserve(Index(estimated_nnz_prod));
// we compute each column of the result, one after the other
for (Index j=0; j<cols; ++j)
{
res.startVec(j);
Index nnz = 0;
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
RhsScalar y = rhsIt.value();
Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{
Index i = lhsIt.index();
LhsScalar x = lhsIt.value();
if(!mask[i])
{
mask[i] = true;
values[i] = x * y;
indices[nnz] = i;
++nnz;
}
else
values[i] += x * y;
}
}
if(!sortedInsertion)
{
// unordered insertion
for(Index k=0; k<nnz; ++k)
{
Index i = indices[k];
res.insertBackByOuterInnerUnordered(j,i) = values[i];
mask[i] = false;
}
}
else
{
// alternative ordered insertion code:
const Index t200 = rows/11; // 11 == (log2(200)*1.39)
const Index t = (rows*100)/139;
// FIXME reserve nnz non zeros
// FIXME implement faster sorting algorithms for very small nnz
// if the result is sparse enough => use a quick sort
// otherwise => loop through the entire vector
// In order to avoid to perform an expensive log2 when the
// result is clearly very sparse we use a linear bound up to 200.
if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
{
if(nnz>1) std::sort(indices,indices+nnz);
for(Index k=0; k<nnz; ++k)
{
Index i = indices[k];
res.insertBackByOuterInner(j,i) = values[i];
mask[i] = false;
}
}
else
{
// dense path
for(Index i=0; i<rows; ++i)
{
if(mask[i])
{
mask[i] = false;
res.insertBackByOuterInner(j,i) = values[i];
}
}
}
}
}
res.finalize();
}
} // end namespace internal
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType,
int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
typedef typename remove_all<Lhs>::type LhsCleaned;
typedef typename LhsCleaned::Scalar Scalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;
// If the result is tall and thin (in the extreme case a column vector)
// then it is faster to sort the coefficients inplace instead of transposing twice.
// FIXME, the following heuristic is probably not very good.
if(lhs.rows()>rhs.cols())
{
ColMajorMatrix resCol(lhs.rows(),rhs.cols());
// perform sorted insertion
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
res = resCol.markAsRValue();
}
else
{
ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
// ressort to transpose to sort the entries
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
RowMajorMatrix resRow(resCol);
res = resRow.markAsRValue();
}
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorRhs rhsRow = rhs;
RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
res = resRow;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorLhs lhsRow = lhs;
RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
res = resRow;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
res = resRow;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
{
typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
res = resCol;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorLhs lhsCol = lhs;
ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
res = resCol;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorRhs rhsCol = rhs;
ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
res = resCol;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
RowMajorMatrix resRow(lhs.rows(),rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
// sort the non zeros:
ColMajorMatrix resCol(resRow);
res = resCol;
}
};
} // end namespace internal
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize());
evaluator<Lhs> lhsEval(lhs);
evaluator<Rhs> rhsEval(rhs);
for (Index j=0; j<cols; ++j)
{
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
RhsScalar y = rhsIt.value();
Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{
Index i = lhsIt.index();
LhsScalar x = lhsIt.value();
res.coeffRef(i,j) += x * y;
}
}
}
}
} // end namespace internal
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType,
int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct sparse_sparse_to_dense_product_selector;
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
ColMajorLhs lhsCol(lhs);
internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
ColMajorRhs rhsCol(rhs);
internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
Transpose<ResultType> trRes(res);
internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H