// Eigen/src/SparseCore/SparseDenseProduct.h - eigen - Git at Google

 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

 #ifndef EIGEN_SPARSEDENSEPRODUCT_H
 #define EIGEN_SPARSEDENSEPRODUCT_H

 namespace Eigen {

 namespace internal {

 // Storage-kind promotion for outer products mixing one sparse and one dense
 // operand: for both operand orders, `ret` is Sparse.
 template <>
 struct product_promote_storage_type<Sparse, Dense, OuterProduct>
 {
   typedef Sparse ret;
 };

 template <>
 struct product_promote_storage_type<Dense, Sparse, OuterProduct>
 {
   typedef Sparse ret;
 };

 // Primary template (declaration only) of the sparse * dense product kernel.
 // The two defaulted parameters pick one of the specializations:
 //  - LhsStorageOrder is RowMajor when the sparse lhs has RowMajorBit set, ColMajor otherwise;
 //  - ColPerCol is true when the dense rhs has exactly one column at compile time
 //    or does not have RowMajorBit set.
 template<
   typename SparseLhsType,
   typename DenseRhsType,
   typename DenseResType,
   typename AlphaType,
   int LhsStorageOrder = ((SparseLhsType::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor,
   bool ColPerCol = (DenseRhsType::ColsAtCompileTime==1) || ((DenseRhsType::Flags&RowMajorBit)==0)>
 struct sparse_time_dense_product_impl;

 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, true>
 {
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
   typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
   typedef evaluator<Lhs> LhsEval;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
   {
     LhsEval lhsEval(lhs);

     Index n = lhs.outerSize();
 #ifdef EIGEN_HAS_OPENMP
     Eigen::initParallel();
     Index threads = Eigen::nbThreads();
 #endif

     for(Index c=0; c<rhs.cols(); ++c)
     {
 #ifdef EIGEN_HAS_OPENMP
       // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
       // It basically represents the minimal amount of work to be done to be worth it.
       if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
       {
         #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
         for(Index i=0; i<n; ++i)
           processRow(lhsEval,rhs,res,alpha,i,c);
       }
       else
 #endif
       {
         for(Index i=0; i<n; ++i)
           processRow(lhsEval,rhs,res,alpha,i,c);
       }
     }
   }

   static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
   {
     typename Res::Scalar tmp(0);
     for(LhsInnerIterator it(lhsEval,i); it ;++it)
       tmp += it.value() * rhs.coeff(it.index(),col);
     res.coeffRef(i,col) += alpha * tmp;
   }

 };

 // FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
 // -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
 // template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
 // struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >
 // {
 //   enum {
 //     Defined = 1
 //   };
 //   typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;
 // };

 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType, ColMajor, true>
 {
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
   typedef evaluator<Lhs> LhsEval;
   typedef typename LhsEval::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
   {
     LhsEval lhsEval(lhs);
     for(Index c=0; c<rhs.cols(); ++c)
     {
       for(Index j=0; j<lhs.outerSize(); ++j)
       {
 //        typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
         typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
         for(LhsInnerIterator it(lhsEval,j); it ;++it)
           res.coeffRef(it.index(),c) += it.value() * rhs_j;
       }
     }
   }
 };

 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, false>
 {
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
   typedef evaluator<Lhs> LhsEval;
   typedef typename LhsEval::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
   {
     Index n = lhs.rows();
     LhsEval lhsEval(lhs);

 #ifdef EIGEN_HAS_OPENMP
     Eigen::initParallel();
     Index threads = Eigen::nbThreads();
     // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
     // It basically represents the minimal amount of work to be done to be worth it.
     if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)
     {
       #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
       for(Index i=0; i<n; ++i)
         processRow(lhsEval,rhs,res,alpha,i);
     }
     else
 #endif
     {
       for(Index i=0; i<n; ++i)
         processRow(lhsEval, rhs, res, alpha, i);
     }
   }

   static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)
   {
     typename Res::RowXpr res_i(res.row(i));
     for(LhsInnerIterator it(lhsEval,i); it ;++it)
       res_i += (alpha*it.value()) * rhs.row(it.index());
   }
 };

 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, ColMajor, false>
 {
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
   typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
   {
     evaluator<Lhs> lhsEval(lhs);
     for(Index j=0; j<lhs.outerSize(); ++j)
     {
       typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
       for(LhsInnerIterator it(lhsEval,j); it ;++it)
         res.row(it.index()) += (alpha*it.value()) * rhs_j;
     }
   }
 };

 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,typename AlphaType>
 inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
 {
   sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType>::run(lhs, rhs, res, alpha);
 }

 } // end namespace internal

 namespace internal {

 // Product implementation for a sparse lhs and a dense rhs.
 template<typename Lhs, typename Rhs, int ProductType>
 struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SparseShape,DenseShape,ProductType> >
 {
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;

   // Wraps both operands through nested_eval, then hands them to
   // internal::sparse_time_dense_product together with dst and alpha.
   template<typename Dest>
   static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
     typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? 1 : Rhs::ColsAtCompileTime>::type NestedLhs;
     typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==0) ? 1 : Dynamic>::type NestedRhs;
     NestedLhs nestedLhs(lhs);
     NestedRhs nestedRhs(rhs);
     internal::sparse_time_dense_product(nestedLhs, nestedRhs, dst, alpha);
   }
 };

 // A sparse-triangular lhs times a dense rhs reuses the SparseShape * DenseShape
 // implementation unchanged.
 template<typename Lhs, typename Rhs, int ProductType>
 struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>
   : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
 {
 };

 // Product implementation for a dense lhs and a sparse rhs. The work is
 // delegated to the sparse * dense kernel by transposing both operands and the
 // destination.
 template<typename Lhs, typename Rhs, int ProductType>
 struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
   : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SparseShape,ProductType> >
 {
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;

   // Wraps both operands through nested_eval and calls
   // internal::sparse_time_dense_product(rhs^T, lhs^T, dst^T, alpha).
   template<typename Dst>
   static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
     typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? Dynamic : 1>::type NestedLhs;
     typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==RowMajorBit) ? 1 : Lhs::RowsAtCompileTime>::type NestedRhs;
     NestedLhs nestedLhs(lhs);
     NestedRhs nestedRhs(rhs);

     // Swap and transpose the operands; the result is written through a transposed view of dst.
     Transpose<Dst> transposedDst(dst);
     internal::sparse_time_dense_product(nestedRhs.transpose(), nestedLhs.transpose(), transposedDst, alpha);
   }
 };

 // A dense lhs times a sparse-triangular rhs reuses the DenseShape * SparseShape
 // implementation unchanged.
 template<typename Lhs, typename Rhs, int ProductType>
 struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>
   : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
 {
 };

 // Shared evaluator for outer products mixing a sparse and a dense operand.
 // When NeedToTranspose is true the roles of LhsT and RhsT are exchanged: the
 // operand that is iterated over ("actual lhs") is RhsT and the operand that
 // supplies one scaling coefficient per outer index ("actual rhs") is LhsT.
 template<typename LhsT, typename RhsT, bool NeedToTranspose>
 struct sparse_dense_outer_product_evaluator
 {
 protected:
   // Operand that is iterated over / operand that provides the per-outer factor.
   typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1;
   typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs;
   typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;

   // if the actual left-hand side is a dense vector,
   // then build a sparse-view so that we can seamlessly iterate over it.
   typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
             Lhs1, SparseView<Lhs1> >::type ActualLhs;
   // Type of the stored member m_lhs: a const reference when Lhs1 is sparse,
   // a SparseView object held by value otherwise.
   typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
             Lhs1 const&, SparseView<Lhs1> >::type LhsArg;

   typedef evaluator<ActualLhs> LhsEval;
   typedef evaluator<ActualRhs> RhsEval;
   typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
   typedef typename ProdXprType::Scalar Scalar;

 public:
   enum {
     // RowMajorBit is set exactly when the operands are exchanged.
     Flags = NeedToTranspose ? RowMajorBit : 0,
     CoeffReadCost = HugeCost
   };

   // Iterator over one outer vector of the product. It walks outer vector 0 of
   // the actual lhs and multiplies every value by the actual-rhs coefficient
   // selected by `outer`.
   class InnerIterator : public LhsIterator
   {
   public:
     // The initializers run in declaration order (base, m_outer, m_empty,
     // m_factor); the Sparse overload of get() may set m_empty to true, which is
     // why m_empty is initialized to false before m_factor is computed.
     InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
       : LhsIterator(xprEval.m_lhsXprImpl, 0),
         m_outer(outer),
         m_empty(false),
         m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))
     {}

     EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
     // row/col are exchanged when NeedToTranspose is true.
     EIGEN_STRONG_INLINE Index row()   const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
     EIGEN_STRONG_INLINE Index col()   const { return NeedToTranspose ? LhsIterator::index() : m_outer; }

     EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
     // Iteration stops when the lhs iterator is exhausted, or immediately if the
     // factor was flagged as missing/zero by the Sparse overload of get().
     EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }

   protected:
     // Dense actual rhs: the factor is the coefficient at linear index `outer`.
     Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const
     {
       return rhs.coeff(outer);
     }

     // Sparse actual rhs: the factor is the first stored entry of outer vector
     // `outer`, provided it sits at inner index 0 and is non-zero; otherwise the
     // iterator is marked empty and the factor is zero.
     Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())
     {
       typename RhsEval::InnerIterator it(rhs, outer);
       if (it && it.index()==0 && it.value()!=Scalar(0))
         return it.value();
       m_empty = true;
       return Scalar(0);
     }

     // Keep m_empty declared before m_factor: see the constructor comment.
     Index m_outer;
     bool m_empty;
     Scalar m_factor;
   };

   // Non-transposed case: arguments arrive as (actual lhs, actual rhs).
   sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)
      : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }

   // transpose case
   // (arguments arrive as (actual rhs, actual lhs), i.e. in the opposite order)
   sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
      : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }

 protected:
   // m_lhsXprImpl is built from m_lhs, so m_lhs must be declared (and thus
   // initialized) first.
   const LhsArg m_lhs;
   evaluator<ActualLhs> m_lhsXprImpl;
   evaluator<ActualRhs> m_rhsXprImpl;
 };

 // sparse * dense outer product: the evaluator is inherited from
 // sparse_dense_outer_product_evaluator, with the operands exchanged when the
 // sparse lhs is row-major.
 template<typename Lhs, typename Rhs>
 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape>
   : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>
 {
   typedef Product<Lhs, Rhs> XprType;
   typedef typename XprType::PlainObject PlainObject;
   typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;

   // Passes the two operands of the product expression to the base evaluator.
   explicit product_evaluator(const XprType& xpr)
     : Base(xpr.lhs(), xpr.rhs())
   {
   }
 };

 // dense * sparse outer product: the evaluator is inherited from
 // sparse_dense_outer_product_evaluator, with the operands exchanged when the
 // sparse rhs is row-major.
 template<typename Lhs, typename Rhs>
 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape>
   : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>
 {
   typedef Product<Lhs, Rhs> XprType;
   typedef typename XprType::PlainObject PlainObject;
   typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;

   // Passes the two operands of the product expression to the base evaluator.
   explicit product_evaluator(const XprType& xpr)
     : Base(xpr.lhs(), xpr.rhs())
   {
   }
 };

 } // end namespace internal

 } // end namespace Eigen

 #endif // EIGEN_SPARSEDENSEPRODUCT_H
	// This file is part of Eigen, a lightweight C++ template library
	// for linear algebra.
	//
	// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
	//
	// This Source Code Form is subject to the terms of the Mozilla
	// Public License v. 2.0. If a copy of the MPL was not distributed
	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

	#ifndef EIGEN_SPARSEDENSEPRODUCT_H
	#define EIGEN_SPARSEDENSEPRODUCT_H

	namespace Eigen {

	namespace internal {

	// Storage-kind promotion for outer products mixing one sparse and one dense
	// operand: for both operand orders, `ret` is Sparse.
	template <>
	struct product_promote_storage_type<Sparse, Dense, OuterProduct>
	{
	  typedef Sparse ret;
	};

	template <>
	struct product_promote_storage_type<Dense, Sparse, OuterProduct>
	{
	  typedef Sparse ret;
	};

	// Primary template (declaration only) of the sparse * dense product kernel.
	// The two defaulted parameters pick one of the specializations:
	//  - LhsStorageOrder is RowMajor when the sparse lhs has RowMajorBit set, ColMajor otherwise;
	//  - ColPerCol is true when the dense rhs does not have RowMajorBit set or has
	//    exactly one column at compile time.
	// Fix: the logical-or operator had been mangled into an escaped `\|\|`.
	template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,
	         typename AlphaType,
	         int LhsStorageOrder = ((SparseLhsType::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor,
	         bool ColPerCol = ((DenseRhsType::Flags&RowMajorBit)==0) || DenseRhsType::ColsAtCompileTime==1>
	struct sparse_time_dense_product_impl;

	// Kernel specialization <RowMajor, ColPerCol=true>: the dense rhs is walked one
	// column at a time and, for each column, every outer vector of the sparse lhs
	// is reduced against it. The result is accumulated into `res` (never reset here).
	// Fix: the OpenMP chunk-size expression had lost its `*` operators (`threads4`).
	template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
	struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, true>
	{
	  typedef typename internal::remove_all<SparseLhsType>::type Lhs;
	  typedef typename internal::remove_all<DenseRhsType>::type Rhs;
	  typedef typename internal::remove_all<DenseResType>::type Res;
	  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
	  typedef evaluator<Lhs> LhsEval;

	  // Adds alpha * lhs * rhs to res, calling processRow() for every (outer index, column) pair.
	  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
	  {
	    LhsEval lhsEval(lhs);

	    Index n = lhs.outerSize();
	#ifdef EIGEN_HAS_OPENMP
	    Eigen::initParallel();
	    Index threads = Eigen::nbThreads();
	#endif

	    for(Index c=0; c<rhs.cols(); ++c)
	    {
	#ifdef EIGEN_HAS_OPENMP
	      // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
	      // It basically represents the minimal amount of work to be done to be worth it.
	      if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
	      {
	        // Chunk size is n divided by 4*threads, rounded up.
	        #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
	        for(Index i=0; i<n; ++i)
	          processRow(lhsEval,rhs,res,alpha,i,c);
	      }
	      else
	#endif
	      {
	        for(Index i=0; i<n; ++i)
	          processRow(lhsEval,rhs,res,alpha,i,c);
	      }
	    }
	  }

	  // Accumulates sum_k lhs(i,k) * rhs(k,col) over the stored entries of outer
	  // vector i, then adds alpha times that sum to res(i,col).
	  static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
	  {
	    typename Res::Scalar tmp(0);
	    for(LhsInnerIterator it(lhsEval,i); it ;++it)
	      tmp += it.value() * rhs.coeff(it.index(),col);
	    res.coeffRef(i,col) += alpha * tmp;
	  }

	};

	// FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
	// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
	// template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
	// struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >
	// {
	// enum {
	// Defined = 1
	// };
	// typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;
	// };

	template<typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
	struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType, ColMajor, true>
	{
	typedef typename internal::remove_all<SparseLhsType>::type Lhs;
	typedef typename internal::remove_all<DenseRhsType>::type Rhs;
	typedef typename internal::remove_all<DenseResType>::type Res;
	typedef evaluator<Lhs> LhsEval;
	typedef typename LhsEval::InnerIterator LhsInnerIterator;
	static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
	{
	LhsEval lhsEval(lhs);
	for(Index c=0; c<rhs.cols(); ++c)
	{
	for(Index j=0; j<lhs.outerSize(); ++j)
	{
	// typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
	typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
	for(LhsInnerIterator it(lhsEval,j); it ;++it)
	res.coeffRef(it.index(),c) += it.value() * rhs_j;
	}
	}
	}
	};

	template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
	struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, false>
	{
	typedef typename internal::remove_all<SparseLhsType>::type Lhs;
	typedef typename internal::remove_all<DenseRhsType>::type Rhs;
	typedef typename internal::remove_all<DenseResType>::type Res;
	typedef evaluator<Lhs> LhsEval;
	typedef typename LhsEval::InnerIterator LhsInnerIterator;
	static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
	{
	Index n = lhs.rows();
	LhsEval lhsEval(lhs);

	#ifdef EIGEN_HAS_OPENMP
	Eigen::initParallel();
	Index threads = Eigen::nbThreads();
	// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
	// It basically represents the minimal amount of work to be done to be worth it.
	if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)
	{
	#pragma omp parallel for schedule(dynamic,(n+threads4-1)/(threads4)) num_threads(threads)
	for(Index i=0; i<n; ++i)
	processRow(lhsEval,rhs,res,alpha,i);
	}
	else
	#endif
	{
	for(Index i=0; i<n; ++i)
	processRow(lhsEval, rhs, res, alpha, i);
	}
	}

	static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)
	{
	typename Res::RowXpr res_i(res.row(i));
	for(LhsInnerIterator it(lhsEval,i); it ;++it)
	res_i += (alphait.value()) rhs.row(it.index());
	}
	};

	template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
	struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, ColMajor, false>
	{
	typedef typename internal::remove_all<SparseLhsType>::type Lhs;
	typedef typename internal::remove_all<DenseRhsType>::type Rhs;
	typedef typename internal::remove_all<DenseResType>::type Res;
	typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
	static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
	{
	evaluator<Lhs> lhsEval(lhs);
	for(Index j=0; j<lhs.outerSize(); ++j)
	{
	typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
	for(LhsInnerIterator it(lhsEval,j); it ;++it)
	res.row(it.index()) += (alphait.value()) rhs_j;
	}
	}
	};

	template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,typename AlphaType>
	inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
	{
	sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType>::run(lhs, rhs, res, alpha);
	}

	} // end namespace internal

	namespace internal {

	// Product implementation for a sparse lhs and a dense rhs.
	template<typename Lhs, typename Rhs, int ProductType>
	struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
	  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SparseShape,DenseShape,ProductType> >
	{
	  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

	  // Wraps both operands through nested_eval, then hands them to
	  // internal::sparse_time_dense_product together with dst and alpha.
	  template<typename Dest>
	  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
	  {
	    typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? 1 : Rhs::ColsAtCompileTime>::type NestedLhs;
	    typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==0) ? 1 : Dynamic>::type NestedRhs;
	    NestedLhs nestedLhs(lhs);
	    NestedRhs nestedRhs(rhs);
	    internal::sparse_time_dense_product(nestedLhs, nestedRhs, dst, alpha);
	  }
	};

	// A sparse-triangular lhs times a dense rhs reuses the SparseShape * DenseShape
	// implementation unchanged.
	template<typename Lhs, typename Rhs, int ProductType>
	struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>
	  : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
	{
	};

	// Product implementation for a dense lhs and a sparse rhs. The work is
	// delegated to the sparse * dense kernel by transposing both operands and the
	// destination.
	template<typename Lhs, typename Rhs, int ProductType>
	struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
	  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SparseShape,ProductType> >
	{
	  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

	  // Wraps both operands through nested_eval and calls
	  // internal::sparse_time_dense_product(rhs^T, lhs^T, dst^T, alpha).
	  template<typename Dst>
	  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
	  {
	    typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? Dynamic : 1>::type NestedLhs;
	    typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==RowMajorBit) ? 1 : Lhs::RowsAtCompileTime>::type NestedRhs;
	    NestedLhs nestedLhs(lhs);
	    NestedRhs nestedRhs(rhs);

	    // Swap and transpose the operands; the result is written through a transposed view of dst.
	    Transpose<Dst> transposedDst(dst);
	    internal::sparse_time_dense_product(nestedRhs.transpose(), nestedLhs.transpose(), transposedDst, alpha);
	  }
	};

	// A dense lhs times a sparse-triangular rhs reuses the DenseShape * SparseShape
	// implementation unchanged.
	template<typename Lhs, typename Rhs, int ProductType>
	struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>
	  : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
	{
	};

	// Shared evaluator for outer products mixing a sparse and a dense operand.
	// When NeedToTranspose is true the roles of LhsT and RhsT are exchanged: the
	// operand that is iterated over ("actual lhs") is RhsT and the operand that
	// supplies one scaling coefficient per outer index ("actual rhs") is LhsT.
	template<typename LhsT, typename RhsT, bool NeedToTranspose>
	struct sparse_dense_outer_product_evaluator
	{
	protected:
	// Operand that is iterated over / operand that provides the per-outer factor.
	typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1;
	typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs;
	typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;

	// if the actual left-hand side is a dense vector,
	// then build a sparse-view so that we can seamlessly iterate over it.
	typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
	Lhs1, SparseView<Lhs1> >::type ActualLhs;
	// Type of the stored member m_lhs: a const reference when Lhs1 is sparse,
	// a SparseView object held by value otherwise.
	typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
	Lhs1 const&, SparseView<Lhs1> >::type LhsArg;

	typedef evaluator<ActualLhs> LhsEval;
	typedef evaluator<ActualRhs> RhsEval;
	typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
	typedef typename ProdXprType::Scalar Scalar;

	public:
	enum {
	// RowMajorBit is set exactly when the operands are exchanged.
	Flags = NeedToTranspose ? RowMajorBit : 0,
	CoeffReadCost = HugeCost
	};

	// Iterator over one outer vector of the product. It walks outer vector 0 of
	// the actual lhs and multiplies every value by the actual-rhs coefficient
	// selected by `outer`.
	class InnerIterator : public LhsIterator
	{
	public:
	// The initializers run in declaration order (base, m_outer, m_empty,
	// m_factor); the Sparse overload of get() may set m_empty to true, which is
	// why m_empty is initialized to false before m_factor is computed.
	InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
	: LhsIterator(xprEval.m_lhsXprImpl, 0),
	m_outer(outer),
	m_empty(false),
	m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))
	{}

	EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
	// row/col are exchanged when NeedToTranspose is true.
	EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
	EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; }

	EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
	// Iteration stops when the lhs iterator is exhausted, or immediately if the
	// factor was flagged as missing/zero by the Sparse overload of get().
	EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }

	protected:
	// Dense actual rhs: the factor is the coefficient at linear index `outer`.
	Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const
	{
	return rhs.coeff(outer);
	}

	// Sparse actual rhs: the factor is the first stored entry of outer vector
	// `outer`, provided it sits at inner index 0 and is non-zero; otherwise the
	// iterator is marked empty and the factor is zero.
	Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())
	{
	typename RhsEval::InnerIterator it(rhs, outer);
	if (it && it.index()==0 && it.value()!=Scalar(0))
	return it.value();
	m_empty = true;
	return Scalar(0);
	}

	// Keep m_empty declared before m_factor: see the constructor comment.
	Index m_outer;
	bool m_empty;
	Scalar m_factor;
	};

	// Non-transposed case: arguments arrive as (actual lhs, actual rhs).
	sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)
	: m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
	{
	EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
	}

	// transpose case
	// (arguments arrive as (actual rhs, actual lhs), i.e. in the opposite order)
	sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
	: m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
	{
	EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
	}

	protected:
	// m_lhsXprImpl is built from m_lhs, so m_lhs must be declared (and thus
	// initialized) first.
	const LhsArg m_lhs;
	evaluator<ActualLhs> m_lhsXprImpl;
	evaluator<ActualRhs> m_rhsXprImpl;
	};

	// sparse * dense outer product: the evaluator is inherited from
	// sparse_dense_outer_product_evaluator, with the operands exchanged when the
	// sparse lhs is row-major.
	template<typename Lhs, typename Rhs>
	struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape>
	  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>
	{
	  typedef Product<Lhs, Rhs> XprType;
	  typedef typename XprType::PlainObject PlainObject;
	  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;

	  // Passes the two operands of the product expression to the base evaluator.
	  explicit product_evaluator(const XprType& xpr)
	    : Base(xpr.lhs(), xpr.rhs())
	  {
	  }
	};

	// dense * sparse outer product: the evaluator is inherited from
	// sparse_dense_outer_product_evaluator, with the operands exchanged when the
	// sparse rhs is row-major.
	template<typename Lhs, typename Rhs>
	struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape>
	  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>
	{
	  typedef Product<Lhs, Rhs> XprType;
	  typedef typename XprType::PlainObject PlainObject;
	  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;

	  // Passes the two operands of the product expression to the base evaluator.
	  explicit product_evaluator(const XprType& xpr)
	    : Base(xpr.lhs(), xpr.rhs())
	  {
	  }
	};

	} // end namespace internal

	} // end namespace Eigen

	#endif // EIGEN_SPARSEDENSEPRODUCT_H