blob: 0c5f2d9f6b6859bb8cfa4c23e8ee96f3fb957df9 [file] [log] [blame]
Googler45874d82019-08-21 12:06:47 -07001// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
Rasmus Munk Larsen66c640f2023-08-22 14:16:58 -070014// IWYU pragma: private
C. Antonio Sanchezdcfd7702021-10-26 11:41:24 -070015#include "./InternalHeaderCheck.h"
16
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -080017namespace Eigen {
Googler45874d82019-08-21 12:06:47 -070018
19namespace internal {
20
21// TODO
22// * implement other kind of vectorization
23// * factorize code
24
25/***************************************************************************
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -080026 * Part 1 : the logic deciding a strategy for vectorization and unrolling
27 ***************************************************************************/
Googler45874d82019-08-21 12:06:47 -070028
// Compile-time decision of the reduction strategy (traversal kind and
// unrolling) used to fold the binary functor Func over all coefficients
// visited through Evaluator.
template <typename Func, typename Evaluator>
struct redux_traits {
 public:
  // Best SIMD packet type for Evaluator's scalar given its compile-time size.
  typedef typename find_best_packet<typename Evaluator::Scalar, Evaluator::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Inner/outer extents expressed independently of storage order.
    InnerMaxSize = int(Evaluator::IsRowMajor) ? Evaluator::MaxColsAtCompileTime : Evaluator::MaxRowsAtCompileTime,
    OuterMaxSize = int(Evaluator::IsRowMajor) ? Evaluator::MaxRowsAtCompileTime : Evaluator::MaxColsAtCompileTime,
    // Compile-time estimate of how many packets slice-vectorization could
    // process: Dynamic when unknown, 0 when the inner dimension is provably
    // too small to hold a single packet.
    SliceVectorizedWork = int(InnerMaxSize) == Dynamic ? Dynamic
                          : int(OuterMaxSize) == Dynamic ? (int(InnerMaxSize) >= int(PacketSize) ? Dynamic : 0)
                                                         : (int(InnerMaxSize) / int(PacketSize)) * int(OuterMaxSize)
  };

  enum {
    // Linear (1D) coefficient indexing is available on the evaluator.
    MayLinearize = (int(Evaluator::Flags) & LinearAccessBit),
    // Both the expression and the functor support packet (SIMD) access.
    MightVectorize = (int(Evaluator::Flags) & ActualPacketAccessBit) && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize),
    // Only slice-vectorize when enough work is expected (at least 3 packets'
    // worth, or an unknown/dynamic amount).
    MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork) == Dynamic || int(SliceVectorizedWork) >= 3)
  };

 public:
  enum {
    // Preference order: linear-vectorized > slice-vectorized > linear > default.
    Traversal = int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
                : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
                : int(MayLinearize)      ? int(LinearTraversal)
                                         : int(DefaultTraversal)
  };

 public:
  enum {
    // Rough total cost of the reduction; HugeCost disables unrolling for
    // dynamically-sized expressions.
    Cost = Evaluator::SizeAtCompileTime == Dynamic
               ? HugeCost
               : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) +
                     (Evaluator::SizeAtCompileTime - 1) * functor_traits<Func>::Cost,
    // Vectorized traversals get a limit scaled by the packet width.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

 public:
  enum { Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decisions above to stderr; compiled only under EIGEN_DEBUG_ASSIGN.
  static void debug() {
    std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Evaluator::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(OuterMaxSize)
    EIGEN_DEBUG_VAR(SliceVectorizedWork)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal"
              << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    std::cerr << "Unrolling"
              << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
91
92/***************************************************************************
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -080093 * Part 2 : unrollers
94 ***************************************************************************/
Googler45874d82019-08-21 12:06:47 -070095
96/*** no vectorization ***/
97
// Scalar (non-vectorized) compile-time unroller over the coefficient range
// [Start, Start+Length), addressed by (outer, inner). Recursively splits the
// range in two halves and combines the partial results with func, yielding a
// balanced reduction tree.
template <typename Func, typename Evaluator, Index Start, Index Length>
struct redux_novec_unroller {
  static constexpr Index HalfLength = Length / 2;

  typedef typename Evaluator::Scalar Scalar;

  // Returns the reduction of the Length coefficients starting at Start.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) {
    return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval, func),
                redux_novec_unroller<Func, Evaluator, Start + HalfLength, Length - HalfLength>::run(eval, func));
  }
};
109
// Base case of the scalar unroller: a single coefficient. The linear index
// Start is converted to (outer, inner) coordinates at compile time.
template <typename Func, typename Evaluator, Index Start>
struct redux_novec_unroller<Func, Evaluator, Start, 1> {
  static constexpr Index outer = Start / Evaluator::InnerSizeAtCompileTime;
  static constexpr Index inner = Start % Evaluator::InnerSizeAtCompileTime;

  typedef typename Evaluator::Scalar Scalar;

  // The functor is unused: a one-element range needs no combining.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) {
    return eval.coeffByOuterInner(outer, inner);
  }
};
121
// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template <typename Func, typename Evaluator, Index Start>
struct redux_novec_unroller<Func, Evaluator, Start, 0> {
  typedef typename Evaluator::Scalar Scalar;
  // Returns a value-initialized Scalar; never reached at runtime.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
};
130
// Scalar unroller like redux_novec_unroller, but using linear (1D) coefficient
// indexing instead of (outer, inner) addressing. Splits [Start, Start+Length)
// in two halves and combines the partial results with func.
template <typename Func, typename Evaluator, Index Start, Index Length>
struct redux_novec_linear_unroller {
  static constexpr Index HalfLength = Length / 2;

  typedef typename Evaluator::Scalar Scalar;

  // Returns the reduction of the Length coefficients starting at Start.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) {
    return func(redux_novec_linear_unroller<Func, Evaluator, Start, HalfLength>::run(eval, func),
                redux_novec_linear_unroller<Func, Evaluator, Start + HalfLength, Length - HalfLength>::run(eval, func));
  }
};
142
// Base case: a single coefficient fetched with the linear index Start.
template <typename Func, typename Evaluator, Index Start>
struct redux_novec_linear_unroller<Func, Evaluator, Start, 1> {
  typedef typename Evaluator::Scalar Scalar;

  // The functor is unused: a one-element range needs no combining.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) {
    return eval.coeff(Start);
  }
};
151
// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template <typename Func, typename Evaluator, Index Start>
struct redux_novec_linear_unroller<Func, Evaluator, Start, 0> {
  typedef typename Evaluator::Scalar Scalar;
  // Returns a value-initialized Scalar; never reached at runtime.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
};
160
Googler45874d82019-08-21 12:06:47 -0700161/*** vectorization ***/
162
// Vectorized compile-time unroller over packets: recursively splits the packet
// range [Start, Start+Length) in two halves and combines the partial packet
// results with func.packetOp, yielding a balanced reduction tree of packets.
template <typename Func, typename Evaluator, Index Start, Index Length>
struct redux_vec_unroller {
  // Returns the packet-wise reduction of Length packets starting at packet
  // index Start.
  template <typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func) {
    constexpr Index HalfLength = Length / 2;

    return func.packetOp(
        redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval, func),
        redux_vec_unroller<Func, Evaluator, Start + HalfLength, Length - HalfLength>::template run<PacketType>(eval,
                                                                                                               func));
  }
};
175
// Base case of the vectorized unroller: load a single packet. The packet index
// Start is converted to a linear coefficient index and then to (outer, inner)
// coordinates, all at compile time.
template <typename Func, typename Evaluator, Index Start>
struct redux_vec_unroller<Func, Evaluator, Start, 1> {
  // The functor is unused: a one-packet range needs no combining.
  template <typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func&) {
    constexpr Index PacketSize = unpacket_traits<PacketType>::size;
    constexpr Index index = Start * PacketSize;
    constexpr Index outer = index / int(Evaluator::InnerSizeAtCompileTime);
    constexpr Index inner = index % int(Evaluator::InnerSizeAtCompileTime);
    constexpr int alignment = Evaluator::Alignment;

    return eval.template packetByOuterInner<alignment, PacketType>(outer, inner);
  }
};
189
// Vectorized unroller like redux_vec_unroller, but using linear (1D) packet
// indexing. Splits the packet range [Start, Start+Length) in two halves and
// combines the partial packet results with func.packetOp.
template <typename Func, typename Evaluator, Index Start, Index Length>
struct redux_vec_linear_unroller {
  // Returns the packet-wise reduction of Length packets starting at packet
  // index Start.
  template <typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func) {
    constexpr Index HalfLength = Length / 2;

    return func.packetOp(
        redux_vec_linear_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval, func),
        redux_vec_linear_unroller<Func, Evaluator, Start + HalfLength, Length - HalfLength>::template run<PacketType>(
            eval, func));
  }
};
202
// Base case: load a single packet at the linear coefficient index
// Start * PacketSize, using the evaluator's compile-time alignment.
template <typename Func, typename Evaluator, Index Start>
struct redux_vec_linear_unroller<Func, Evaluator, Start, 1> {
  // The functor is unused: a one-packet range needs no combining.
  template <typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func&) {
    constexpr Index PacketSize = unpacket_traits<PacketType>::size;
    constexpr Index index = (Start * PacketSize);
    constexpr int alignment = Evaluator::Alignment;
    return eval.template packet<alignment, PacketType>(index);
  }
};
213
Googler45874d82019-08-21 12:06:47 -0700214/***************************************************************************
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800215 * Part 3 : implementation of all cases
216 ***************************************************************************/
Googler45874d82019-08-21 12:06:47 -0700217
// Generic reduction implementation; specialized below for each combination of
// (Traversal, Unrolling) as decided at compile time by redux_traits.
template <typename Func, typename Evaluator, int Traversal = redux_traits<Func, Evaluator>::Traversal,
          int Unrolling = redux_traits<Func, Evaluator>::Unrolling>
struct redux_impl;
221
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800222template <typename Func, typename Evaluator>
223struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling> {
Googler45874d82019-08-21 12:06:47 -0700224 typedef typename Evaluator::Scalar Scalar;
225
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800226 template <typename XprType>
227 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) {
228 eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix");
Rasmus Munk Larsen901c1912022-09-15 07:41:15 -0700229 Scalar res = eval.coeffByOuterInner(0, 0);
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800230 for (Index i = 1; i < xpr.innerSize(); ++i) res = func(res, eval.coeffByOuterInner(0, i));
231 for (Index i = 1; i < xpr.outerSize(); ++i)
232 for (Index j = 0; j < xpr.innerSize(); ++j) res = func(res, eval.coeffByOuterInner(i, j));
Googler45874d82019-08-21 12:06:47 -0700233 return res;
234 }
235};
236
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800237template <typename Func, typename Evaluator>
238struct redux_impl<Func, Evaluator, LinearTraversal, NoUnrolling> {
Rasmus Munk Larsen923c18b2023-05-24 18:46:53 -0700239 typedef typename Evaluator::Scalar Scalar;
240
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800241 template <typename XprType>
242 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) {
243 eigen_assert(xpr.size() > 0 && "you are using an empty matrix");
Rasmus Munk Larsen923c18b2023-05-24 18:46:53 -0700244 Scalar res = eval.coeff(0);
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800245 for (Index k = 1; k < xpr.size(); ++k) res = func(res, eval.coeff(k));
Rasmus Munk Larsen923c18b2023-05-24 18:46:53 -0700246 return res;
247 }
248};
249
// Fully-unrolled, non-vectorized reduction over (outer, inner) coordinates:
// simply delegates to the compile-time unroller covering the whole expression.
template <typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, DefaultTraversal, CompleteUnrolling>
    : redux_novec_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> {
  typedef redux_novec_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
  typedef typename Evaluator::Scalar Scalar;
  // xpr is unused here; it exists only to match the run() signature of the
  // other redux_impl specializations.
  template <typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func,
                                                          const XprType& /*xpr*/) {
    return Base::run(eval, func);
  }
};
261
// Fully-unrolled, non-vectorized reduction using linear indexing: delegates to
// the linear compile-time unroller covering the whole expression.
template <typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearTraversal, CompleteUnrolling>
    : redux_novec_linear_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> {
  typedef redux_novec_linear_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
  typedef typename Evaluator::Scalar Scalar;
  // xpr is unused here; it exists only to match the run() signature of the
  // other redux_impl specializations.
  template <typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func,
                                                          const XprType& /*xpr*/) {
    return Base::run(eval, func);
  }
};
273
// Linear vectorized reduction (runtime loop): processes the aligned middle of
// the expression with packets — two accumulators to partly unroll the loop —
// and the unaligned head/tail with scalar operations.
template <typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling> {
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template <typename XprType>
  static Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) {
    const Index size = xpr.size();

    constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    constexpr int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    // Direct-access expressions with scalar-aligned storage can use aligned
    // packet loads; otherwise fall back to unaligned loads.
    constexpr int alignment0 =
        (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar))
            ? int(packetAlignment)
            : int(Unaligned);
    constexpr int alignment = plain_enum_max(alignment0, Evaluator::Alignment);
    // [alignedStart, alignedEnd) is the packet-processable region;
    // alignedEnd2 bounds the 2x-unrolled part of it.
    const Index alignedStart = internal::first_default_aligned(xpr);
    const Index alignedSize2 = ((size - alignedStart) / (2 * packetSize)) * (2 * packetSize);
    const Index alignedSize = ((size - alignedStart) / (packetSize)) * (packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd = alignedStart + alignedSize;
    Scalar res;
    if (alignedSize) {
      PacketScalar packet_res0 = eval.template packet<alignment, PacketScalar>(alignedStart);
      if (alignedSize > packetSize)  // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = eval.template packet<alignment, PacketScalar>(alignedStart + packetSize);
        for (Index index = alignedStart + 2 * packetSize; index < alignedEnd2; index += 2 * packetSize) {
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment, PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment, PacketScalar>(index + packetSize));
        }

        // Merge the two accumulators, then fold in a possible last odd packet.
        packet_res0 = func.packetOp(packet_res0, packet_res1);
        if (alignedEnd > alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment, PacketScalar>(alignedEnd2));
      }
      // Horizontal reduction of the packet accumulator, then the scalar
      // head (before alignedStart) and tail (after alignedEnd).
      res = func.predux(packet_res0);

      for (Index index = 0; index < alignedStart; ++index) res = func(res, eval.coeff(index));

      for (Index index = alignedEnd; index < size; ++index) res = func(res, eval.coeff(index));
    } else  // too small to vectorize anything.
            // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = eval.coeff(0);
      for (Index index = 1; index < size; ++index) res = func(res, eval.coeff(index));
    }

    return res;
  }
};
325
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Slice-vectorized reduction: vectorize along the inner dimension of each
// outer slice, then handle the scalar remainder of every slice. Falls back to
// the default scalar path when the inner size cannot hold a single packet.
template <typename Func, typename Evaluator, int Unrolling>
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling> {
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;

  template <typename XprType>
  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) {
    eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix");
    constexpr Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    const Index innerSize = xpr.innerSize();
    const Index outerSize = xpr.outerSize();
    // Largest multiple of packetSize that fits in one inner slice.
    const Index packetedInnerSize = ((innerSize) / packetSize) * packetSize;
    Scalar res;
    if (packetedInnerSize) {
      // Seed with the first packet of slice 0, then sweep the vectorizable
      // part of every slice (skipping the seed packet in slice 0).
      PacketType packet_res = eval.template packet<Unaligned, PacketType>(0, 0);
      for (Index j = 0; j < outerSize; ++j)
        for (Index i = (j == 0 ? packetSize : 0); i < packetedInnerSize; i += Index(packetSize))
          packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned, PacketType>(j, i));

      // Horizontal reduction, then the scalar remainder of each slice.
      res = func.predux(packet_res);
      for (Index j = 0; j < outerSize; ++j)
        for (Index i = packetedInnerSize; i < innerSize; ++i) res = func(res, eval.coeffByOuterInner(j, i));
    } else  // too small to vectorize anything.
            // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
    }

    return res;
  }
};
358
// Fully-unrolled linear vectorized reduction: the packet part is handled by
// the compile-time packet unroller, the scalar remainder (Size % PacketSize
// coefficients) by the scalar linear unroller.
template <typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling> {
  typedef typename Evaluator::Scalar Scalar;

  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
  static constexpr Index PacketSize = redux_traits<Func, Evaluator>::PacketSize;
  static constexpr Index Size = Evaluator::SizeAtCompileTime;
  // Number of coefficients covered by whole packets.
  static constexpr Index VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize);

  // xpr is only needed for the emptiness assertion in debug builds.
  template <typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) {
    EIGEN_ONLY_USED_FOR_DEBUG(xpr)
    eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(
          redux_vec_linear_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval, func));
      if (VectorizedSize != Size)
        res = func(
            res, redux_novec_linear_unroller<Func, Evaluator, VectorizedSize, Size - VectorizedSize>::run(eval, func));
      return res;
    } else {
      // Expression smaller than one packet: pure scalar unrolled reduction.
      return redux_novec_linear_unroller<Func, Evaluator, 0, Size>::run(eval, func);
    }
  }
};
384
// evaluator adaptor
// Thin wrapper around internal::evaluator that exposes the compile-time
// constants redux_traits needs, plus (outer, inner) coefficient/packet
// accessors that hide the expression's storage order.
template <typename XprType_>
class redux_evaluator : public internal::evaluator<XprType_> {
  typedef internal::evaluator<XprType_> Base;

 public:
  typedef XprType_ XprType;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit redux_evaluator(const XprType& xpr) : Base(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime
    // from the evaluator
    Flags = Base::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
  };

  // Maps (outer, inner) to (row, col) according to the storage order.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const {
    return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer);
  }

  // Packet load addressed by (outer, inner), same storage-order mapping.
  template <int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetByOuterInner(Index outer, Index inner) const {
    return Base::template packet<LoadMode, PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer);
  }
};
418
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800419} // end namespace internal
Googler45874d82019-08-21 12:06:47 -0700420
421/***************************************************************************
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800422 * Part 4 : public API
423 ***************************************************************************/
Googler45874d82019-08-21 12:06:47 -0700424
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
 *
 * The template parameter \a BinaryOp is the type of the functor \a func which must be
 * an associative operator. Both current C++98 and C++11 functor styles are handled.
 *
 * \warning the matrix must be not empty, otherwise an assertion is triggered.
 *
 * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
 */
template <typename Derived>
template <typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::redux(
    const Func& func) const {
  eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  // The initial expression is passed to the reducer as an additional argument instead of
  // being stored as a member of redux_evaluator, keeping the evaluator a
  // state-light adaptor (presumably to help the compiler; the original
  // comment was truncated upstream).
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
}
447
448/** \returns the minimum of all coefficients of \c *this.
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800449 * In case \c *this contains NaN, NaNPropagation determines the behavior:
450 * NaNPropagation == PropagateFast : undefined
451 * NaNPropagation == PropagateNaN : result is NaN
452 * NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
453 * \warning the matrix must be not empty, otherwise an assertion is triggered.
454 */
455template <typename Derived>
456template <int NaNPropagation>
457EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff() const {
458 return derived().redux(Eigen::internal::scalar_min_op<Scalar, Scalar, NaNPropagation>());
Googler45874d82019-08-21 12:06:47 -0700459}
460
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800461/** \returns the maximum of all coefficients of \c *this.
462 * In case \c *this contains NaN, NaNPropagation determines the behavior:
463 * NaNPropagation == PropagateFast : undefined
464 * NaNPropagation == PropagateNaN : result is NaN
465 * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
466 * \warning the matrix must be not empty, otherwise an assertion is triggered.
467 */
468template <typename Derived>
469template <int NaNPropagation>
470EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff() const {
471 return derived().redux(Eigen::internal::scalar_max_op<Scalar, Scalar, NaNPropagation>());
Googler45874d82019-08-21 12:06:47 -0700472}
473
474/** \returns the sum of all coefficients of \c *this
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800475 *
476 * If \c *this is empty, then the value 0 is returned.
477 *
478 * \sa trace(), prod(), mean()
479 */
480template <typename Derived>
481EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::sum() const {
482 if (SizeAtCompileTime == 0 || (SizeAtCompileTime == Dynamic && size() == 0)) return Scalar(0);
483 return derived().redux(Eigen::internal::scalar_sum_op<Scalar, Scalar>());
Googler45874d82019-08-21 12:06:47 -0700484}
485
/** \returns the mean of all coefficients of *this
 *
 * \warning relies on redux(), so the expression must be non-empty (division
 * by size() would also be ill-defined for an empty expression).
 *
 * \sa trace(), prod(), sum()
 */
template <typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::mean() const {
#ifdef __INTEL_COMPILER
// Silence ICC warning 2259 (non-pointer conversion may lose significant bits)
// triggered by the Scalar conversions below.
#pragma warning push
#pragma warning(disable : 2259)
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar, Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
#pragma warning pop
#endif
}
501
502/** \returns the product of all coefficients of *this
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800503 *
504 * Example: \include MatrixBase_prod.cpp
505 * Output: \verbinclude MatrixBase_prod.out
506 *
507 * \sa sum(), mean(), trace()
508 */
509template <typename Derived>
510EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::prod() const {
511 if (SizeAtCompileTime == 0 || (SizeAtCompileTime == Dynamic && size() == 0)) return Scalar(1);
Googler45874d82019-08-21 12:06:47 -0700512 return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
513}
514
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
 *
 * \c *this can be any matrix, not necessarily square.
 *
 * \sa diagonal(), sum()
 */
template <typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar MatrixBase<Derived>::trace() const {
  // Delegates to the diagonal view's sum(), which handles the reduction.
  return derived().diagonal().sum();
}
525
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800526} // end namespace Eigen
Googler45874d82019-08-21 12:06:47 -0700527
Rasmus Munk Larsen2434cfd2023-12-06 12:02:41 -0800528#endif // EIGEN_REDUX_H