blob: 70cfa08d95fab92bd4090b4fb0629784058cbc16 [file] [log] [blame]
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qimagescale_p.h"
#include "qimage.h"
#include <private/qdrawhelper_x86_p.h>
#include <private/qsimd_p.h>
#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
#include "qsemaphore.h"
#include "qthreadpool.h"
#endif
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
QT_BEGIN_NAMESPACE
using namespace QImageScale;
template<typename T>
static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
{
#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
QThreadPool *threadPool = QThreadPool::globalInstance();
if (segments > 1 && !threadPool->contains(QThread::currentThread())) {
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
int yn = (dh - y) / (segments - i);
threadPool->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
scaleSection(0, dh);
}
inline static __m128i Q_DECL_VECTORCALL
qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
{
__m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
__m128i vx = _mm_mullo_epi32(vpix, vxyap);
int i;
for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
pix += step;
vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
}
pix += step;
vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
return vx;
}
template<bool RGB>
void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow)
{
const unsigned int **ypoints = isi->ypoints;
const int *xpoints = isi->xpoints;
const int *xapoints = isi->xapoints;
const int *yapoints = isi->yapoints;
const __m128i v256 = _mm_set1_epi32(256);
/* go through every scanline in the output buffer */
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
const int Cy = yapoints[y] >> 16;
const int yap = yapoints[y] & 0xffff;
const __m128i vCy = _mm_set1_epi32(Cy);
const __m128i vyap = _mm_set1_epi32(yap);
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
const unsigned int *sptr = ypoints[y] + xpoints[x];
__m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
const int xap = xapoints[x];
if (xap > 0) {
const __m128i vxap = _mm_set1_epi32(xap);
const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
__m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
vx = _mm_mullo_epi32(vx, vinvxap);
vr = _mm_mullo_epi32(vr, vxap);
vx = _mm_add_epi32(vx, vr);
vx = _mm_srli_epi32(vx, 8);
}
vx = _mm_srli_epi32(vx, 14);
vx = _mm_packus_epi32(vx, vx);
vx = _mm_packus_epi16(vx, vx);
*dptr = _mm_cvtsi128_si32(vx);
if (RGB)
*dptr |= 0xff000000;
dptr++;
}
}
};
multithread_pixels_function(isi, dh, scaleSection);
}
template<bool RGB>
void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow)
{
const unsigned int **ypoints = isi->ypoints;
int *xpoints = isi->xpoints;
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
const __m128i v256 = _mm_set1_epi32(256);
/* go through every scanline in the output buffer */
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
int Cx = xapoints[x] >> 16;
int xap = xapoints[x] & 0xffff;
const __m128i vCx = _mm_set1_epi32(Cx);
const __m128i vxap = _mm_set1_epi32(xap);
const unsigned int *sptr = ypoints[y] + xpoints[x];
__m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
int yap = yapoints[y];
if (yap > 0) {
const __m128i vyap = _mm_set1_epi32(yap);
const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
__m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
vx = _mm_mullo_epi32(vx, vinvyap);
vr = _mm_mullo_epi32(vr, vyap);
vx = _mm_add_epi32(vx, vr);
vx = _mm_srli_epi32(vx, 8);
}
vx = _mm_srli_epi32(vx, 14);
vx = _mm_packus_epi32(vx, vx);
vx = _mm_packus_epi16(vx, vx);
*dptr = _mm_cvtsi128_si32(vx);
if (RGB)
*dptr |= 0xff000000;
dptr++;
}
}
};
multithread_pixels_function(isi, dh, scaleSection);
}
template<bool RGB>
void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow)
{
const unsigned int **ypoints = isi->ypoints;
int *xpoints = isi->xpoints;
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
const __m128i vCy = _mm_set1_epi32(Cy);
const __m128i vyap = _mm_set1_epi32(yap);
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
const int Cx = xapoints[x] >> 16;
const int xap = xapoints[x] & 0xffff;
const __m128i vCx = _mm_set1_epi32(Cx);
const __m128i vxap = _mm_set1_epi32(xap);
const unsigned int *sptr = ypoints[y] + xpoints[x];
__m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
__m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
int j;
for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
sptr += sow;
vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
}
sptr += sow;
vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
vr = _mm_srli_epi32(vr, 24);
vr = _mm_packus_epi32(vr, _mm_setzero_si128());
vr = _mm_packus_epi16(vr, _mm_setzero_si128());
*dptr = _mm_cvtsi128_si32(vr);
if (RGB)
*dptr |= 0xff000000;
dptr++;
}
}
};
multithread_pixels_function(isi, dh, scaleSection);
}
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow);
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow);
template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow);
template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow);
template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow);
template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
int dw, int dh, int dow, int sow);
QT_END_NAMESPACE
#endif