qt-everywhere-src-5.14.1/qtbase/src/corelib/tools/qsimd_p.h - orbit - Git at Google

 /****************************************************************************
 **
 ** Copyright (C) 2016 The Qt Company Ltd.
 ** Copyright (C) 2018 Intel Corporation.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
 **
 ** $QT_BEGIN_LICENSE:LGPL$
 ** Commercial License Usage
 ** Licensees holding valid commercial Qt licenses may use this file in
 ** accordance with the commercial license agreement provided with the
 ** Software or, alternatively, in accordance with the terms contained in
 ** a written agreement between you and The Qt Company. For licensing terms
 ** and conditions see https://www.qt.io/terms-conditions. For further
 ** information use the contact form at https://www.qt.io/contact-us.
 **
 ** GNU Lesser General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU Lesser
 ** General Public License version 3 as published by the Free Software
 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
 ** packaging of this file. Please review the following information to
 ** ensure the GNU Lesser General Public License version 3 requirements
 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
 **
 ** GNU General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU
 ** General Public License version 2.0 or (at your option) the GNU General
 ** Public license version 3 or any later version approved by the KDE Free
 ** Qt Foundation. The licenses are as published by the Free Software
 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
 ** included in the packaging of this file. Please review the following
 ** information to ensure the GNU General Public License requirements will
 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
 ** https://www.gnu.org/licenses/gpl-3.0.html.
 **
 ** $QT_END_LICENSE$
 **
 ****************************************************************************/

 #ifndef QSIMD_P_H
 #define QSIMD_P_H

 //
 //  W A R N I N G
 //  -------------
 //
 // This file is not part of the Qt API.  It exists purely as an
 // implementation detail.  This header file may change from version to
 // version without notice, or even be removed.
 //
 // We mean it.
 //

 #include <QtCore/private/qglobal_p.h>

 /*
  * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros.
  * They mean the compiler supports the necessary flags and the headers
  * for the x86 and ARM intrinsics:
  *  - GCC: the -mXXX or -march=YYY flag is necessary before #include
  *    up to 4.8; GCC >= 4.9 can include unconditionally
  *  - Intel CC: #include can happen unconditionally
  *  - MSVC: #include can happen unconditionally
  *  - RVCT: ???
  *
  * We will try to include all headers possible under this configuration.
  *
  * MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 &
  * up do define __AVX__ if the -arch:AVX option is passed on the command-line.
  *
  * Supported XXX are:
  *   Flag    | Arch |  GCC  | Intel CC |  MSVC  |
  *  ARM_NEON | ARM  | I & C | None     |   ?    |
  *  SSE2     | x86  | I & C | I & C    | I & C  |
  *  SSE3     | x86  | I & C | I & C    | I only |
  *  SSSE3    | x86  | I & C | I & C    | I only |
  *  SSE4_1   | x86  | I & C | I & C    | I only |
  *  SSE4_2   | x86  | I & C | I & C    | I only |
  *  AVX      | x86  | I & C | I & C    | I & C  |
  *  AVX2     | x86  | I & C | I & C    | I only |
  *  AVX512xx | x86  | I & C | I & C    | I only |
  * I = intrinsics; C = code generation
  *
  * Code can use the following constructs to determine compiler support & status:
  * - #ifdef __XXX__      (e.g: #ifdef __AVX__  or #ifdef __ARM_NEON__)
  *   If this test passes, then the compiler is already generating code for that
  *   given sub-architecture. The intrinsics for that sub-architecture are
  *   #included and can be used without restriction or runtime check.
  *
  * - #if QT_COMPILER_SUPPORTS(XXX)
  *   If this test passes, then the compiler is able to generate code for that
  *   given sub-architecture in another translation unit, given the right set of
  *   flags. Use of the intrinsics is not guaranteed. This is useful with
  *   runtime detection (see below).
  *
  * - #if QT_COMPILER_SUPPORTS_HERE(XXX)
  *   If this test passes, then the compiler is able to generate code for that
  *   given sub-architecture in this translation unit, even if it is not doing
  *   that now (it might be). Individual functions may be tagged with
  *   QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that
  *   sub-arch. Only inside such functions is the use of the intrinsics
  *   guaranteed to work. This is useful with runtime detection (see below).
  *
  * Runtime detection of a CPU sub-architecture can be done with the
  * qCpuHasFeature(XXX) function. There are two strategies for generating
  * optimized code like that:
  *
  * 1) place the optimized code in a different translation unit (C or assembly
  * sources) and pass the correct flags to the compiler to enable support. Those
  * sources must not include qglobal.h, which means they cannot include this
  * file either. The dispatcher function would look like this:
  *
  *      void foo()
  *      {
  *      #if QT_COMPILER_SUPPORTS(XXX)
  *          if (qCpuHasFeature(XXX)) {
  *              foo_optimized_xxx();
  *              return;
  *          }
  *      #endif
  *          foo_plain();
  *      }
  *
  * 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and
  * surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use
  * other Qt code. The dispatcher function would look like this:
  *
  *      void foo()
  *      {
  *      #if QT_COMPILER_SUPPORTS_HERE(XXX)
  *          if (qCpuHasFeature(XXX)) {
  *              foo_optimized_xxx();
  *              return;
  *          }
  *      #endif
  *          foo_plain();
  *      }
  */

 #if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC)
 #include <intrin.h>
 #endif

 /*
  * QT_COMPILER_SUPPORTS(x) expands to (QT_COMPILER_SUPPORTS_x - 0). Inside #if
  * the trailing "- 0" keeps the expression valid when QT_COMPILER_SUPPORTS_x
  * is defined with an empty body, and an undefined macro evaluates to 0.
  */
 #define QT_COMPILER_SUPPORTS(x)     (QT_COMPILER_SUPPORTS_ ## x - 0)

 /*
  * QT_COMPILER_SUPPORTS_HERE(x) and QT_FUNCTION_TARGET(x), chosen per
  * processor family:
  *  - ARM:  tests __ARM_FEATURE_x; the target attribute is emitted only when
  *          Q_CC_GNU is defined with a value >= 600 and Q_CC_INTEL is not.
  *  - MIPS: tests __x__; QT_FUNCTION_TARGET expands to nothing.
  *  - x86 with QT_COMPILER_SUPPORTS_SIMD_ALWAYS: tests __x__ or
  *          QT_COMPILER_SUPPORTS(x); the target attribute is emitted when
  *          Q_CC_GNU is defined and Q_CC_INTEL is not.
  *  - anything else: tests __x__; QT_FUNCTION_TARGET expands to nothing.
  *
  * The helper macros defined in the ARM and MIPS branches carry the value 1.
  * QT_COMPILER_SUPPORTS_HERE() places them inside parentheses in an #if, and
  * an empty definition would expand to "()", which is not a valid #if
  * expression.
  */
 #if defined(Q_PROCESSOR_ARM)
 #  define QT_COMPILER_SUPPORTS_HERE(x)    (__ARM_FEATURE_ ## x)
 #  if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600
      /* GCC requires attributes for a function */
 #    define QT_FUNCTION_TARGET(x)  __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
 #  else
 #    define QT_FUNCTION_TARGET(x)
 #  endif
 #  if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__)
 #    define __ARM_FEATURE_NEON 1         // also support QT_COMPILER_SUPPORTS_HERE(NEON)
 #  endif
 #elif defined(Q_PROCESSOR_MIPS)
 #  define QT_COMPILER_SUPPORTS_HERE(x)    (__ ## x ## __)
 #  define QT_FUNCTION_TARGET(x)
 #  if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32)
 #    define __MIPS_DSP__ 1               // usable as QT_COMPILER_SUPPORTS_HERE(MIPS_DSP)
 #  endif
 #  if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32)
 #    define __MIPS_DSPR2__ 1             // usable as QT_COMPILER_SUPPORTS_HERE(MIPS_DSPR2)
 #  endif
 #elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)
 #  define QT_COMPILER_SUPPORTS_HERE(x)    ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x))
 #  if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
      /* GCC requires attributes for a function */
 #    define QT_FUNCTION_TARGET(x)  __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
 #  else
 #    define QT_FUNCTION_TARGET(x)
 #  endif
 #else
 #  define QT_COMPILER_SUPPORTS_HERE(x)    (__ ## x ## __)
 #  define QT_FUNCTION_TARGET(x)
 #endif

 #ifdef Q_PROCESSOR_X86
 /* -- x86 intrinsic support -- */

 // Normalise the x86 feature macros (__SSE2__, __AVX__, __POPCNT__, ...) for
 // compilers that do not predefine them, and include the intrinsics headers.
 // The order matters: later checks test macros defined by earlier ones.
 #  if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
 // MSVC doesn't define __SSE2__, so do it ourselves
 #    define __SSE__                         1
 #    define __SSE2__                        1
 #  endif

 #  ifdef __SSE2__
 // #include the intrinsics
 #    include <immintrin.h>
 #  endif

 #  if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
 // GCC 4.4 and Clang 2.8 added a few more intrinsics there
 #    include <x86intrin.h>
 #  endif

 #  if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__))
 // Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros
 // See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx
 #    define __SSE3__                        1
 #    define __SSSE3__                       1
 // no Intel CPU supports SSE4a, so don't define it
 #    define __SSE4_1__                      1
 #    define __SSE4_2__                      1
 #    ifndef __AVX__
 #      define __AVX__                       1
 #    endif
 #  endif

 #  if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
 // POPCNT instructions:
 // All processors that support SSE4.2 support POPCNT
 // (but neither MSVC nor the Intel compiler define this macro)
 #    define __POPCNT__                      1
 #  endif

 // AVX intrinsics
 #  if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
 // PCLMULQDQ instructions (only __PCLMUL__ is defined here; no AES macro is set):
 // All processors that support AVX support PCLMULQDQ
 // (but neither MSVC nor the Intel compiler define this macro)
 #    define __PCLMUL__                      1
 #  endif

 #  if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
 // F16C & RDRAND instructions:
 // All processors that support AVX2 support F16C & RDRAND:
 // (but neither MSVC nor the Intel compiler define these macros)
 #    define __F16C__                        1
 #    define __RDRND__                       1
 #  endif

 #  if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
 // BMI2 instructions:
 // Assumes every processor that supports BMI also supports BMI2 (and AVX2).
 // Applied for the Intel compiler only: the condition above tests Q_CC_INTEL
 // and, unlike the neighbouring checks, not Q_CC_MSVC.
 #    define __BMI2__                        1
 #  endif

 #  include "qsimd_x86_p.h"

 // Haswell sub-architecture
 //
 // The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
 // BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
 // sub-target for us. The first AMD processor with AVX2 support (Zen) has the
 // same features.
 //
 // macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
 // ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
 //
 // __haswell__ is defined only when all seven feature macros tested below are
 // defined for this translation unit.
 #  define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL    "arch=haswell"
 #  if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
     defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
 #    define __haswell__       1
 #  endif

 // Mask of the Haswell-level CPU features that we would have optimized code
 // for; it is not the full list of features found in a Haswell.
 // NOTE(review): an earlier note here asked for Q_CONSTEXPR, but the constant
 // is declared as a plain `static const`, presumably so that the header still
 // compiles in C mode -- confirm.
 QT_BEGIN_NAMESPACE
 static const quint64 CpuFeatureArchHaswell = 0 |
         CpuFeatureSSE2 | CpuFeatureSSE3 | CpuFeatureSSSE3 |
         CpuFeatureSSE4_1 | CpuFeatureSSE4_2 |
         CpuFeatureFMA | CpuFeaturePOPCNT |
         CpuFeatureAVX | CpuFeatureF16C | CpuFeatureAVX2 |
         CpuFeatureBMI | CpuFeatureBMI2;
 QT_END_NAMESPACE

 #endif  /* Q_PROCESSOR_X86 */

 // Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
 // This should be tweaked with an "upper version" of clang once we know which release fixes the
 // issue. At that point we can rely on __ARM_FEATURE_CRC32 again.
 // Effect: with Clang on Darwin the macro is undefined from here on, so the
 // CRC32 section below and every later test of __ARM_FEATURE_CRC32 in this
 // header are skipped.
 #if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32)
 #  undef __ARM_FEATURE_CRC32
 #endif

 // NEON intrinsics
 // note: as of GCC 4.9, does not support function targets for ARM
 // Either spelling of the NEON macro (__ARM_NEON or __ARM_NEON__) enables the
 // include; afterwards __ARM_NEON__ is guaranteed to be defined.
 #if defined(__ARM_NEON) || defined(__ARM_NEON__)
 #include <arm_neon.h>
 #define QT_FUNCTION_TARGET_STRING_NEON      "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available.
 #ifndef __ARM_NEON__
 // __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection.
 // It is defined without a value here, so test it with #ifdef / defined() only.
 #define __ARM_NEON__
 #endif
 #endif
 // AArch64/ARM64
 // arm_acle.h is included whenever Q_PROCESSOR_ARM_V8 and __ARM_FEATURE_CRC32
 // are both defined; the "+crc" target string is additionally restricted to
 // Q_PROCESSOR_ARM_64.
 #if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32)
 #if defined(Q_PROCESSOR_ARM_64)
 // only available on aarch64
 #define QT_FUNCTION_TARGET_STRING_CRC32      "+crc"
 #endif
 #  include <arm_acle.h>
 #endif

 #ifdef __cplusplus
 #include <qatomic.h>

 QT_BEGIN_NAMESPACE

 #ifndef Q_PROCESSOR_X86
 // Bit flags used in the CPU feature masks on non-x86 processors. Bit 0 is a
 // bookkeeping marker; bits 1 and 2 name ARM or MIPS features, depending on
 // the processor family being compiled for.
 enum CPUFeatures {
     // used only to indicate that the CPU detection was initialised
     QSimdInitialized        = 1 << 0
 #if defined(Q_PROCESSOR_ARM)
     , CpuFeatureNEON        = 1 << 1
     , CpuFeatureCRC32       = 1 << 2
     , CpuFeatureARM_NEON    = CpuFeatureNEON   // alias of CpuFeatureNEON
 #elif defined(Q_PROCESSOR_MIPS)
     , CpuFeatureDSP         = 1 << 1
     , CpuFeatureDSPR2       = 1 << 2
 #endif
 };

 // Mask of the features whose macros are defined for this translation unit,
 // assembled at compile time from the tests below. qCpuHasFeature() checks
 // this mask first and only then the runtime mask from qCpuFeatures().
 // Each "| CpuFeatureXxx" line is added only when its macro is defined; the
 // terminating ';' sits on its own line so any combination forms one statement.
 static const quint64 qCompilerCpuFeatures = 0
 #if defined __ARM_NEON__
         | CpuFeatureNEON
 #endif
 #if defined __ARM_FEATURE_CRC32
         | CpuFeatureCRC32
 #endif
 #if defined __mips_dsp
         | CpuFeatureDSP
 #endif
 #if defined __mips_dspr2
         | CpuFeatureDSPR2
 #endif
         ;
 #endif

 // Storage for the feature mask read by qCpuFeatures(): a single 64-bit atomic
 // when Q_ATOMIC_INT64_IS_SUPPORTED is defined, otherwise two `unsigned`
 // atomics, where qCpuFeatures() shifts element [1] left by 32 and ORs it
 // onto element [0].
 #ifdef Q_ATOMIC_INT64_IS_SUPPORTED
 extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1];
 #else
 extern Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2];
 #endif
 // Called by qCpuFeatures() when the stored mask reads as 0; qCpuFeatures()
 // assumes (Q_ASSUME) that the value returned here is non-zero.
 Q_CORE_EXPORT quint64 qDetectCpuFeatures();

 // qRandomCpu() is an exported function only on x86, when
 // QT_COMPILER_SUPPORTS_HERE(RDRND) holds and QT_BOOTSTRAPPED is not defined.
 // In every other configuration the inline stub below is used instead; it
 // ignores both arguments and always returns 0.
 // NOTE(review): presumably the return value is the amount of random data
 // produced, so 0 means "nothing generated" -- confirm against the definition.
 #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED)
 Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept;
 #else
 static inline qsizetype qRandomCpu(void *, qsizetype) noexcept
 {
     return 0;
 }
 #endif

 // Returns the CPU feature mask. The mask stored in qt_cpu_features is read
 // with relaxed loads; if it is still 0, qDetectCpuFeatures() is called and
 // its (assumed non-zero) result is returned instead.
 static inline quint64 qCpuFeatures()
 {
     quint64 cached = qt_cpu_features[0].loadRelaxed();
 #ifndef Q_ATOMIC_INT64_IS_SUPPORTED
     // no 64-bit atomics: the upper half is kept in the second element
     cached |= quint64(qt_cpu_features[1].loadRelaxed()) << 32;
 #endif
     if (Q_UNLIKELY(cached == 0)) {
         // nothing stored yet: run the detection now
         const quint64 detected = qDetectCpuFeatures();
         Q_ASSUME(detected != 0);
         return detected;
     }
     return cached;
 }

 // qCpuHasFeature(feature): true when every bit of CpuFeature<feature> is set
 // in qCompilerCpuFeatures or, failing that, in the mask returned by
 // qCpuFeatures(). The compile-time mask is tested first, so qCpuFeatures() is
 // only called when that test fails. The argument is pasted onto "CpuFeature",
 // so it must be a bare feature name (e.g. RDRND).
 #define qCpuHasFeature(feature)     (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
                                      || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))

 // Reports whether the RDRND CPU feature is available. The answer is always
 // false unless this is an x86 build in which QT_COMPILER_SUPPORTS_HERE(RDRND)
 // holds; in that case it is the result of qCpuHasFeature(RDRND).
 inline bool qHasHwrng()
 {
     bool hwrngAvailable = false;
 #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
     hwrngAvailable = qCpuHasFeature(RDRND);
 #endif
     return hwrngAvailable;
 }

 // ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length): loop header (no initialiser)
 // that runs the following statement while i < min(length, k), with
 // k = (4 - ((ptr >> 2) & 0x3)) & 0x3, incrementing i after each pass.
 // NOTE(review): k is the number of 4-byte steps from ptr up to the next
 // 16-byte boundary, so this presumably expects ptr to address 4-byte elements
 // and i to start at 0 -- confirm with callers.
 #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
     for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)

 // ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length): same shape as the 16-byte
 // variant, with k = (8 - ((ptr >> 2) & 0x7)) & 0x7, i.e. counting 4-byte steps
 // up to the next 32-byte boundary (same assumptions to confirm).
 #define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \
     for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((8 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x7)) & 0x7))); ++i)

 QT_END_NAMESPACE

 #endif // __cplusplus

 /*
  * SIMD_EPILOGUE(i, length, max): loop header that runs the following
  * statement while i < length, but at most max times. i is incremented after
  * every pass, together with a private counter _i that enforces the max limit.
  * i must be a modifiable lvalue; it is not initialised here.
  * Every argument is parenthesised so that expression arguments (for example
  * a conditional expression passed as max) keep their intended grouping.
  */
 #define SIMD_EPILOGUE(i, length, max) \
     for (int _i = 0; _i < (max) && (i) < (length); ++(i), ++_i)

 #endif // QSIMD_P_H
	/****************************************************************************
	**
	** Copyright (C) 2016 The Qt Company Ltd.
	** Copyright (C) 2018 Intel Corporation.
	** Contact: https://www.qt.io/licensing/
	**
	** This file is part of the QtCore module of the Qt Toolkit.
	**
	** $QT_BEGIN_LICENSE:LGPL$
	** Commercial License Usage
	** Licensees holding valid commercial Qt licenses may use this file in
	** accordance with the commercial license agreement provided with the
	** Software or, alternatively, in accordance with the terms contained in
	** a written agreement between you and The Qt Company. For licensing terms
	** and conditions see https://www.qt.io/terms-conditions. For further
	** information use the contact form at https://www.qt.io/contact-us.
	**
	** GNU Lesser General Public License Usage
	** Alternatively, this file may be used under the terms of the GNU Lesser
	** General Public License version 3 as published by the Free Software
	** Foundation and appearing in the file LICENSE.LGPL3 included in the
	** packaging of this file. Please review the following information to
	** ensure the GNU Lesser General Public License version 3 requirements
	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
	**
	** GNU General Public License Usage
	** Alternatively, this file may be used under the terms of the GNU
	** General Public License version 2.0 or (at your option) the GNU General
	** Public license version 3 or any later version approved by the KDE Free
	** Qt Foundation. The licenses are as published by the Free Software
	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
	** included in the packaging of this file. Please review the following
	** information to ensure the GNU General Public License requirements will
	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
	** https://www.gnu.org/licenses/gpl-3.0.html.
	**
	** $QT_END_LICENSE$
	**
	****************************************************************************/

	#ifndef QSIMD_P_H
	#define QSIMD_P_H

	//
	// W A R N I N G
	// -------------
	//
	// This file is not part of the Qt API. It exists purely as an
	// implementation detail. This header file may change from version to
	// version without notice, or even be removed.
	//
	// We mean it.
	//

	#include <QtCore/private/qglobal_p.h>

	/*
	* qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros.
	* They mean the compiler supports the necessary flags and the headers
	* for the x86 and ARM intrinsics:
	* - GCC: the -mXXX or march=YYY flag is necessary before #include
	* up to 4.8; GCC >= 4.9 can include unconditionally
	* - Intel CC: #include can happen unconditionally
	* - MSVC: #include can happen unconditionally
	* - RVCT: ???
	*
	* We will try to include all headers possible under this configuration.
	*
	* MSVC does not define __SSE2__ & family, so we will define them. MSVC 2013 &
	* up do define __AVX__ if the -arch:AVX option is passed on the command-line.
	*
	* Supported XXX are:
	*   Flag    | Arch |  GCC  | Intel CC |  MSVC  |
	*  ARM_NEON | ARM  | I & C | None     |   ?    |
	*  SSE2     | x86  | I & C | I & C    | I & C  |
	*  SSE3     | x86  | I & C | I & C    | I only |
	*  SSSE3    | x86  | I & C | I & C    | I only |
	*  SSE4_1   | x86  | I & C | I & C    | I only |
	*  SSE4_2   | x86  | I & C | I & C    | I only |
	*  AVX      | x86  | I & C | I & C    | I & C  |
	*  AVX2     | x86  | I & C | I & C    | I only |
	*  AVX512xx | x86  | I & C | I & C    | I only |
	* I = intrinsics; C = code generation
	*
	* Code can use the following constructs to determine compiler support & status:
	* - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__)
	* If this test passes, then the compiler is already generating code for that
	* given sub-architecture. The intrinsics for that sub-architecture are
	* #included and can be used without restriction or runtime check.
	*
	* - #if QT_COMPILER_SUPPORTS(XXX)
	* If this test passes, then the compiler is able to generate code for that
	* given sub-architecture in another translation unit, given the right set of
	* flags. Use of the intrinsics is not guaranteed. This is useful with
	* runtime detection (see below).
	*
	* - #if QT_COMPILER_SUPPORTS_HERE(XXX)
	* If this test passes, then the compiler is able to generate code for that
	* given sub-architecture in this translation unit, even if it is not doing
	* that now (it might be). Individual functions may be tagged with
	* QT_FUNCTION_TARGET(XXX) to cause the compiler to generate code for that
	* sub-arch. Only inside such functions is the use of the intrinsics
	* guaranteed to work. This is useful with runtime detection (see below).
	*
	* Runtime detection of a CPU sub-architecture can be done with the
	* qCpuHasFeature(XXX) function. There are two strategies for generating
	* optimized code like that:
	*
	* 1) place the optimized code in a different translation unit (C or assembly
	* sources) and pass the correct flags to the compiler to enable support. Those
	* sources must not include qglobal.h, which means they cannot include this
	* file either. The dispatcher function would look like this:
	*
	* void foo()
	* {
	* #if QT_COMPILER_SUPPORTS(XXX)
	* if (qCpuHasFeature(XXX)) {
	* foo_optimized_xxx();
	* return;
	* }
	* #endif
	* foo_plain();
	* }
	*
	* 2) place the optimized code in a function tagged with QT_FUNCTION_TARGET and
	* surrounded by #if QT_COMPILER_SUPPORTS_HERE(XXX). That code can freely use
	* other Qt code. The dispatcher function would look like this:
	*
	* void foo()
	* {
	* #if QT_COMPILER_SUPPORTS_HERE(XXX)
	* if (qCpuHasFeature(XXX)) {
	* foo_optimized_xxx();
	* return;
	* }
	* #endif
	* foo_plain();
	* }
	*/

	#if defined(__MINGW64_VERSION_MAJOR) || defined(Q_CC_MSVC)
	#include <intrin.h>
	#endif

	/*
	 * QT_COMPILER_SUPPORTS(x) expands to (QT_COMPILER_SUPPORTS_x - 0). Inside #if
	 * the trailing "- 0" keeps the expression valid when QT_COMPILER_SUPPORTS_x
	 * is defined with an empty body, and an undefined macro evaluates to 0.
	 */
	#define QT_COMPILER_SUPPORTS(x)     (QT_COMPILER_SUPPORTS_ ## x - 0)

	/*
	 * QT_COMPILER_SUPPORTS_HERE(x) and QT_FUNCTION_TARGET(x), chosen per
	 * processor family:
	 *  - ARM:  tests __ARM_FEATURE_x; the target attribute is emitted only when
	 *          Q_CC_GNU is defined with a value >= 600 and Q_CC_INTEL is not.
	 *  - MIPS: tests __x__; QT_FUNCTION_TARGET expands to nothing.
	 *  - x86 with QT_COMPILER_SUPPORTS_SIMD_ALWAYS: tests __x__ or
	 *          QT_COMPILER_SUPPORTS(x); the target attribute is emitted when
	 *          Q_CC_GNU is defined and Q_CC_INTEL is not.
	 *  - anything else: tests __x__; QT_FUNCTION_TARGET expands to nothing.
	 *
	 * The helper macros defined in the ARM and MIPS branches carry the value 1.
	 * QT_COMPILER_SUPPORTS_HERE() places them inside parentheses in an #if, and
	 * an empty definition would expand to "()", which is not a valid #if
	 * expression.
	 */
	#if defined(Q_PROCESSOR_ARM)
	#  define QT_COMPILER_SUPPORTS_HERE(x)    (__ARM_FEATURE_ ## x)
	#  if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600
	     /* GCC requires attributes for a function */
	#    define QT_FUNCTION_TARGET(x)  __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
	#  else
	#    define QT_FUNCTION_TARGET(x)
	#  endif
	#  if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__)
	#    define __ARM_FEATURE_NEON 1         // also support QT_COMPILER_SUPPORTS_HERE(NEON)
	#  endif
	#elif defined(Q_PROCESSOR_MIPS)
	#  define QT_COMPILER_SUPPORTS_HERE(x)    (__ ## x ## __)
	#  define QT_FUNCTION_TARGET(x)
	#  if !defined(__MIPS_DSP__) && defined(__mips_dsp) && defined(Q_PROCESSOR_MIPS_32)
	#    define __MIPS_DSP__ 1               // usable as QT_COMPILER_SUPPORTS_HERE(MIPS_DSP)
	#  endif
	#  if !defined(__MIPS_DSPR2__) && defined(__mips_dspr2) && defined(Q_PROCESSOR_MIPS_32)
	#    define __MIPS_DSPR2__ 1             // usable as QT_COMPILER_SUPPORTS_HERE(MIPS_DSPR2)
	#  endif
	#elif defined(Q_PROCESSOR_X86) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)
	#  define QT_COMPILER_SUPPORTS_HERE(x)    ((__ ## x ## __) || QT_COMPILER_SUPPORTS(x))
	#  if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
	     /* GCC requires attributes for a function */
	#    define QT_FUNCTION_TARGET(x)  __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x)))
	#  else
	#    define QT_FUNCTION_TARGET(x)
	#  endif
	#else
	#  define QT_COMPILER_SUPPORTS_HERE(x)    (__ ## x ## __)
	#  define QT_FUNCTION_TARGET(x)
	#endif

	#ifdef Q_PROCESSOR_X86
	/* -- x86 intrinsic support -- */

	// Normalise the x86 feature macros (__SSE2__, __AVX__, __POPCNT__, ...) for
	// compilers that do not predefine them, and include the intrinsics headers.
	// The order matters: later checks test macros defined by earlier ones.
	#  if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
	// MSVC doesn't define __SSE2__, so do it ourselves
	#    define __SSE__                         1
	#    define __SSE2__                        1
	#  endif

	#  ifdef __SSE2__
	// #include the intrinsics
	#    include <immintrin.h>
	#  endif

	#  if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
	// GCC 4.4 and Clang 2.8 added a few more intrinsics there
	#    include <x86intrin.h>
	#  endif

	#  if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__))
	// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros
	// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx
	#    define __SSE3__                        1
	#    define __SSSE3__                       1
	// no Intel CPU supports SSE4a, so don't define it
	#    define __SSE4_1__                      1
	#    define __SSE4_2__                      1
	#    ifndef __AVX__
	#      define __AVX__                       1
	#    endif
	#  endif

	#  if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
	// POPCNT instructions:
	// All processors that support SSE4.2 support POPCNT
	// (but neither MSVC nor the Intel compiler define this macro)
	#    define __POPCNT__                      1
	#  endif

	// AVX intrinsics
	#  if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
	// PCLMULQDQ instructions (only __PCLMUL__ is defined here; no AES macro is set):
	// All processors that support AVX support PCLMULQDQ
	// (but neither MSVC nor the Intel compiler define this macro)
	#    define __PCLMUL__                      1
	#  endif

	#  if defined(__AVX2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC))
	// F16C & RDRAND instructions:
	// All processors that support AVX2 support F16C & RDRAND:
	// (but neither MSVC nor the Intel compiler define these macros)
	#    define __F16C__                        1
	#    define __RDRND__                       1
	#  endif

	#  if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
	// BMI2 instructions:
	// Assumes every processor that supports BMI also supports BMI2 (and AVX2).
	// Applied for the Intel compiler only: the condition above tests Q_CC_INTEL
	// and, unlike the neighbouring checks, not Q_CC_MSVC.
	#    define __BMI2__                        1
	#  endif

	#  include "qsimd_x86_p.h"

	// Haswell sub-architecture
	//
	// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
	// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
	// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
	// same features.
	//
	// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
	// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
	//
	// __haswell__ is defined only when all seven feature macros tested below are
	// defined for this translation unit.
	#  define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL    "arch=haswell"
	#  if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
	    defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
	#    define __haswell__       1
	#  endif

	// Mask of the Haswell-level CPU features that we would have optimized code
	// for; it is not the full list of features found in a Haswell.
	// NOTE(review): an earlier note here asked for Q_CONSTEXPR, but the constant
	// is declared as a plain `static const`, presumably so that the header still
	// compiles in C mode -- confirm.
	QT_BEGIN_NAMESPACE
	static const quint64 CpuFeatureArchHaswell    = 0
	        | CpuFeatureSSE2
	        | CpuFeatureSSE3
	        | CpuFeatureSSSE3
	        | CpuFeatureSSE4_1
	        | CpuFeatureSSE4_2
	        | CpuFeatureFMA
	        | CpuFeaturePOPCNT
	        | CpuFeatureAVX
	        | CpuFeatureF16C
	        | CpuFeatureAVX2
	        | CpuFeatureBMI
	        | CpuFeatureBMI2;
	QT_END_NAMESPACE

	#endif /* Q_PROCESSOR_X86 */

	// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
	// This should be tweaked with an "upper version" of clang once we know which release fixes the
	// issue. At that point we can rely on __ARM_FEATURE_CRC32 again.
	// Effect: with Clang on Darwin the macro is undefined from here on, so the
	// CRC32 section below and every later test of __ARM_FEATURE_CRC32 in this
	// header are skipped.
	#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32)
	#  undef __ARM_FEATURE_CRC32
	#endif

	// NEON intrinsics
	// note: as of GCC 4.9, does not support function targets for ARM
	// Either spelling of the NEON macro (__ARM_NEON or __ARM_NEON__) enables the
	// include; afterwards __ARM_NEON__ is guaranteed to be defined.
	#if defined(__ARM_NEON) || defined(__ARM_NEON__)
	#include <arm_neon.h>
	#define QT_FUNCTION_TARGET_STRING_NEON      "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available.
	#ifndef __ARM_NEON__
	// __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection.
	// It is defined without a value here, so test it with #ifdef / defined() only.
	#define __ARM_NEON__
	#endif
	#endif
	// AArch64/ARM64
	// arm_acle.h is included whenever Q_PROCESSOR_ARM_V8 and __ARM_FEATURE_CRC32
	// are both defined; the "+crc" target string is additionally restricted to
	// Q_PROCESSOR_ARM_64.
	#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32)
	#if defined(Q_PROCESSOR_ARM_64)
	// only available on aarch64
	#define QT_FUNCTION_TARGET_STRING_CRC32      "+crc"
	#endif
	#  include <arm_acle.h>
	#endif

	#ifdef __cplusplus
	#include <qatomic.h>

	QT_BEGIN_NAMESPACE

	#ifndef Q_PROCESSOR_X86
	// Bit flags used in the CPU feature masks on non-x86 processors. Bit 0 is a
	// bookkeeping marker; bits 1 and 2 name ARM or MIPS features, depending on
	// the processor family being compiled for.
	enum CPUFeatures {
	    // used only to indicate that the CPU detection was initialised
	    QSimdInitialized        = 1 << 0
	#if defined(Q_PROCESSOR_ARM)
	    , CpuFeatureNEON        = 1 << 1
	    , CpuFeatureCRC32       = 1 << 2
	    , CpuFeatureARM_NEON    = CpuFeatureNEON   // alias of CpuFeatureNEON
	#elif defined(Q_PROCESSOR_MIPS)
	    , CpuFeatureDSP         = 1 << 1
	    , CpuFeatureDSPR2       = 1 << 2
	#endif
	};

	// Mask of the features whose macros are defined for this translation unit,
	// assembled at compile time from the tests below. qCpuHasFeature() checks
	// this mask first and only then the runtime mask from qCpuFeatures().
	// Each "| CpuFeatureXxx" line is added only when its macro is defined; the
	// terminating ';' sits on its own line so any combination forms one statement.
	static const quint64 qCompilerCpuFeatures = 0
	#if defined __ARM_NEON__
	        | CpuFeatureNEON
	#endif
	#if defined __ARM_FEATURE_CRC32
	        | CpuFeatureCRC32
	#endif
	#if defined __mips_dsp
	        | CpuFeatureDSP
	#endif
	#if defined __mips_dspr2
	        | CpuFeatureDSPR2
	#endif
	        ;
	#endif

	// Storage for the feature mask read by qCpuFeatures(): a single 64-bit atomic
	// when Q_ATOMIC_INT64_IS_SUPPORTED is defined, otherwise two `unsigned`
	// atomics, where qCpuFeatures() shifts element [1] left by 32 and ORs it
	// onto element [0].
	#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
	extern Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1];
	#else
	extern Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2];
	#endif
	// Called by qCpuFeatures() when the stored mask reads as 0; qCpuFeatures()
	// assumes (Q_ASSUME) that the value returned here is non-zero.
	Q_CORE_EXPORT quint64 qDetectCpuFeatures();

	// qRandomCpu() is an exported function only on x86, when
	// QT_COMPILER_SUPPORTS_HERE(RDRND) holds and QT_BOOTSTRAPPED is not defined.
	// In every other configuration the inline stub below is used instead; it
	// ignores both arguments and always returns 0.
	// NOTE(review): presumably the return value is the amount of random data
	// produced, so 0 means "nothing generated" -- confirm against the definition.
	#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) && !defined(QT_BOOTSTRAPPED)
	Q_CORE_EXPORT qsizetype qRandomCpu(void *, qsizetype) noexcept;
	#else
	static inline qsizetype qRandomCpu(void *, qsizetype) noexcept
	{
	return 0;
	}
	#endif

	// Returns the CPU feature mask. The mask stored in qt_cpu_features is read
	// with relaxed loads; if it is still 0, qDetectCpuFeatures() is called and
	// its (assumed non-zero) result is returned instead.
	static inline quint64 qCpuFeatures()
	{
	    quint64 features = qt_cpu_features[0].loadRelaxed();
	#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
	    // no 64-bit atomics: the upper half is kept in the second element
	    features |= quint64(qt_cpu_features[1].loadRelaxed()) << 32;
	#endif
	    if (Q_UNLIKELY(features == 0)) {
	        // nothing stored yet: run the detection now
	        features = qDetectCpuFeatures();
	        Q_ASSUME(features != 0);
	    }
	    return features;
	}

	// qCpuHasFeature(feature): true when every bit of CpuFeature<feature> is set
	// in qCompilerCpuFeatures or, failing that, in the mask returned by
	// qCpuFeatures(). The compile-time mask is tested first, so qCpuFeatures() is
	// only called when that test fails. The argument is pasted onto "CpuFeature",
	// so it must be a bare feature name (e.g. RDRND).
	#define qCpuHasFeature(feature)     (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
	                                     || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))

	// Reports whether the RDRND CPU feature is available. The answer is always
	// false unless this is an x86 build in which QT_COMPILER_SUPPORTS_HERE(RDRND)
	// holds; in that case it is the result of qCpuHasFeature(RDRND).
	inline bool qHasHwrng()
	{
	    bool hwrngAvailable = false;
	#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
	    hwrngAvailable = qCpuHasFeature(RDRND);
	#endif
	    return hwrngAvailable;
	}

	// ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length): loop header (no initialiser)
	// that runs the following statement while i < min(length, k), with
	// k = (4 - ((ptr >> 2) & 0x3)) & 0x3, incrementing i after each pass.
	// NOTE(review): k is the number of 4-byte steps from ptr up to the next
	// 16-byte boundary, so this presumably expects ptr to address 4-byte elements
	// and i to start at 0 -- confirm with callers.
	#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
	for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)

	// ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length): same shape as the 16-byte
	// variant, with k = (8 - ((ptr >> 2) & 0x7)) & 0x7, i.e. counting 4-byte steps
	// up to the next 32-byte boundary (same assumptions to confirm).
	#define ALIGNMENT_PROLOGUE_32BYTES(ptr, i, length) \
	for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((8 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x7)) & 0x7))); ++i)

	QT_END_NAMESPACE

	#endif // __cplusplus

	/*
	 * SIMD_EPILOGUE(i, length, max): loop header that runs the following
	 * statement while i < length, but at most max times. i is incremented after
	 * every pass, together with a private counter _i that enforces the max limit.
	 * i must be a modifiable lvalue; it is not initialised here.
	 * Every argument is parenthesised so that expression arguments (for example
	 * a conditional expression passed as max) keep their intended grouping.
	 */
	#define SIMD_EPILOGUE(i, length, max) \
	    for (int _i = 0; _i < (max) && (i) < (length); ++(i), ++_i)

	#endif // QSIMD_P_H