headers: Don't redirect _mm_malloc to _aligned_malloc

The headers have a number of brittle workarounds, all trying to make
the "#define _mm_malloc _aligned_malloc" redirect in malloc.h work
properly.

That define is problematic, because it behaves differently depending
on the order headers are included. If malloc.h is included before
GCC's mm_malloc.h, malloc.h does "#define _mm_malloc _aligned_malloc"
and "#define _MM_MALLOC_H_INCLUDED", making sure that a later include
of GCC's mm_malloc.h define nothing. If the user code calls
_mm_malloc() after that, it ends up calling _aligned_malloc().

However, if the user code (implicitly) includes mm_malloc.h before
malloc.h, the situation is much trickier. (mm_malloc.h gets implicitly
included by x86intrin.h, which is included by e.g. winnt.h.)

GCC's mm_malloc.h looks like this, a little simplified:

    #ifndef _MM_MALLOC_H_INCLUDED
    #define _MM_MALLOC_H_INCLUDED

    #include <stdlib.h>

    static __inline__ void *_mm_malloc (...)

The stdlib.h include implicitly includes malloc.h, which does
"#define _mm_malloc _aligned_malloc", which causes GCC's mm_malloc.h
to suddenly define a static inline _aligned_malloc instead.

This has been halfway worked around by not defining the non-inline
_aligned_malloc in malloc.h if _MM_MALLOC_H_INCLUDED already was
defined, making the inline function the only definition of it.

So when expanding malloc.h in this context, there's no way to stop the
outer mm_malloc.h from defining a static inline function, and regardless
of whatever name it is renamed to with a define, that static inline
function is what callers to _mm_malloc end up calling.

This causes calls to both _mm_malloc and _aligned_malloc to end
up either with the dllimported function or the static inline version,
depending on which header was included first. If one translation unit
calls _mm_malloc and another one calls _mm_free, there's a risk that
they end up mismatched, which is potentially fatal.

This was earlier attempted to be worked around in e.g. intrin.h, by
forcing including malloc.h before x86intrin.h, but such workarounds
are futile, as user code could also include x86intrin.h, immintrin.h
or even mm_malloc.h directly, without passing through mingw headers.

Instead just remove the _mm_malloc redefinition and include the
compiler's mm_malloc.h header. This makes sure that regardless of
header include order, calls to _aligned_malloc and _mm_malloc will
always end up to the same function, avoiding risks of mismatches
between *_malloc and *_free.

This also has the effect of no longer hiding the declaration of
_aligned_malloc when including intrin.h first.

Signed-off-by: Martin Storsjö <martin@martin.st>
diff --git a/mingw-w64-headers/crt/intrin.h b/mingw-w64-headers/crt/intrin.h
index 0b2343f..198c94f 100644
--- a/mingw-w64-headers/crt/intrin.h
+++ b/mingw-w64-headers/crt/intrin.h
@@ -60,7 +60,6 @@
   extern unsigned int __builtin_ia32_crc32hi (unsigned int, unsigned short);
   extern unsigned int __builtin_ia32_crc32si (unsigned int, unsigned int);
 #ifndef _MM_MALLOC_H_INCLUDED
-#define _MM_MALLOC_H_INCLUDED
 #include <stdlib.h>
 #include <errno.h>
 /* Make sure _mm_malloc and _mm_free are defined.  */
diff --git a/mingw-w64-headers/crt/malloc.h b/mingw-w64-headers/crt/malloc.h
index e73b705..1391119 100644
--- a/mingw-w64-headers/crt/malloc.h
+++ b/mingw-w64-headers/crt/malloc.h
@@ -52,18 +52,6 @@
 
   extern unsigned int _amblksiz;
 
-/* Make sure that X86intrin.h doesn't produce here collisions.  */
-#if (!defined (_XMMINTRIN_H_INCLUDED) && !defined (_MM_MALLOC_H_INCLUDED))
-#define __DO_ALIGN_DEFINES
-#endif
-
-#ifndef _MM_MALLOC_H_INCLUDED
-#define _MM_MALLOC_H_INCLUDED
-#endif
-
-#define _mm_free(a) _aligned_free(a)
-#define _mm_malloc(a,b) _aligned_malloc(a,b)
-
 #ifndef _CRT_ALLOCATION_DEFINED
 #define _CRT_ALLOCATION_DEFINED
   void *__cdecl calloc(size_t _NumOfElements,size_t _SizeOfElements);
@@ -72,10 +60,8 @@
   void *__cdecl realloc(void *_Memory,size_t _NewSize);
   _CRTIMP void *__cdecl _recalloc(void *_Memory,size_t _Count,size_t _Size);
 
-#ifdef __DO_ALIGN_DEFINES
   _CRTIMP void __cdecl _aligned_free(void *_Memory);
   _CRTIMP void *__cdecl _aligned_malloc(size_t _Size,size_t _Alignment);
-#endif
 
   _CRTIMP void *__cdecl _aligned_offset_malloc(size_t _Size,size_t _Alignment,size_t _Offset);
   _CRTIMP void *__cdecl _aligned_realloc(void *_Memory,size_t _Size,size_t _Alignment);
@@ -90,6 +76,9 @@
 void * __mingw_aligned_offset_realloc (void *_Memory, size_t _Size, size_t _Alignment, size_t _Offset);
 void * __mingw_aligned_realloc (void *_Memory, size_t _Size, size_t _Offset);
 
+/* Get the compiler's definition of _mm_malloc and _mm_free. */
+#include <mm_malloc.h>
+
 #define _MAX_WAIT_MALLOC_CRT 60000
 
 #ifdef _CRT_USE_WINAPI_FAMILY_DESKTOP_APP
diff --git a/mingw-w64-headers/crt/stdlib.h b/mingw-w64-headers/crt/stdlib.h
index a0997c4..2403e4c 100644
--- a/mingw-w64-headers/crt/stdlib.h
+++ b/mingw-w64-headers/crt/stdlib.h
@@ -531,11 +531,8 @@
   void *__cdecl malloc(size_t _Size);
   void *__cdecl realloc(void *_Memory,size_t _NewSize);
   _CRTIMP void *__cdecl _recalloc(void *_Memory,size_t _Count,size_t _Size);
-/* Make sure that X86intrin.h doesn't produce here collisions.  */
-#if (!defined (_XMMINTRIN_H_INCLUDED) && !defined (_MM_MALLOC_H_INCLUDED))
   _CRTIMP void __cdecl _aligned_free(void *_Memory);
   _CRTIMP void *__cdecl _aligned_malloc(size_t _Size,size_t _Alignment);
-#endif
   _CRTIMP void *__cdecl _aligned_offset_malloc(size_t _Size,size_t _Alignment,size_t _Offset);
   _CRTIMP void *__cdecl _aligned_realloc(void *_Memory,size_t _Size,size_t _Alignment);
   _CRTIMP void *__cdecl _aligned_recalloc(void *_Memory,size_t _Count,size_t _Size,size_t _Alignment);