src_prepare-overlay/games-fps/quake15/files/0020-do-not-assume-sse2-is-...

382 lines
13 KiB
Diff

--- a/makefile.inc 2024-03-18 14:28:17.506666539 +0100
+++ b/makefile.inc 2024-03-18 14:34:25.586666536 +0100
@@ -181,9 +181,12 @@
ifeq ($(DP_SSE),1)
CFLAGS_SSE=-msse
- CFLAGS_SSE2=-msse2
else
CFLAGS_SSE=
+endif # ifeq ($(DP_SSE),1)
+ifeq ($(DP_SSE2),1)
+ CFLAGS_SSE2=-msse2
+else
CFLAGS_SSE2=
-endif # ifeq ($(DP_SSE),1)
+endif # ifeq ($(DP_SSE2),1)
--- a/makefile 2017-08-30 06:29:14.000000000 +0200
+++ b/makefile 2024-03-18 14:41:57.396666527 +0100
@@ -26,24 +26,29 @@
endif # ifdef windir
endif # ifndef DP_MAKE_TARGET
+# Gentoo builds define GENTOO_BUILD and are expected to pass DP_SSE/DP_SSE2 themselves, so skip the host-based autodetection below
+ifndef GENTOO_BUILD
# If we're targeting an x86 CPU we want to enable DP_SSE (CFLAGS_SSE and SSE2)
-ifeq ($(DP_MAKE_TARGET), mingw)
- DP_SSE:=1
-else
- DP_MACHINE:=$(shell uname -m)
- ifeq ($(DP_MACHINE),x86_64)
+ ifeq ($(DP_MAKE_TARGET), mingw)
DP_SSE:=1
+ DP_SSE2:=1
else
- ifeq ($(DP_MACHINE),i686)
- DP_SSE:=1
- else
- ifeq ($(DP_MACHINE),i386)
- DP_SSE:=1
- else
- DP_SSE:=0
- endif # ifeq ($(DP_MACHINE),i386)
- endif # ifeq ($(DP_MACHINE),i686)
- endif # ifeq ($(DP_MACHINE),x86_64)
+ DP_MACHINE:=$(shell uname -m)
+ ifeq ($(DP_MACHINE),x86_64)
+ DP_SSE:=1
+ DP_SSE2:=1
+ else
+ ifeq ($(DP_MACHINE),i686)
+ DP_SSE:=1
+ else
+ ifeq ($(DP_MACHINE),i386)
+ DP_SSE:=1
+ else
+ DP_SSE:=0
+ endif # ifeq ($(DP_MACHINE),i386)
+ endif # ifeq ($(DP_MACHINE),i686)
+ endif # ifeq ($(DP_MACHINE),x86_64)
+ endif # ifeq ($(DP_MAKE_TARGET), mingw)
endif
# Makefile name
--- a/quakedef.h 2017-08-30 06:29:14.000000000 +0200
+++ b/quakedef.h 2024-03-18 15:22:43.836666480 +0100
@@ -470,39 +470,44 @@
#endif
#if defined(__GNUC__)
-# if defined(__i386__)
-# define DP_ARCH_STR "686"
-# define SSE_POSSIBLE
-# ifdef __SSE__
-# define SSE_PRESENT
-# endif
-# ifdef __SSE2__
-# define SSE2_PRESENT
-# endif
-# elif defined(__x86_64__)
-# define DP_ARCH_STR "x86_64"
-# define SSE_PRESENT
-# define SSE2_PRESENT
-# elif defined(__powerpc__)
-# define DP_ARCH_STR "ppc"
-# endif
-#elif defined(_WIN64)
-# define DP_ARCH_STR "x86_64"
-# define SSE_PRESENT
-# define SSE2_PRESENT
-#elif defined(WIN32)
-# define DP_ARCH_STR "x86"
-# define SSE_POSSIBLE
+# if defined(__i386__) || defined(WIN32)
+# ifdef __SSE__
+# define SSE_POSSIBLE
+# endif
+# ifdef __SSE2__
+# define SSE2_POSSIBLE
+# endif
+# if defined(__i686__)
+# define DP_ARCH_STR "i686"
+# elif defined(__i586__)
+# define DP_ARCH_STR "i586"
+# elif defined(__i486__)
+# define DP_ARCH_STR "i486"
+# else
+# define DP_ARCH_STR "i386"
+# endif
+# elif defined(__amd64__) || defined(_WIN64)
+# define SSE_POSSIBLE
+# define SSE2_POSSIBLE
+# define DP_ARCH_STR "amd64"
+# elif defined(__powerpc64__)
+# define DP_ARCH_STR "ppc64"
+# elif defined(__arm64__)
+# define DP_ARCH_STR "arm64"
+# endif
#endif
-
-#ifdef SSE_PRESENT
-# define SSE_POSSIBLE
+#ifdef SSE_POSSIBLE
+# define SSE_PRESENT
+#endif
+#ifdef SSE2_POSSIBLE
+# define SSE2_PRESENT
#endif
#ifdef NO_SSE
-# undef SSE_PRESENT
-# undef SSE_POSSIBLE
-# undef SSE2_PRESENT
+# undef SSE_PRESENT
+# undef SSE_POSSIBLE
+# undef SSE2_PRESENT
+# undef SSE2_POSSIBLE
#endif
#ifdef SSE_POSSIBLE
--- a/dpsoftrast.c 2017-08-30 06:29:14.000000000 +0200
+++ b/dpsoftrast.c 2024-03-18 18:51:30.696666431 +0100
@@ -17,7 +17,7 @@
#define ALIGN_SIZE 16
#define ATOMIC_SIZE 4
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
#if defined(__APPLE__)
#include <libkern/OSAtomic.h>
#define ALIGN(var) var __attribute__((__aligned__(16)))
@@ -84,7 +84,7 @@
#define ATOMIC_ADD(counter, val) ((void)((counter) += (val)))
#endif
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
#include <emmintrin.h>
#if defined(__GNUC__) && (__GNUC < 4 || __GNUC_MINOR__ < 6) && !defined(__clang__)
@@ -1393,7 +1393,7 @@
}
void DPSOFTRAST_UniformMatrix4fv(DPSOFTRAST_UNIFORM uniform, int arraysize, int transpose, const float *v)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int i, index;
for (i = 0, index = (int)uniform;i < arraysize;i++, index += 4, v += 16)
{
@@ -1467,7 +1467,7 @@
command->clipplane[3] = w;
}
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
static void DPSOFTRAST_Load4fTo4f(float *dst, const unsigned char *src, int size, int stride)
{
float *end = dst + size*4;
@@ -1663,7 +1663,7 @@
static void DPSOFTRAST_Vertex_Transform(float *out4f, const float *in4f, int numitems, const float *inmatrix16f)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
static const float identitymatrix16f[4][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}};
__m128 m0, m1, m2, m3;
float *end;
@@ -1716,7 +1716,7 @@
}
#endif
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
#define DPSOFTRAST_PROJECTVERTEX(out, in, viewportcenter, viewportscale) \
{ \
__m128 p = (in), w = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3, 3, 3, 3)); \
@@ -1919,7 +1919,7 @@
static float *DPSOFTRAST_Array_Load(int outarray, int inarray)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
float *outf = dpsoftrast.post_array4f[outarray];
const unsigned char *inb;
int firstvertex = dpsoftrast.firstvertex;
@@ -1986,7 +1986,7 @@
#if 0
static float *DPSOFTRAST_Array_Project(int outarray, int inarray)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray];
dpsoftrast.drawclipped = DPSOFTRAST_Vertex_Project(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices);
return data;
@@ -1998,7 +1998,7 @@
static float *DPSOFTRAST_Array_TransformProject(int outarray, int inarray, const float *inmatrix16f)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray];
dpsoftrast.drawclipped = DPSOFTRAST_Vertex_TransformProject(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices, inmatrix16f);
return data;
@@ -2036,7 +2036,7 @@
static void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, const unsigned char* RESTRICT in4ub)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
@@ -2545,7 +2545,7 @@
static void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
@@ -2981,7 +2981,7 @@
static void DPSOFTRAST_Draw_Span_MultiplyVaryingBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, int arrayindex, const float *zf)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
@@ -3028,7 +3028,7 @@
static void DPSOFTRAST_Draw_Span_VaryingBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, int arrayindex, const float *zf)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
@@ -3073,7 +3073,7 @@
static void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *subcolor)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x, startx = span->startx, endx = span->endx;
__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(subcolor), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2));
localcolor = _mm_packs_epi32(localcolor, localcolor);
@@ -3096,7 +3096,7 @@
static void DPSOFTRAST_Draw_Span_MultiplyBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x, startx = span->startx, endx = span->endx;
for (x = startx;x+2 <= endx;x+=2)
{
@@ -3117,7 +3117,7 @@
static void DPSOFTRAST_Draw_Span_AddBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x, startx = span->startx, endx = span->endx;
for (x = startx;x+2 <= endx;x+=2)
{
@@ -3139,7 +3139,7 @@
#if 0
static void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *inbtintbgra)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x, startx = span->startx, endx = span->endx;
__m128i tint = _mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(inbtintbgra), _mm_set1_ps(256.0f)));
tint = _mm_packs_epi32(tint, tint);
@@ -3163,7 +3163,7 @@
static void DPSOFTRAST_Draw_Span_MixBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x, startx = span->startx, endx = span->endx;
for (x = startx;x+2 <= endx;x+=2)
{
@@ -3186,7 +3186,7 @@
static void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, const float *color)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int x, startx = span->startx, endx = span->endx;
__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(color), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)), blend;
localcolor = _mm_packs_epi32(localcolor, localcolor);
@@ -3320,7 +3320,7 @@
static void DPSOFTRAST_PixelShader_FlatColor(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
unsigned char * RESTRICT pixelmask = span->pixelmask;
unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;
int x, startx = span->startx, endx = span->endx;
@@ -3371,7 +3371,7 @@
static void DPSOFTRAST_PixelShader_VertexColor(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
unsigned char * RESTRICT pixelmask = span->pixelmask;
unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;
int x, startx = span->startx, endx = span->endx;
@@ -3445,7 +3445,7 @@
static void DPSOFTRAST_PixelShader_Lightmap(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
unsigned char * RESTRICT pixelmask = span->pixelmask;
unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;
int x, startx = span->startx, endx = span->endx;
@@ -4139,7 +4139,7 @@
static void DPSOFTRAST_PixelShader_LightSource(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
@@ -4847,7 +4847,7 @@
static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAST_Command_Draw *command)
{
-#ifdef SSE_POSSIBLE
+#ifdef SSE2_POSSIBLE
int cullface = thread->cullface;
int minx, maxx, miny, maxy;
int miny1, maxy1, miny2, maxy2;
--- a/vid_shared.c 2017-08-30 06:29:14.000000000 +0200
+++ b/vid_shared.c 2024-03-18 19:11:31.063333077 +0100
@@ -1729,16 +1729,11 @@
void VID_Shared_Init(void)
{
-#ifdef SSE_POSSIBLE
- if (Sys_HaveSSE2())
- {
- Con_Printf("DPSOFTRAST available (SSE2 instructions detected)\n");
- Cvar_RegisterVariable(&vid_soft);
- Cvar_RegisterVariable(&vid_soft_threads);
- Cvar_RegisterVariable(&vid_soft_interlace);
- }
- else
- Con_Printf("DPSOFTRAST not available (SSE2 disabled or not detected)\n");
+#ifdef SSE2_POSSIBLE
+ Con_Printf("DPSOFTRAST available (SSE2 instructions compiled in)\n");
+ Cvar_RegisterVariable(&vid_soft);
+ Cvar_RegisterVariable(&vid_soft_threads);
+ Cvar_RegisterVariable(&vid_soft_interlace);
#else
Con_Printf("DPSOFTRAST not available (SSE2 not compiled in)\n");
#endif