From 5ea4313d598fd423fa18ad59e0244b2e4fe9f5d8 Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Fri, 14 May 2021 21:53:27 +0100 Subject: [PATCH] Fix software renderer --- GL/draw.c | 10 +++++----- GL/flush.c | 4 ++-- GL/platforms/sh4.h | 2 ++ GL/platforms/software.h | 2 ++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 2054932..99a668d 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -471,7 +471,7 @@ GL_FORCE_INLINE void genQuads(Vertex* output, GLuint count) { Vertex* final = output + 3; GLuint i = count >> 2; while(i--) { - __asm__("pref @%0" : : "r"(pen + 4)); + PREFETCH(pen + 4); swapVertex(pen, final); final->flags = GPU_CMD_VERTEX_EOL; @@ -639,7 +639,7 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL uint32_t* flags; ITERATE(count) { - __asm__("pref @%0" : : "r"(vptr + vstride)); + PREFETCH(vptr + vstride); func(vptr, out); vptr += vstride; @@ -660,7 +660,7 @@ static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, GLubyte* out = (GLubyte*) output[0].uv; ITERATE(count) { - __asm__("pref @%0" : : "r"(uvptr + uvstride)); + PREFETCH(uvptr + uvstride); func(uvptr, out); uvptr += uvstride; @@ -675,7 +675,7 @@ static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, GLubyte* out = (GLubyte*) extra[0].st; ITERATE(count) { - __asm__("pref @%0" : : "r"(stptr + ststride)); + PREFETCH(stptr + ststride); func(stptr, out); stptr += ststride; @@ -725,7 +725,7 @@ static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLu GLubyte* out = (GLubyte*) output[0].bgra; ITERATE(count) { - __asm__("pref @%0" : : "r"(cptr + cstride)); + PREFETCH(cptr + cstride); func(cptr, out); cptr += cstride; diff --git a/GL/flush.c b/GL/flush.c index c52b1be..b29aac9 100644 --- a/GL/flush.c +++ b/GL/flush.c @@ -101,7 +101,7 @@ GL_FORCE_INLINE void glPerspectiveDivideStandard(void* src, uint32_t n) { const float h = GetVideoMode()->height; while(n--) { - __asm__("pref @%0" : : "r"(vertex + 1)); + PREFETCH(vertex + 1); if(likely(glIsVertex(vertex->flags))) { const float f = MATH_Fast_Invert(vertex->w); @@ -135,7 +135,7 @@ GL_FORCE_INLINE void glPerspectiveDivideFastMode(void* src, uint32_t n) { const float h = GetVideoMode()->height; while(n--) { - __asm__("pref @%0" : : "r"(vertex + 1)); + PREFETCH(vertex + 1); if(likely(glIsVertex(vertex->flags))) { const float f = MATH_Fast_Invert(vertex->w); diff --git a/GL/platforms/sh4.h b/GL/platforms/sh4.h index 0c312be..f2a3295 100644 --- a/GL/platforms/sh4.h +++ b/GL/platforms/sh4.h @@ -17,6 +17,8 @@ #endif +#define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr))) + /* We use sq_cpy if the src and size is properly aligned. We control that the * destination is properly aligned so we assert that. */ #define FASTCPY(dst, src, bytes) \ diff --git a/GL/platforms/software.h b/GL/platforms/software.h index e3a3a03..bbb4f35 100644 --- a/GL/platforms/software.h +++ b/GL/platforms/software.h @@ -5,6 +5,8 @@ #include "../types.h" +#define PREFETCH(addr) do {} while(0) + #define MATH_Fast_Divide(n, d) (n / d) #define MATH_fmac(a, b, c) (a * b + c) #define MATH_Fast_Sqrt(x) sqrtf((x))