Add debug logging and assertions to make sure sq_cpy is only used with aligned data

This commit is contained in:
Luke Benstead 2021-04-15 21:31:29 +01:00
parent fc1a18e002
commit 1172086378
4 changed files with 29 additions and 11 deletions

View File

@ -49,8 +49,8 @@ void _glInitMatrices() {
stack_push(&MATRIX_STACKS[1], IDENTITY); stack_push(&MATRIX_STACKS[1], IDENTITY);
stack_push(&MATRIX_STACKS[2], IDENTITY); stack_push(&MATRIX_STACKS[2], IDENTITY);
FASTCPY4(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4)); MEMCPY4(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
FASTCPY4(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4)); MEMCPY4(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
const VideoMode* vid_mode = GetVideoMode(); const VideoMode* vid_mode = GetVideoMode();
@ -96,7 +96,7 @@ static void transpose(GLfloat* m) {
} }
static void recalculateNormalMatrix() { static void recalculateNormalMatrix() {
FASTCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4)); MEMCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
inverse((GLfloat*) NORMAL_MATRIX); inverse((GLfloat*) NORMAL_MATRIX);
transpose((GLfloat*) NORMAL_MATRIX); transpose((GLfloat*) NORMAL_MATRIX);
} }
@ -290,7 +290,7 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Multiply the current matrix by an arbitrary matrix */ /* Multiply the current matrix by an arbitrary matrix */
void glMultMatrixf(const GLfloat *m) { void glMultMatrixf(const GLfloat *m) {
Matrix4x4 TEMP; Matrix4x4 TEMP;
FASTCPY4(TEMP, m, sizeof(Matrix4x4)); MEMCPY4(TEMP, m, sizeof(Matrix4x4));
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX)); UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
MultiplyMatrix4x4((const Matrix4x4*) &TEMP); MultiplyMatrix4x4((const Matrix4x4*) &TEMP);

View File

@ -10,11 +10,27 @@
#include "../types.h" #include "../types.h"
#include "sh4_math.h" #include "sh4_math.h"
#define FASTCPY(dst, src, bytes) \ #ifndef NDEBUG
(bytes % 32 == 0) ? sq_cpy(dst, src, bytes) : memcpy(dst, src, bytes) #define PERF_WARNING() printf("[PERF] Unaligned data passed to glTexImage2D\n")
#else
#define PERF_WARNING() (void) 0
#endif
#define FASTCPY4(dst, src, bytes) \
(bytes % 32 == 0) ? sq_cpy(dst, src, bytes) : memcpy4(dst, src, bytes) /* We use sq_cpy if the src and size are properly aligned. We control the
 * alignment of the destination, so we just assert that it is aligned. */
/* FASTCPY(dst, src, bytes): copy `bytes` bytes from `src` to `dst`.
 *
 * Takes the sq_cpy path when the size is a multiple of 32 and the source
 * address is 32-byte aligned; on that path the destination alignment is
 * asserted rather than tested. Otherwise emits PERF_WARNING() and falls back
 * to memcpy.
 *
 * Each argument is evaluated exactly once and captured in a local, so
 * expressions such as `ptr + 1` or `a + b` are tested as a whole: a cast or
 * `%` applied directly to the raw macro argument would bind only to its
 * first operand. */
#define FASTCPY(dst, src, bytes) \
    do { \
        void* fastcpy_dst_ = (dst); \
        const void* fastcpy_src_ = (src); \
        const size_t fastcpy_len_ = (bytes); \
        if(fastcpy_len_ % 32 == 0 && (uintptr_t) fastcpy_src_ % 32 == 0) { \
            assert((uintptr_t) fastcpy_dst_ % 32 == 0); \
            sq_cpy(fastcpy_dst_, fastcpy_src_, fastcpy_len_); \
        } else { \
            PERF_WARNING(); \
            memcpy(fastcpy_dst_, fastcpy_src_, fastcpy_len_); \
        } \
    } while(0)
#define MEMCPY4(dst, src, bytes) memcpy4(dst, src, bytes)
#define MEMSET4(dst, v, size) memset4((dst), (v), (size)) #define MEMSET4(dst, v, size) memset4((dst), (v), (size))

View File

@ -12,7 +12,9 @@
#define MATH_Fast_Invert(x) (1.0f / (x)) #define MATH_Fast_Invert(x) (1.0f / (x))
#define FASTCPY(dst, src, bytes) memcpy(dst, src, bytes) #define FASTCPY(dst, src, bytes) memcpy(dst, src, bytes)
#define FASTCPY4(dst, src, bytes) memcpy(dst, src, bytes) #define MEMCPY(dst, src, bytes) memcpy(dst, src, bytes)
#define MEMCPY4(dst, src, bytes) memcpy(dst, src, bytes)
#define MEMSET4(dst, v, size) memset((dst), (v), (size)) #define MEMSET4(dst, v, size) memset((dst), (v), (size))
#define VEC3_NORMALIZE(x, y, z) \ #define VEC3_NORMALIZE(x, y, z) \

View File

@ -702,10 +702,10 @@ void APIENTRY glGetBooleanv(GLenum pname, GLboolean* params) {
void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) { void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) {
switch(pname) { switch(pname) {
case GL_PROJECTION_MATRIX: case GL_PROJECTION_MATRIX:
FASTCPY4(params, _glGetProjectionMatrix(), sizeof(float) * 16); MEMCPY4(params, _glGetProjectionMatrix(), sizeof(float) * 16);
break; break;
case GL_MODELVIEW_MATRIX: case GL_MODELVIEW_MATRIX:
FASTCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16); MEMCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
break; break;
case GL_POLYGON_OFFSET_FACTOR: case GL_POLYGON_OFFSET_FACTOR:
*params = OFFSET_FACTOR; *params = OFFSET_FACTOR;