Add debug logging and assertions to make sure sq_cpy is only used with aligned data

This commit is contained in:
Luke Benstead 2021-04-15 21:31:29 +01:00
parent fc1a18e002
commit 1172086378
4 changed files with 29 additions and 11 deletions

View File

@ -49,8 +49,8 @@ void _glInitMatrices() {
stack_push(&MATRIX_STACKS[1], IDENTITY);
stack_push(&MATRIX_STACKS[2], IDENTITY);
FASTCPY4(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
FASTCPY4(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
MEMCPY4(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
MEMCPY4(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
const VideoMode* vid_mode = GetVideoMode();
@ -96,7 +96,7 @@ static void transpose(GLfloat* m) {
}
/* Refreshes NORMAL_MATRIX from the matrix on top of the stack selected by
 * (GL_MODELVIEW & 0xF): the matrix is copied into NORMAL_MATRIX and then
 * passed through inverse() followed by transpose().
 *
 * NOTE(review): the same copy is issued twice below, once through FASTCPY4
 * and once through MEMCPY4 - this looks like both sides of a diff hunk
 * rendered together; confirm which of the two lines is meant to remain. */
static void recalculateNormalMatrix() {
FASTCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
MEMCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
inverse((GLfloat*) NORMAL_MATRIX);
transpose((GLfloat*) NORMAL_MATRIX);
}
@ -290,7 +290,7 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Multiply the current matrix by an arbitrary matrix */
void glMultMatrixf(const GLfloat *m) {
Matrix4x4 TEMP;
FASTCPY4(TEMP, m, sizeof(Matrix4x4));
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
MultiplyMatrix4x4((const Matrix4x4*) &TEMP);

View File

@ -10,11 +10,27 @@
#include "../types.h"
#include "sh4_math.h"
/* Picks sq_cpy when bytes is a multiple of 32 and memcpy otherwise. Only the
 * size is tested here; the alignment of dst and src is not checked.
 *
 * NOTE(review): a second, do/while-based definition of FASTCPY appears
 * further down in this listing - presumably this one is the pre-change side
 * of the diff; confirm before relying on it. */
#define FASTCPY(dst, src, bytes) \
(bytes % 32 == 0) ? sq_cpy(dst, src, bytes) : memcpy(dst, src, bytes)
/* Debug-only diagnostic. When NDEBUG is defined the call expands to a no-op
 * expression; otherwise it prints a "[PERF]" warning through printf. */
#ifdef NDEBUG
#define PERF_WARNING() (void) 0
#else
#define PERF_WARNING() printf("[PERF] Unaligned data passed to glTexImage2D\n")
#endif
/* Picks sq_cpy when bytes is a multiple of 32 and memcpy4 otherwise. Only the
 * size is tested here; the alignment of dst and src is not checked.
 *
 * NOTE(review): the call sites shown in this listing are switched from
 * FASTCPY4 to MEMCPY4, so this is presumably the pre-change definition;
 * confirm before relying on it. */
#define FASTCPY4(dst, src, bytes) \
(bytes % 32 == 0) ? sq_cpy(dst, src, bytes) : memcpy4(dst, src, bytes)
/* Copies bytes from src to dst.
 *
 * sq_cpy is used only when both the byte count and the source address are
 * multiples of 32. The destination is under our control, so its alignment is
 * asserted rather than tested. Every other case emits PERF_WARNING() and
 * falls back to memcpy.
 *
 * Each argument is parenthesised inside the expansion so that expression
 * arguments such as `a + b` or `ptr + 1` are checked as a whole instead of
 * being split by operator precedence. Arguments are still evaluated more
 * than once, so they must not have side effects. */
#define FASTCPY(dst, src, bytes) \
    do { \
        if((bytes) % 32 == 0 && (uintptr_t) (src) % 32 == 0) { \
            assert((uintptr_t) (dst) % 32 == 0); \
            sq_cpy((dst), (src), (bytes)); \
        } else { \
            PERF_WARNING(); \
            memcpy((dst), (src), (bytes)); \
        } \
    } while(0)
/* Thin wrappers that forward straight to memcpy4 / memset4. Arguments are
 * parenthesised so both macros expand the same way. */
#define MEMCPY4(dst, src, bytes) memcpy4((dst), (src), (bytes))
#define MEMSET4(dst, v, size) memset4((dst), (v), (size))

View File

@ -12,7 +12,9 @@
/* Reciprocal implemented as an ordinary float division. */
#define MATH_Fast_Invert(x) (1.0f / (x))

/* Copy and fill helpers: every variant here forwards directly to the C
 * library memcpy / memset with its arguments unchanged. */
#define FASTCPY(dst, src, bytes) memcpy((dst), (src), (bytes))
#define FASTCPY4(dst, src, bytes) memcpy((dst), (src), (bytes))
#define MEMCPY(dst, src, bytes) memcpy((dst), (src), (bytes))
#define MEMCPY4(dst, src, bytes) memcpy((dst), (src), (bytes))
#define MEMSET4(dst, v, size) memset((dst), (v), (size))
#define VEC3_NORMALIZE(x, y, z) \

View File

@ -702,10 +702,10 @@ void APIENTRY glGetBooleanv(GLenum pname, GLboolean* params) {
void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) {
switch(pname) {
case GL_PROJECTION_MATRIX:
FASTCPY4(params, _glGetProjectionMatrix(), sizeof(float) * 16);
MEMCPY4(params, _glGetProjectionMatrix(), sizeof(float) * 16);
break;
case GL_MODELVIEW_MATRIX:
FASTCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
MEMCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
break;
case GL_POLYGON_OFFSET_FACTOR:
*params = OFFSET_FACTOR;