Add debug logging and assertions to make sure sq_cpy is only used with aligned data

2021-04-15 21:31:29 +01:00 · 2021-04-15 21:31:29 +01:00 · 1172086378
commit 1172086378
parent fc1a18e002
4 changed files with 29 additions and 11 deletions
--- a/GL/matrix.c
+++ b/GL/matrix.c
@ -49,8 +49,8 @@ void _glInitMatrices() {
    stack_push(&MATRIX_STACKS[1], IDENTITY);
    stack_push(&MATRIX_STACKS[2], IDENTITY);
-    FASTCPY4(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
+    MEMCPY4(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
-    FASTCPY4(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
+    MEMCPY4(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
    const VideoMode* vid_mode = GetVideoMode();
@ -96,7 +96,7 @@ static void transpose(GLfloat* m) {
 }
 static void recalculateNormalMatrix() {
-    FASTCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
+    MEMCPY4(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
    inverse((GLfloat*) NORMAL_MATRIX);
    transpose((GLfloat*) NORMAL_MATRIX);
 }
@ -290,7 +290,7 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
 /* Multiply the current matrix by an arbitrary matrix */
 void glMultMatrixf(const GLfloat *m) {
    Matrix4x4 TEMP;
-    FASTCPY4(TEMP, m, sizeof(Matrix4x4));
+    MEMCPY4(TEMP, m, sizeof(Matrix4x4));
    UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
    MultiplyMatrix4x4((const Matrix4x4*) &TEMP);
--- a/GL/platforms/sh4.h
+++ b/GL/platforms/sh4.h
@ -10,11 +10,27 @@
 #include "../types.h"
 #include "sh4_math.h"
-#define FASTCPY(dst, src, bytes) \
+#ifndef NDEBUG
-    (bytes % 32 == 0) ? sq_cpy(dst, src, bytes) : memcpy(dst, src, bytes)
+#define PERF_WARNING() printf("[PERF] Unaligned data passed to glTexImage2D\n")
 #else
 #define PERF_WARNING() (void) 0
 #endif
-#define FASTCPY4(dst, src, bytes) \
+
-    (bytes % 32 == 0) ? sq_cpy(dst, src, bytes) : memcpy4(dst, src, bytes)
+/* We use sq_cpy only if both the src pointer and the size are 32-byte aligned.
  * The destination is under our control and should always be aligned, so we assert that. */
 /* Copy `bytes` bytes from `src` to `dst`, taking the sq_cpy path when the
  * size is a multiple of 32 and `src` is 32-byte aligned, and falling back to
  * memcpy (after emitting PERF_WARNING) otherwise. On the sq_cpy path the
  * destination alignment is asserted rather than tested.
  *
  * Every argument is captured once into a local, so compound expressions such
  * as `ptr + offset` or `w * h * 2` are parsed as a whole and any side effects
  * happen exactly once. */
 #define FASTCPY(dst, src, bytes) \
    do { \
        void* fastcpy_dst_ = (dst); \
        const void* fastcpy_src_ = (src); \
        const size_t fastcpy_len_ = (size_t) (bytes); \
        if(fastcpy_len_ % 32 == 0 && (uintptr_t) fastcpy_src_ % 32 == 0) { \
            assert((uintptr_t) fastcpy_dst_ % 32 == 0); \
            sq_cpy(fastcpy_dst_, fastcpy_src_, fastcpy_len_); \
        } else { \
            PERF_WARNING(); \
            memcpy(fastcpy_dst_, fastcpy_src_, fastcpy_len_); \
        } \
    } while(0)
 /* Thin wrappers that forward their arguments directly to memcpy4/memset4
  * without any alignment or size checks of their own.
  * NOTE(review): presumably the "4" variants expect 4-byte-aligned pointers
  * and sizes that are multiples of 4 -- confirm against the memcpy4/memset4
  * declarations. */
 #define MEMCPY4(dst, src, bytes) memcpy4(dst, src, bytes)
 #define MEMSET4(dst, v, size) memset4((dst), (v), (size))
--- a/GL/platforms/software.h
+++ b/GL/platforms/software.h
@ -12,7 +12,9 @@
 #define MATH_Fast_Invert(x) (1.0f / (x))
 #define FASTCPY(dst, src, bytes) memcpy(dst, src, bytes)
-#define FASTCPY4(dst, src, bytes) memcpy(dst, src, bytes)
+#define MEMCPY(dst, src, bytes) memcpy(dst, src, bytes)
 #define MEMCPY4(dst, src, bytes) memcpy(dst, src, bytes)
 #define MEMSET4(dst, v, size) memset((dst), (v), (size))
 #define VEC3_NORMALIZE(x, y, z) \
--- a/GL/state.c
+++ b/GL/state.c
@ -702,10 +702,10 @@ void APIENTRY glGetBooleanv(GLenum pname, GLboolean* params) {
 void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) {
    switch(pname) {
        case GL_PROJECTION_MATRIX:
-            FASTCPY4(params, _glGetProjectionMatrix(), sizeof(float) * 16);
+            MEMCPY4(params, _glGetProjectionMatrix(), sizeof(float) * 16);
        break;
        case GL_MODELVIEW_MATRIX:
-            FASTCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
+            MEMCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
        break;
        case GL_POLYGON_OFFSET_FACTOR:
            *params = OFFSET_FACTOR;