From c3ae9bef640ecf98e33ba78cb7378927b11fc0aa Mon Sep 17 00:00:00 2001 From: Luke Benstead Date: Sun, 5 Apr 2020 21:12:52 +0100 Subject: [PATCH] Fix up depth functions and update sh4_math --- GL/draw.c | 5 +- GL/sh4_math.h | 335 ++++++++++++++++++++++++++++++++++++++++++++++++-- GL/state.c | 8 +- 3 files changed, 331 insertions(+), 17 deletions(-) diff --git a/GL/draw.c b/GL/draw.c index 0a55b61..34186a1 100644 --- a/GL/draw.c +++ b/GL/draw.c @@ -10,6 +10,7 @@ #include "../include/glext.h" #include "private.h" #include "profiler.h" +#include "sh4_math.h" static AttribPointer VERTEX_POINTER; @@ -1168,10 +1169,10 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) { Vertex* vertex = _glSubmissionTargetStart(target); ITERATE(target->count) { - float f = MATH_fsrra(vertex->w * vertex->w); + float f = MATH_Fast_Invert(vertex->w); vertex->xyz[0] *= f; vertex->xyz[1] *= f; - vertex->xyz[2] = f; + vertex->xyz[2] = vertex->w; /* FIXME: Consider taking glDepthRange into account. PVR is designed to use invW rather * than Z which is unlike most GPUs - this apparently provides advantages. 
diff --git a/GL/sh4_math.h b/GL/sh4_math.h index 41facc4..4dbf727 100644 --- a/GL/sh4_math.h +++ b/GL/sh4_math.h @@ -1,6 +1,6 @@ // ---- sh4_math.h - SH7091 Math Module ---- // -// Version 1.1.1 +// Version 1.1.3 // // This file is part of the DreamHAL project, a hardware abstraction library // primarily intended for use on the SH7091 found in hardware such as the SEGA @@ -110,6 +110,11 @@ typedef struct { static const ALL_FLOATS_STRUCT MATH_identity_matrix = {1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f}; +// Constants +#define MATH_pi 3.14159265358979323846264338327950288419716939937510f +#define MATH_e 2.71828182845904523536028747135266249775724709369995f +#define MATH_phi 1.61803398874989484820458683436563811772030917980576f + //============================================================================== // Basic math functions //============================================================================== @@ -518,13 +523,20 @@ static inline __attribute__((always_inline)) float MATH_Slow_Divide(float numera // Notes: // - From http://www.shared-ptr.com/sh_insns.html: -// The input angle is specified as a signed fraction in twos complement. The result of sin and cos is a single-precision floating-point number. +// The input angle is specified as a signed fraction in twos complement. +// The result of sin and cos is a single-precision floating-point number. 
// 0x7FFFFFFF to 0x00000001: 360×2^15−360/2^16 to 360/2^16 degrees // 0x00000000: 0 degree // 0xFFFFFFFF to 0x80000000: −360/2^16 to −360×2^15 degrees // - fsca format is 2^16 is 360 degrees, so a value of 1 is actually -// 1/182.044444444 of a degree +// 1/182.044444444 of a degree or 1/10430.3783505 of a radian // - fsca does a %360 automatically for values over 360 degrees +// +// Also: +// In order to make the best use of fsca units, a program must expect them from +// the outset and not "make them" by dividing radians or degrees to get them, +// otherwise it's just giving the 'fsca' instruction radians or degrees! +// // The following functions are available. // Please see their definitions for other usage info, otherwise they may not @@ -810,6 +822,11 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad( // work for you. // /* + + //------------------------------------------------------------------------------ + // Vector and matrix math operations + //------------------------------------------------------------------------------ + // Inner/dot product (4x1 vec . 
4x1 vec = scalar) float MATH_fipr(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4) @@ -837,6 +854,10 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad( // 4x4 Matrix product (two from memory) void MATH_Load_Matrix_Product(ALL_FLOATS_STRUCT * matrix1, ALL_FLOATS_STRUCT * matrix2) + //------------------------------------------------------------------------------ + // Matrix load and store operations + //------------------------------------------------------------------------------ + // Load 4x4 XMTRX from memory void MATH_Load_XMTRX(ALL_FLOATS_STRUCT * back_matrix) @@ -850,6 +871,10 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad( RETURN_VECTOR_STRUCT MATH_Get_XMTRX_2x2(unsigned int which) */ +//------------------------------------------------------------------------------ +// Vector and matrix math operations +//------------------------------------------------------------------------------ + // Inner/dot product: vec . vec = scalar // _ _ // | y1 | @@ -1728,22 +1753,102 @@ static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Get_XMTRX // The following functions are provided as examples of ways in which these math // functions can be used. // +// Reminder: 1 fsca unit = 1/182.044444444 of a degree or 1/10430.3783505 of a radian +// In order to make the best use of fsca units, a program must expect them from +// the outset and not "make them" by dividing radians or degrees to get them, +// otherwise it's just giving the 'fsca' instruction radians or degrees! 
+// /* - // Linear interpolation - float lerp(float a, float b, float t) - // Speherical interpolation - float slerp(float a, float b, float t, float theta) + //------------------------------------------------------------------------------ + // Commonly useful functions + //------------------------------------------------------------------------------ + + // Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not + int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty) + + //------------------------------------------------------------------------------ + // Interpolation + //------------------------------------------------------------------------------ + + // Linear interpolation + float MATH_Lerp(float a, float b, float t) + + // Spherical interpolation ('theta' in fsca units) + float MATH_Slerp(float a, float b, float t, float theta) + + //------------------------------------------------------------------------------ + // Fast Sinc functions (unnormalized, sin(x)/x version) + //------------------------------------------------------------------------------ + // Just pass in MATH_pi * x for normalized versions :) + + // Sinc function (fsca units) + float MATH_Fast_Sincf(float x) + + // Sinc function (degrees) + float MATH_Fast_Sincf_Deg(float x) + + // Sinc function (rads) + float MATH_Fast_Sincf_Rad(float x) + + //------------------------------------------------------------------------------ + // Kaiser Window + //------------------------------------------------------------------------------ + + // Generates mipmaps. Angle 'x' in radians. + float MATH_Kaiser_Window_Rad(float x, float alpha, float stretch, float m_width) + + // Generates mipmaps. Angle 'x' in fsca units. 
+ float MATH_Kaiser_Window(float x, float alpha, float stretch, float m_width) + */ +//------------------------------------------------------------------------------ +// Commonly useful functions +//------------------------------------------------------------------------------ + +// Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not +// Determines containment using a barycentric coordinate transformation +// Adapted from: https://stackoverflow.com/questions/2049582/how-to-determine-if-a-point-is-in-a-2d-triangle +// Specifically the answer by user 'andreasdr' in addition to the comment by user 'urraka' on the answer from user 'Andreas Brinck' +// +// The notation here assumes v0x is the x-component of v0, v0y is the y-component of v0, etc. +// +static inline __attribute__((always_inline)) int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty) +{ + float sdot = MATH_fipr(v0y, -v0x, v2y - v0y, v0x - v2x, v2x, v2y, ptx, pty); + float tdot = MATH_fipr(v0x, -v0y, v0y - v1y, v1x - v0x, v1y, v1x, ptx, pty); + + float areadot = MATH_fipr(-v1y, v0y, v0x, v1x, v2x, -v1x + v2x, v1y - v2y, v2y); + + // 'areadot' could be negative depending on the winding of the triangle + if(areadot < 0.0f) + { + sdot *= -1.0f; + tdot *= -1.0f; + areadot *= -1.0f; + } + + if( (sdot > 0.0f) && (tdot > 0.0f) && (areadot > (sdot + tdot)) ) + { + return 1; + } + + return 0; +} + +//------------------------------------------------------------------------------ +// Interpolation +//------------------------------------------------------------------------------ + // Linear interpolation -static inline __attribute__((always_inline)) float lerp(float a, float b, float t) +static inline __attribute__((always_inline)) float MATH_Lerp(float a, float b, float t) { return MATH_fmac(t, (b-a), a); } -// Speherical interpolation -static inline __attribute__((always_inline)) float slerp(float a, float b, 
float t, float theta) +// Spherical interpolation ('theta' in fsca units) +static inline __attribute__((always_inline)) float MATH_Slerp(float a, float b, float t, float theta) { // a is an element of v0, b is an element of v1 // v = ( v0 * sin(theta - t * theta) + v1 * sin(t * theta) ) / sin(theta) @@ -1752,7 +1857,7 @@ static inline __attribute__((always_inline)) float slerp(float a, float b, float // which only requires two calls to fsca. // Specifically, sin(a + b) = sin(a)cos(b) + cos(a)sin(b) & sin(-a) = -sin(a) - // Fsca returns reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part) + // MATH_fsca_* functions return reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part) // This could be made even faster by using MATH_fsca_Int() with 'theta' and 't' as unsigned ints #if __GNUC__ <= GNUC_FSCA_ERROR_VERSION @@ -1783,6 +1888,213 @@ static inline __attribute__((always_inline)) float slerp(float a, float b, float return output_float; } +//------------------------------------------------------------------------------ +// Fast Sinc (unnormalized, sin(x)/x version) +//------------------------------------------------------------------------------ +// +// Just pass in MATH_pi * x for normalized versions :) +// + +// Sinc function (fsca units) +static inline __attribute__((always_inline)) float MATH_Fast_Sincf(float x) +{ + if(x == 0.0f) + { + return 1.0f; + } + +#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION + + RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(x); + float sine_value = sine_cosine.sine; + +#else + + _Complex float sine_cosine = MATH_fsca_Float(x); + float sine_value = __real__ sine_cosine; + +#endif + + return MATH_Fast_Divide(sine_value, x); +} + +// Sinc function (degrees) +static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Deg(float x) +{ + if(x == 0.0f) + { + return 1.0f; + } + +#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION + + RETURN_FSCA_STRUCT sine_cosine = 
MATH_fsca_Float_Deg(x); + float sine_value = sine_cosine.sine; + +#else + + _Complex float sine_cosine = MATH_fsca_Float_Deg(x); + float sine_value = __real__ sine_cosine; + +#endif + + return MATH_Fast_Divide(sine_value, x); +} + +// Sinc function (rads) +static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Rad(float x) +{ + if(x == 0.0f) + { + return 1.0f; + } + +#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION + + RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Rad(x); + float sine_value = sine_cosine.sine; + +#else + + _Complex float sine_cosine = MATH_fsca_Float_Rad(x); + float sine_value = __real__ sine_cosine; + +#endif + + return MATH_Fast_Divide(sine_value, x); +} + +//------------------------------------------------------------------------------ +// Kaiser Window +//------------------------------------------------------------------------------ +// +// These use regular divides because they only need to be run once during loads, +// not during runtime. +// +// Adapted from public domain NVidia Filter.cpp: +// https://github.com/castano/nvidia-texture-tools/blob/master/src/nvimage/Filter.cpp +// (as of 3/23/2020) +// + +// +// Kaiser window utility functions +// + +// Utility function for 0th-order bessel function +static inline __attribute__((always_inline)) float MATH_Bessel0(float x) +{ + const float EPSILON_RATIO = 1e-6f; + float xh, sum, power, ds, k; + // int k; + + xh = 0.5f * x; + sum = 1.0f; + power = 1.0f; + k = 0.0f; // k = 0; + ds = 1.0; + while (ds > (sum * EPSILON_RATIO)) + { + k += 1.0f; // ++k; + power = power * (xh / k); + ds = power * power; + sum = sum + ds; + } + + return sum; +} + +// Utility for kaiser window's expected sincf() format (radians) +static inline __attribute__((always_inline)) float MATH_NV_Sincf_Rad(const float x) +{ + // Does SH4 need this correction term? x86's sinf() definitely does, + // but SH4 might be ok with if(x == 0.0f) return 1.0f; Not sure. 
+ if (MATH_fabs(x) < 0.0001f) // NV_EPSILON is 0.0001f + { + return 1.0f + x*x*(-1.0f/6.0f + (x*x)/120.0f); // 1.0 + x^2 * (-1/6 + x^2/120) + } + else + { + +#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION + + RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Rad(x); + float sine_value = sine_cosine.sine; + +#else + + _Complex float sine_cosine = MATH_fsca_Float_Rad(x); + float sine_value = __real__ sine_cosine; + +#endif + + return sine_value / x; + } +} + +// Utility for kaiser window's expected sincf() format (fsca units) +static inline __attribute__((always_inline)) float MATH_NV_Sincf(const float x) +{ + // Does SH4 need this correction term? x86's sinf() definitely does, + // but SH4 might be ok with if(x == 0.0f) return 1.0f; Not sure. + if (MATH_fabs(x) < 0.0001f) // NV_EPSILON is 0.0001f + { + return 1.0f + x*x*(-1.0f/6.0f + (x*x)/120.0f); // 1.0 + x^2 * (-1/6 + x^2/120) + } + else + { + +#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION + + RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(x); + float sine_value = sine_cosine.sine; + +#else + + _Complex float sine_cosine = MATH_fsca_Float(x); + float sine_value = __real__ sine_cosine; + +#endif + + return sine_value / x; + } +} + +// +// Kaiser window mipmap generator main functions +// + +// Generates mipmaps. Angle 'x' in radians. +static inline __attribute__((always_inline)) float MATH_Kaiser_Window_Rad(float x, float alpha, float stretch, float m_width) +{ + const float sinc_value = MATH_NV_Sincf_Rad(MATH_pi * x * stretch); + const float t = x / m_width; + + if ((1 - t * t) >= 0) + { + return sinc_value * MATH_Bessel0(alpha * MATH_fsqrt(1 - t * t)) / MATH_Bessel0(alpha); + } + else + { + return 0; + } +} + +// Generates mipmaps. Angle 'x' in fsca units. 
+static inline __attribute__((always_inline)) float MATH_Kaiser_Window(float x, float alpha, float stretch, float m_width) +{ + const float sinc_value = MATH_NV_Sincf(MATH_pi * x * stretch); + const float t = x / m_width; + + if ((1 - t * t) >= 0) + { + return sinc_value * MATH_Bessel0(alpha * MATH_fsqrt(1 - t * t)) / MATH_Bessel0(alpha); + } + else + { + return 0; + } +} + //============================================================================== // Miscellaneous Snippets //============================================================================== @@ -1824,3 +2136,4 @@ static inline __attribute__((always_inline)) float slerp(float a, float b, float #endif /* __SH4_MATH_H_ */ + diff --git a/GL/state.c b/GL/state.c index a705526..d6e18d8 100644 --- a/GL/state.c +++ b/GL/state.c @@ -63,17 +63,17 @@ static int _calc_pvr_depth_test() { case GL_NEVER: return PVR_DEPTHCMP_NEVER; case GL_LESS: - return PVR_DEPTHCMP_GREATER; + return PVR_DEPTHCMP_LESS; case GL_EQUAL: return PVR_DEPTHCMP_EQUAL; case GL_LEQUAL: - return PVR_DEPTHCMP_GEQUAL; + return PVR_DEPTHCMP_LEQUAL; case GL_GREATER: - return PVR_DEPTHCMP_LESS; + return PVR_DEPTHCMP_GREATER; case GL_NOTEQUAL: return PVR_DEPTHCMP_NOTEQUAL; case GL_GEQUAL: - return PVR_DEPTHCMP_LEQUAL; + return PVR_DEPTHCMP_GEQUAL; break; case GL_ALWAYS: default: