Fix up depth functions and update sh4_math
This commit is contained in:
parent
ca7eb462fc
commit
c3ae9bef64
|
@ -10,6 +10,7 @@
|
|||
#include "../include/glext.h"
|
||||
#include "private.h"
|
||||
#include "profiler.h"
|
||||
#include "sh4_math.h"
|
||||
|
||||
|
||||
static AttribPointer VERTEX_POINTER;
|
||||
|
@ -1168,10 +1169,10 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) {
|
|||
Vertex* vertex = _glSubmissionTargetStart(target);
|
||||
|
||||
ITERATE(target->count) {
|
||||
float f = MATH_fsrra(vertex->w * vertex->w);
|
||||
float f = MATH_Fast_Invert(vertex->w);
|
||||
vertex->xyz[0] *= f;
|
||||
vertex->xyz[1] *= f;
|
||||
vertex->xyz[2] = f;
|
||||
vertex->xyz[2] = vertex->w;
|
||||
|
||||
/* FIXME: Consider taking glDepthRange into account. PVR is designed to use invW rather
|
||||
* than Z which is unlike most GPUs - this apparently provides advantages.
|
||||
|
|
335
GL/sh4_math.h
335
GL/sh4_math.h
|
@ -1,6 +1,6 @@
|
|||
// ---- sh4_math.h - SH7091 Math Module ----
|
||||
//
|
||||
// Version 1.1.1
|
||||
// Version 1.1.3
|
||||
//
|
||||
// This file is part of the DreamHAL project, a hardware abstraction library
|
||||
// primarily intended for use on the SH7091 found in hardware such as the SEGA
|
||||
|
@ -110,6 +110,11 @@ typedef struct {
|
|||
|
||||
static const ALL_FLOATS_STRUCT MATH_identity_matrix = {1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f};
|
||||
|
||||
// Constants
|
||||
#define MATH_pi 3.14159265358979323846264338327950288419716939937510f
|
||||
#define MATH_e 2.71828182845904523536028747135266249775724709369995f
|
||||
#define MATH_phi 1.61803398874989484820458683436563811772030917980576f
|
||||
|
||||
//==============================================================================
|
||||
// Basic math functions
|
||||
//==============================================================================
|
||||
|
@ -518,13 +523,20 @@ static inline __attribute__((always_inline)) float MATH_Slow_Divide(float numera
|
|||
|
||||
// Notes:
|
||||
// - From http://www.shared-ptr.com/sh_insns.html:
|
||||
// The input angle is specified as a signed fraction in twos complement. The result of sin and cos is a single-precision floating-point number.
|
||||
// The input angle is specified as a signed fraction in twos complement.
|
||||
// The result of sin and cos is a single-precision floating-point number.
|
||||
// 0x7FFFFFFF to 0x00000001: 360×2^15−360/2^16 to 360/2^16 degrees
|
||||
// 0x00000000: 0 degree
|
||||
// 0xFFFFFFFF to 0x80000000: −360/2^16 to −360×2^15 degrees
|
||||
// - fsca format is 2^16 is 360 degrees, so a value of 1 is actually
|
||||
// 1/182.044444444 of a degree
|
||||
// 1/182.044444444 of a degree or 1/10430.3783505 of a radian
|
||||
// - fsca does a %360 automatically for values over 360 degrees
|
||||
//
|
||||
// Also:
|
||||
// In order to make the best use of fsca units, a program must expect them from
|
||||
// the outset and not "make them" by dividing radians or degrees to get them,
|
||||
// otherwise it's just giving the 'fsca' instruction radians or degrees!
|
||||
//
|
||||
|
||||
// The following functions are available.
|
||||
// Please see their definitions for other usage info, otherwise they may not
|
||||
|
@ -810,6 +822,11 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad(
|
|||
// work for you.
|
||||
//
|
||||
/*
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector and matrix math operations
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Inner/dot product (4x1 vec . 4x1 vec = scalar)
|
||||
float MATH_fipr(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
|
||||
|
||||
|
@ -837,6 +854,10 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad(
|
|||
// 4x4 Matrix product (two from memory)
|
||||
void MATH_Load_Matrix_Product(ALL_FLOATS_STRUCT * matrix1, ALL_FLOATS_STRUCT * matrix2)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Matrix load and store operations
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Load 4x4 XMTRX from memory
|
||||
void MATH_Load_XMTRX(ALL_FLOATS_STRUCT * back_matrix)
|
||||
|
||||
|
@ -850,6 +871,10 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad(
|
|||
RETURN_VECTOR_STRUCT MATH_Get_XMTRX_2x2(unsigned int which)
|
||||
*/
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector and matrix math operations
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Inner/dot product: vec . vec = scalar
|
||||
// _ _
|
||||
// | y1 |
|
||||
|
@ -1728,22 +1753,102 @@ static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Get_XMTRX
|
|||
// The following functions are provided as examples of ways in which these math
|
||||
// functions can be used.
|
||||
//
|
||||
// Reminder: 1 fsca unit = 1/182.044444444 of a degree or 1/10430.3783505 of a radian
|
||||
// In order to make the best use of fsca units, a program must expect them from
|
||||
// the outset and not "make them" by dividing radians or degrees to get them,
|
||||
// otherwise it's just giving the 'fsca' instruction radians or degrees!
|
||||
//
|
||||
/*
|
||||
// Linear interpolation
|
||||
float lerp(float a, float b, float t)
|
||||
|
||||
// Spherical interpolation
|
||||
float slerp(float a, float b, float t, float theta)
|
||||
//------------------------------------------------------------------------------
|
||||
// Commonly useful functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Returns 1 if point 't' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not
|
||||
int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Interpolation
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Linear interpolation
|
||||
float MATH_Lerp(float a, float b, float t)
|
||||
|
||||
// Spherical interpolation ('theta' in fsca units)
|
||||
float MATH_Slerp(float a, float b, float t, float theta)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Fast Sinc functions (unnormalized, sin(x)/x version)
|
||||
//------------------------------------------------------------------------------
|
||||
// Just pass in MATH_pi * x for normalized versions :)
|
||||
|
||||
// Sinc function (fsca units)
|
||||
float MATH_Fast_Sincf(float x)
|
||||
|
||||
// Sinc function (degrees)
|
||||
float MATH_Fast_Sincf_Deg(float x)
|
||||
|
||||
// Sinc function (rads)
|
||||
float MATH_Fast_Sincf_Rad(float x)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Kaiser Window
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Generates mipmaps. Angle 'x' in radians.
|
||||
float MATH_Kaiser_Window_Rad(float x, float alpha, float stretch, float m_width)
|
||||
|
||||
// Generates mipmaps. Angle 'x' in fsca units.
|
||||
float MATH_Kaiser_Window(float x, float alpha, float stretch, float m_width)
|
||||
|
||||
*/
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Commonly useful functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not
|
||||
// Determines containment using a barycentric coordinate transformation
|
||||
// Adapted from: https://stackoverflow.com/questions/2049582/how-to-determine-if-a-point-is-in-a-2d-triangle
|
||||
// Specifically the answer by user 'andreasdr' in addition to the comment by user 'urraka' on the answer from user 'Andreas Brinck'
|
||||
//
|
||||
// The notation here assumes v0x is the x-component of v0, v0y is the y-component of v0, etc.
|
||||
//
|
||||
// Point-in-triangle test for the 2D triangle (v0, v1, v2) and the point
// (ptx, pty).
//
// Three 4-element inner products are evaluated with MATH_fipr(): two terms
// that depend on the point ('s_term', 't_term') and one that depends only on
// the triangle ('area_term'). When 'area_term' is negative all three are
// negated, so the final comparison is independent of the triangle's winding.
//
// Returns 1 only when both point terms are strictly positive and their sum is
// strictly less than 'area_term'; returns 0 otherwise (this includes points
// lying exactly on an edge, and any comparison involving NaN).
static inline __attribute__((always_inline)) int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty)
{
  float s_term = MATH_fipr(v0y, -v0x, v2y - v0y, v0x - v2x, v2x, v2y, ptx, pty);
  float t_term = MATH_fipr(v0x, -v0y, v0y - v1y, v1x - v0x, v1y, v1x, ptx, pty);
  float area_term = MATH_fipr(-v1y, v0y, v0x, v1x, v2x, -v1x + v2x, v1y - v2y, v2y);

  // Normalize the sign so the comparisons below work for either winding
  if(area_term < 0.0f)
  {
    s_term = -s_term;
    t_term = -t_term;
    area_term = -area_term;
  }

  return ( (s_term > 0.0f) && (t_term > 0.0f) && (area_term > (s_term + t_term)) ) ? 1 : 0;
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Interpolation
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Linear interpolation
|
||||
static inline __attribute__((always_inline)) float lerp(float a, float b, float t)
|
||||
// Linear interpolation between 'a' and 'b' by factor 't'.
// Hands (t, b - a, a) to MATH_fmac(); presumably that evaluates
// t * (b - a) + a in a single multiply-accumulate -- confirm against
// MATH_fmac()'s definition.
static inline __attribute__((always_inline)) float MATH_Lerp(float a, float b, float t)
{
  const float span = b - a;

  return MATH_fmac(t, span, a);
}
|
||||
|
||||
// Spherical interpolation
|
||||
static inline __attribute__((always_inline)) float slerp(float a, float b, float t, float theta)
|
||||
// Spherical interpolation ('theta' in fsca units)
|
||||
static inline __attribute__((always_inline)) float MATH_Slerp(float a, float b, float t, float theta)
|
||||
{
|
||||
// a is an element of v0, b is an element of v1
|
||||
// v = ( v0 * sin(theta - t * theta) + v1 * sin(t * theta) ) / sin(theta)
|
||||
|
@ -1752,7 +1857,7 @@ static inline __attribute__((always_inline)) float slerp(float a, float b, float
|
|||
// which only requires two calls to fsca.
|
||||
// Specifically, sin(a + b) = sin(a)cos(b) + cos(a)sin(b) & sin(-a) = -sin(a)
|
||||
|
||||
// Fsca returns reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part)
|
||||
// MATH_fsca_* functions return reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part)
|
||||
// This could be made even faster by using MATH_fsca_Int() with 'theta' and 't' as unsigned ints
|
||||
|
||||
#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
|
||||
|
@ -1783,6 +1888,213 @@ static inline __attribute__((always_inline)) float slerp(float a, float b, float
|
|||
return output_float;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Fast Sinc (unnormalized, sin(x)/x version)
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// Just pass in MATH_pi * x for normalized versions :)
|
||||
//
|
||||
|
||||
// Sinc function (fsca units)
|
||||
// Unnormalized sinc, sin(x)/x, for an angle 'x' given in fsca units.
//
// Returns exactly 1.0f for x == 0.0f. Otherwise the sine reported by
// MATH_fsca_Float(x) is divided by 'x' with MATH_Fast_Divide(). The divisor is
// 'x' exactly as passed in, i.e. still in fsca units.
static inline __attribute__((always_inline)) float MATH_Fast_Sincf(float x)
{
  float sine_of_x;

  if(x == 0.0f)
  {
    return 1.0f;
  }

#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
  // On these compilers the fsca wrapper hands back a RETURN_FSCA_STRUCT
  sine_of_x = MATH_fsca_Float(x).sine;
#else
  // Otherwise it hands back a _Complex float with the sine in the real part
  sine_of_x = __real__ MATH_fsca_Float(x);
#endif

  return MATH_Fast_Divide(sine_of_x, x);
}
|
||||
|
||||
// Sinc function (degrees)
|
||||
// Unnormalized sinc, sin(x)/x, for an angle 'x' given in degrees.
//
// Returns exactly 1.0f for x == 0.0f. Otherwise the sine reported by
// MATH_fsca_Float_Deg(x) is divided by 'x' with MATH_Fast_Divide(). The
// divisor is 'x' exactly as passed in, i.e. still in degrees.
static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Deg(float x)
{
  float sine_of_x;

  if(x == 0.0f)
  {
    return 1.0f;
  }

#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
  // On these compilers the fsca wrapper hands back a RETURN_FSCA_STRUCT
  sine_of_x = MATH_fsca_Float_Deg(x).sine;
#else
  // Otherwise it hands back a _Complex float with the sine in the real part
  sine_of_x = __real__ MATH_fsca_Float_Deg(x);
#endif

  return MATH_Fast_Divide(sine_of_x, x);
}
|
||||
|
||||
// Sinc function (rads)
|
||||
// Unnormalized sinc, sin(x)/x, for an angle 'x' given in radians.
//
// Returns exactly 1.0f for x == 0.0f. Otherwise the sine reported by
// MATH_fsca_Float_Rad(x) is divided by 'x' with MATH_Fast_Divide().
static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Rad(float x)
{
  float sine_of_x;

  if(x == 0.0f)
  {
    return 1.0f;
  }

#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
  // On these compilers the fsca wrapper hands back a RETURN_FSCA_STRUCT
  sine_of_x = MATH_fsca_Float_Rad(x).sine;
#else
  // Otherwise it hands back a _Complex float with the sine in the real part
  sine_of_x = __real__ MATH_fsca_Float_Rad(x);
#endif

  return MATH_Fast_Divide(sine_of_x, x);
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Kaiser Window
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// These use regular divides because they only need to be run once during loads,
|
||||
// not during runtime.
|
||||
//
|
||||
// Adapted from public domain NVidia Filter.cpp:
|
||||
// https://github.com/castano/nvidia-texture-tools/blob/master/src/nvimage/Filter.cpp
|
||||
// (as of 3/23/2020)
|
||||
//
|
||||
|
||||
//
|
||||
// Kaiser window utility functions
|
||||
//
|
||||
|
||||
// Utility function for 0th-order bessel function
|
||||
// Utility function for the 0th-order Bessel term used by the Kaiser window.
//
// Sums the series  1 + sum_{k>=1} ( (x/2)^k / k! )^2  term by term, which is
// the power series of the 0th-order modified Bessel function of the first
// kind. Summation stops once the newest term is no larger than 1e-6 times the
// running sum. The result is even in 'x' because every term is squared.
static inline __attribute__((always_inline)) float MATH_Bessel0(float x)
{
  const float EPSILON_RATIO = 1e-6f;
  const float half_x = 0.5f * x;

  float total = 1.0f;   // k = 0 term
  float factor = 1.0f;  // (x/2)^k / k!, built up incrementally
  float index = 0.0f;   // 'k', kept as a float to avoid int->float conversions
  float term;

  // The first pass always runs: the k = 0 term (1.0) trivially exceeds the
  // cutoff, so a do/while expresses the same iteration count.
  do
  {
    index += 1.0f;
    factor = factor * (half_x / index);
    term = factor * factor;
    total = total + term;
  } while (term > (total * EPSILON_RATIO));

  return total;
}
|
||||
|
||||
// Utility for kaiser window's expected sincf() format (radians)
|
||||
// sin(x)/x in the form the Kaiser window code expects, 'x' in radians.
//
// For |x| below 0.0001f (NV_EPSILON in the NVidia source this was adapted
// from) a short polynomial in x^2 is returned instead of dividing by a tiny
// number. Whether the SH4 actually needs this correction, rather than a plain
// x == 0.0f check, is an open question carried over from the original notes.
// Everywhere else the sine from MATH_fsca_Float_Rad(x) is divided by 'x' with
// a regular (non-fast) divide.
static inline __attribute__((always_inline)) float MATH_NV_Sincf_Rad(const float x)
{
  float sine_of_x;

  if (MATH_fabs(x) < 0.0001f)
  {
    const float x_squared = x * x;

    return 1.0f + x_squared * (-1.0f/6.0f + x_squared/120.0f); // 1.0 + x^2 * (-1/6 + x^2/120)
  }

#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
  // On these compilers the fsca wrapper hands back a RETURN_FSCA_STRUCT
  sine_of_x = MATH_fsca_Float_Rad(x).sine;
#else
  // Otherwise it hands back a _Complex float with the sine in the real part
  sine_of_x = __real__ MATH_fsca_Float_Rad(x);
#endif

  return sine_of_x / x;
}
|
||||
|
||||
// Utility for kaiser window's expected sincf() format (fsca units)
|
||||
// sin(x)/x in the form the Kaiser window code expects, 'x' in fsca units.
//
// For |x| below 0.0001f (NV_EPSILON in the NVidia source this was adapted
// from) a short polynomial in x^2 is returned instead of dividing by a tiny
// number. Whether the SH4 actually needs this correction, rather than a plain
// x == 0.0f check, is an open question carried over from the original notes.
// Everywhere else the sine from MATH_fsca_Float(x) is divided by 'x' with a
// regular (non-fast) divide.
//
// NOTE(review): both the polynomial and the final divide use 'x' as passed in
// (fsca units), not the equivalent angle in radians -- confirm this is the
// intended scaling.
static inline __attribute__((always_inline)) float MATH_NV_Sincf(const float x)
{
  float sine_of_x;

  if (MATH_fabs(x) < 0.0001f)
  {
    const float x_squared = x * x;

    return 1.0f + x_squared * (-1.0f/6.0f + x_squared/120.0f); // 1.0 + x^2 * (-1/6 + x^2/120)
  }

#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
  // On these compilers the fsca wrapper hands back a RETURN_FSCA_STRUCT
  sine_of_x = MATH_fsca_Float(x).sine;
#else
  // Otherwise it hands back a _Complex float with the sine in the real part
  sine_of_x = __real__ MATH_fsca_Float(x);
#endif

  return sine_of_x / x;
}
|
||||
|
||||
//
|
||||
// Kaiser window mipmap generator main functions
|
||||
//
|
||||
|
||||
// Generates mipmaps. Angle 'x' in radians.
|
||||
static inline __attribute__((always_inline)) float MATH_Kaiser_Window_Rad(float x, float alpha, float stretch, float m_width)
|
||||
{
|
||||
const float sinc_value = MATH_NV_Sincf_Rad(MATH_pi * x * stretch);
|
||||
const float t = x / m_width;
|
||||
|
||||
if ((1 - t * t) >= 0)
|
||||
{
|
||||
return sinc_value * MATH_Bessel0(alpha * MATH_fsqrt(1 - t * t)) / MATH_Bessel0(alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Generates mipmaps. Angle 'x' in fsca units.
|
||||
static inline __attribute__((always_inline)) float MATH_Kaiser_Window(float x, float alpha, float stretch, float m_width)
|
||||
{
|
||||
const float sinc_value = MATH_NV_Sincf(MATH_pi * x * stretch);
|
||||
const float t = x / m_width;
|
||||
|
||||
if ((1 - t * t) >= 0)
|
||||
{
|
||||
return sinc_value * MATH_Bessel0(alpha * MATH_fsqrt(1 - t * t)) / MATH_Bessel0(alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
//==============================================================================
|
||||
// Miscellaneous Snippets
|
||||
//==============================================================================
|
||||
|
@ -1824,3 +2136,4 @@ static inline __attribute__((always_inline)) float slerp(float a, float b, float
|
|||
|
||||
|
||||
#endif /* __SH4_MATH_H_ */
|
||||
|
||||
|
|
|
@ -63,17 +63,17 @@ static int _calc_pvr_depth_test() {
|
|||
case GL_NEVER:
|
||||
return PVR_DEPTHCMP_NEVER;
|
||||
case GL_LESS:
|
||||
return PVR_DEPTHCMP_GREATER;
|
||||
return PVR_DEPTHCMP_LESS;
|
||||
case GL_EQUAL:
|
||||
return PVR_DEPTHCMP_EQUAL;
|
||||
case GL_LEQUAL:
|
||||
return PVR_DEPTHCMP_GEQUAL;
|
||||
return PVR_DEPTHCMP_LEQUAL;
|
||||
case GL_GREATER:
|
||||
return PVR_DEPTHCMP_LESS;
|
||||
return PVR_DEPTHCMP_GREATER;
|
||||
case GL_NOTEQUAL:
|
||||
return PVR_DEPTHCMP_NOTEQUAL;
|
||||
case GL_GEQUAL:
|
||||
return PVR_DEPTHCMP_LEQUAL;
|
||||
return PVR_DEPTHCMP_GEQUAL;
|
||||
break;
|
||||
case GL_ALWAYS:
|
||||
default:
|
||||
|
|
Loading…
Reference in New Issue
Block a user