Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Dave 2023-04-08 19:20:24 +02:00
commit aa968d0487
16 changed files with 1155 additions and 718 deletions

2
.gitignore vendored
View File

@ -9,3 +9,5 @@ dc-build.sh
build/*
builddir/*
version.[c|h]
pcbuild/*
dcbuild/*

View File

@ -24,7 +24,16 @@ if(NOT PLATFORM_DREAMCAST)
set(FIND_LIBRARY_USE_LIB32_PATHS true)
set(FIND_LIBRARY_USE_LIB64_PATHS false)
else()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsrra -ffp-contract=fast -mfsca -ffast-math")
include(CheckCCompilerFlag)
check_c_compiler_flag("-mfsrra" COMPILER_HAS_FSRRA)
check_c_compiler_flag("-mfsca" COMPILER_HAS_FSCA)
if(COMPILER_HAS_FSRRA)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsrra")
endif()
if(COMPILER_HAS_FSCA)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsca")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -ffp-contract=fast -ffast-math")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations")

246
GL/draw.c
View File

@ -13,9 +13,12 @@ GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
GLuint FAST_PATH_ENABLED = GL_FALSE;
static GLubyte ACTIVE_CLIENT_TEXTURE = 0;
static const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
extern inline GLuint _glRecalcFastPath();
extern GLboolean AUTOSORT_ENABLED;
#define ITERATE(count) \
GLuint i = count; \
while(i--)
@ -116,8 +119,6 @@ static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) {
static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
float* output = (float*) out;
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
@ -138,8 +139,6 @@ static void _readVertexData2f3f(const GLubyte* in, GLubyte* out) {
}
static void _readVertexData2ub3f(const GLubyte* input, GLubyte* out) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
float* output = (float*) out;
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
@ -173,7 +172,6 @@ static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) {
}
static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
float* output = (float*) out;
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
@ -999,40 +997,143 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) {
}
}
GL_FORCE_INLINE void push(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) {
GL_FORCE_INLINE int _calc_pvr_face_culling() {
if(!_glIsCullingEnabled()) {
return GPU_CULLING_SMALL;
} else {
if(_glGetCullFace() == GL_BACK) {
return (_glGetFrontFace() == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
} else {
return (_glGetFrontFace() == GL_CCW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
}
}
}
/*
 * Translate the current GL depth-test state into a GPU_DEPTHCMP_* value.
 *
 * Returns GPU_DEPTHCMP_ALWAYS when depth testing is disabled, and also when
 * the depth function is GL_ALWAYS or any unrecognised value.
 *
 * The ordered comparisons are mirrored relative to GL (GL_LESS -> GREATER,
 * GL_LEQUAL -> GEQUAL, GL_GREATER -> LESS, GL_GEQUAL -> LEQUAL).
 * NOTE(review): presumably because the hardware compares an inverted depth
 * value - confirm against the PVR documentation.
 */
GL_FORCE_INLINE int _calc_pvr_depth_test() {
    if(!_glIsDepthTestEnabled()) {
        return GPU_DEPTHCMP_ALWAYS;
    }

    switch(_glGetDepthFunc()) {
        case GL_NEVER:
            return GPU_DEPTHCMP_NEVER;
        case GL_LESS:
            return GPU_DEPTHCMP_GREATER;
        case GL_EQUAL:
            return GPU_DEPTHCMP_EQUAL;
        case GL_LEQUAL:
            return GPU_DEPTHCMP_GEQUAL;
        case GL_GREATER:
            return GPU_DEPTHCMP_LESS;
        case GL_NOTEQUAL:
            return GPU_DEPTHCMP_NOTEQUAL;
        case GL_GEQUAL:
            return GPU_DEPTHCMP_LEQUAL;
        case GL_ALWAYS:
        default:
            return GPU_DEPTHCMP_ALWAYS;
    }
}
/*
 * Translate a GL blend factor into the matching GPU_BLEND_* value.
 *
 * factor: one of the GL blend-factor enums handled below.
 *
 * Unhandled factors are reported on stderr and fall back to GPU_BLEND_ONE,
 * which is also the result for GL_ONE.
 */
GL_FORCE_INLINE int _calcPVRBlendFactor(GLenum factor) {
    /* GPU_BLEND_ONE doubles as the fallback for unknown factors */
    int pvrFactor = GPU_BLEND_ONE;

    switch(factor) {
        case GL_ONE:
            break;
        case GL_ZERO:
            pvrFactor = GPU_BLEND_ZERO;
            break;
        case GL_SRC_ALPHA:
            pvrFactor = GPU_BLEND_SRCALPHA;
            break;
        case GL_ONE_MINUS_SRC_ALPHA:
            pvrFactor = GPU_BLEND_INVSRCALPHA;
            break;
        case GL_DST_ALPHA:
            pvrFactor = GPU_BLEND_DESTALPHA;
            break;
        case GL_ONE_MINUS_DST_ALPHA:
            pvrFactor = GPU_BLEND_INVDESTALPHA;
            break;
        case GL_DST_COLOR:
            pvrFactor = GPU_BLEND_DESTCOLOR;
            break;
        case GL_ONE_MINUS_DST_COLOR:
            pvrFactor = GPU_BLEND_INVDESTCOLOR;
            break;
        default:
            fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor);
            break;
    }

    return pvrFactor;
}
/*
 * Fill in the alpha and blend fields of a poly context from the current
 * GL state.
 *
 * context: the context to update; gen.alpha, blend.src and blend.dst are
 *          overwritten.
 *
 * Alpha is enabled when either blending or the alpha test is enabled.
 */
GL_FORCE_INLINE void _updatePVRBlend(PolyContext* context) {
    const int alphaNeeded = _glIsBlendingEnabled() || _glIsAlphaTestEnabled();

    context->gen.alpha = alphaNeeded ? GPU_ALPHA_ENABLE : GPU_ALPHA_DISABLE;
    context->blend.src = _calcPVRBlendFactor(_glGetBlendSourceFactor());
    context->blend.dst = _calcPVRBlendFactor(_glGetBlendDestFactor());
}
GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) {
TRACE();
// Compile the header
PolyContext cxt = *_glGetPVRContext();
cxt.list_type = activePolyList->list_type;
PolyContext ctx;
memset(&ctx, 0, sizeof(PolyContext));
if(cxt.list_type == GPU_LIST_OP_POLY) {
/* Opaque polys are always one/zero */
cxt.blend.src = GPU_BLEND_ONE;
cxt.blend.dst = GPU_BLEND_ZERO;
} else if(cxt.list_type == GPU_LIST_PT_POLY) {
/* Punch-through polys require fixed blending and depth modes */
cxt.blend.src = GPU_BLEND_SRCALPHA;
cxt.blend.dst = GPU_BLEND_INVSRCALPHA;
cxt.depth.comparison = GPU_DEPTHCMP_LEQUAL;
} else if(cxt.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) {
/* Autosort mode requires this mode for transparent polys */
cxt.depth.comparison = GPU_DEPTHCMP_GEQUAL;
ctx.list_type = activePolyList->list_type;
ctx.fmt.color = GPU_CLRFMT_ARGBPACKED;
ctx.fmt.uv = GPU_UVFMT_32BIT;
ctx.gen.color_clamp = GPU_CLRCLAMP_DISABLE;
ctx.gen.culling = _calc_pvr_face_culling();
ctx.depth.comparison = _calc_pvr_depth_test();
ctx.depth.write = _glIsDepthWriteEnabled() ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE;
ctx.gen.shading = (_glGetShadeModel() == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT;
if(_glIsScissorTestEnabled()) {
ctx.gen.clip_mode = GPU_USERCLIP_INSIDE;
} else {
ctx.gen.clip_mode = GPU_USERCLIP_DISABLE;
}
_glUpdatePVRTextureContext(&cxt, textureUnit);
if(_glIsFogEnabled()) {
ctx.gen.fog_type = GPU_FOG_TABLE;
} else {
ctx.gen.fog_type = GPU_FOG_DISABLE;
}
_updatePVRBlend(&ctx);
if(ctx.list_type == GPU_LIST_OP_POLY) {
/* Opaque polys are always one/zero */
ctx.blend.src = GPU_BLEND_ONE;
ctx.blend.dst = GPU_BLEND_ZERO;
} else if(ctx.list_type == GPU_LIST_PT_POLY) {
/* Punch-through polys require fixed blending and depth modes */
ctx.blend.src = GPU_BLEND_SRCALPHA;
ctx.blend.dst = GPU_BLEND_INVSRCALPHA;
ctx.depth.comparison = GPU_DEPTHCMP_LEQUAL;
} else if(ctx.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) {
/* Autosort mode requires this mode for transparent polys */
ctx.depth.comparison = GPU_DEPTHCMP_GEQUAL;
}
_glUpdatePVRTextureContext(&ctx, textureUnit);
if(multiTextureHeader) {
gl_assert(cxt.list_type == GPU_LIST_TR_POLY);
gl_assert(ctx.list_type == GPU_LIST_TR_POLY);
cxt.gen.alpha = GPU_ALPHA_ENABLE;
cxt.txr.alpha = GPU_TXRALPHA_ENABLE;
cxt.blend.src = GPU_BLEND_ZERO;
cxt.blend.dst = GPU_BLEND_DESTCOLOR;
cxt.depth.comparison = GPU_DEPTHCMP_EQUAL;
ctx.gen.alpha = GPU_ALPHA_ENABLE;
ctx.txr.alpha = GPU_TXRALPHA_ENABLE;
ctx.blend.src = GPU_BLEND_ZERO;
ctx.blend.dst = GPU_BLEND_DESTCOLOR;
ctx.depth.comparison = GPU_DEPTHCMP_EQUAL;
}
CompilePolyHeader(header, &cxt);
CompilePolyHeader(header, &ctx);
/* Force bits 18 and 19 on to switch to 6 triangle strips */
header->cmd |= 0xC0000;
/* Post-process the vertex list */
/*
@ -1109,13 +1210,16 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
return;
}
GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty();
// We don't handle this any further, so just make sure we never pass it down */
gl_assert(mode != GL_POLYGON);
target->output = _glActivePolyList();
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = target->output->vector.size;
target->start_offset = target->header_offset + 1;
target->start_offset = target->header_offset + (header_required);
gl_assert(target->count);
@ -1123,7 +1227,12 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
aligned_vector_resize(extras, target->count);
/* Make room for the vertices and header */
aligned_vector_extend(&target->output->vector, target->count + 1);
aligned_vector_extend(&target->output->vector, target->count + (header_required));
if(header_required) {
apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
_glGPUStateMarkClean();
}
/* If we're lighting, then we need to do some work in
* eye-space, so we only transform vertices by the modelview
@ -1132,7 +1241,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
* If we're not doing lighting though we can optimise by taking
* vertices straight to clip-space */
if(LIGHTING_ENABLED) {
if(_glIsLightingEnabled()) {
_glMatrixLoadModelView();
} else {
_glMatrixLoadModelViewProjection();
@ -1147,7 +1256,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
transform(target);
}
if(LIGHTING_ENABLED){
if(_glIsLightingEnabled()){
light(target);
/* OK eye-space work done, now move into clip space */
@ -1155,51 +1264,48 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
transform(target);
}
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
// /*
// Now, if multitexturing is enabled, we want to send exactly the same vertices again, except:
// - We want to enable blending, and send them to the TR list
// - We want to set the depth func to GL_EQUAL
// - We want to set the second texture ID
// - We want to set the uv coordinates to the passed st ones
// */
/*
Now, if multitexturing is enabled, we want to send exactly the same vertices again, except:
- We want to enable blending, and send them to the TR list
- We want to set the depth func to GL_EQUAL
- We want to set the second texture ID
- We want to set the uv coordinates to the passed st ones
*/
// if(!TEXTURES_ENABLED[1]) {
// /* Multitexture actively disabled */
// return;
// }
if(!TEXTURES_ENABLED[1]) {
/* Multitexture actively disabled */
return;
}
// TextureObject* texture1 = _glGetTexture1();
TextureObject* texture1 = _glGetTexture1();
// /* Multitexture implicitly disabled */
// if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) {
// /* Multitexture actively disabled */
// return;
// }
/* Multitexture implicitly disabled */
if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) {
/* Multitexture actively disabled */
return;
}
// /* Push back a copy of the list to the transparent poly list, including the header
// (hence the + 1)
// */
// Vertex* vertex = aligned_vector_push_back(
// &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1
// );
/* Push back a copy of the list to the transparent poly list, including the header
(hence the + 1)
*/
Vertex* vertex = aligned_vector_push_back(
&_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1
);
// gl_assert(vertex);
gl_assert(vertex);
// PolyHeader* mtHeader = (PolyHeader*) vertex++;
// /* Send the buffer again to the transparent list */
// apply_poly_header(mtHeader, GL_TRUE, _glTransparentPolyList(), 1);
PolyHeader* mtHeader = (PolyHeader*) vertex++;
/* Replace the UV coordinates with the ST ones */
VertexExtra* ve = aligned_vector_at(target->extras, 0);
ITERATE(target->count) {
vertex->uv[0] = ve->st[0];
vertex->uv[1] = ve->st[1];
++vertex;
++ve;
}
/* Send the buffer again to the transparent list */
push(mtHeader, GL_TRUE, _glTransparentPolyList(), 1);
// /* Replace the UV coordinates with the ST ones */
// VertexExtra* ve = aligned_vector_at(target->extras, 0);
// ITERATE(target->count) {
// vertex->uv[0] = ve->st[0];
// vertex->uv[1] = ve->st[1];
// ++vertex;
// ++ve;
// }
}
void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) {

View File

@ -12,126 +12,107 @@
* multiplier ends up less than this value */
#define ATTENUATION_THRESHOLD 100.0f
static GLfloat SCENE_AMBIENT [] = {0.2f, 0.2f, 0.2f, 1.0f};
static GLboolean VIEWER_IN_EYE_COORDINATES = GL_TRUE;
static GLenum COLOR_CONTROL = GL_SINGLE_COLOR;
static GLenum COLOR_MATERIAL_MODE = GL_AMBIENT_AND_DIFFUSE;
#define AMBIENT_MASK 1
#define DIFFUSE_MASK 2
#define EMISSION_MASK 4
#define SPECULAR_MASK 8
#define SCENE_AMBIENT_MASK 16
static GLenum COLOR_MATERIAL_MASK = AMBIENT_MASK | DIFFUSE_MASK;
static LightSource LIGHTS[MAX_GLDC_LIGHTS];
static GLuint ENABLED_LIGHT_COUNT = 0;
static Material MATERIAL;
GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask);
static void recalcEnabledLights() {
GLubyte i;
ENABLED_LIGHT_COUNT = 0;
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
if(LIGHTS[i].isEnabled) {
ENABLED_LIGHT_COUNT++;
}
}
}
void _glInitLights() {
static GLfloat ONE [] = {1.0f, 1.0f, 1.0f, 1.0f};
static GLfloat ZERO [] = {0.0f, 0.0f, 0.0f, 1.0f};
static GLfloat PARTIAL [] = {0.2f, 0.2f, 0.2f, 1.0f};
static GLfloat MOSTLY [] = {0.8f, 0.8f, 0.8f, 1.0f};
memcpy(MATERIAL.ambient, PARTIAL, sizeof(GLfloat) * 4);
memcpy(MATERIAL.diffuse, MOSTLY, sizeof(GLfloat) * 4);
memcpy(MATERIAL.specular, ZERO, sizeof(GLfloat) * 4);
memcpy(MATERIAL.emissive, ZERO, sizeof(GLfloat) * 4);
MATERIAL.exponent = 0.0f;
GLubyte i;
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
memcpy(LIGHTS[i].ambient, ZERO, sizeof(GLfloat) * 4);
memcpy(LIGHTS[i].diffuse, ONE, sizeof(GLfloat) * 4);
memcpy(LIGHTS[i].specular, ONE, sizeof(GLfloat) * 4);
if(i > 0) {
memcpy(LIGHTS[i].diffuse, ZERO, sizeof(GLfloat) * 4);
memcpy(LIGHTS[i].specular, ZERO, sizeof(GLfloat) * 4);
}
LIGHTS[i].position[0] = LIGHTS[i].position[1] = LIGHTS[i].position[3] = 0.0f;
LIGHTS[i].position[2] = 1.0f;
LIGHTS[i].isDirectional = GL_TRUE;
LIGHTS[i].isEnabled = GL_FALSE;
LIGHTS[i].spot_direction[0] = LIGHTS[i].spot_direction[1] = 0.0f;
LIGHTS[i].spot_direction[2] = -1.0f;
LIGHTS[i].spot_exponent = 0.0f;
LIGHTS[i].spot_cutoff = 180.0f;
LIGHTS[i].constant_attenuation = 1.0f;
LIGHTS[i].linear_attenuation = 0.0f;
LIGHTS[i].quadratic_attenuation = 0.0f;
}
_glPrecalcLightingValues(~0);
recalcEnabledLights();
}
/*
 * Enable or disable a single light and refresh the enabled-light count.
 *
 * light: index into LIGHTS; it is used unchecked, so callers must pass a
 *        value below MAX_GLDC_LIGHTS.
 * value: the new enabled state.
 */
void _glEnableLight(GLubyte light, GLboolean value) {
    LightSource* target = &LIGHTS[light];

    target->isEnabled = value;
    recalcEnabledLights();
}
GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask) {
void _glPrecalcLightingValues(GLuint mask) {
/* Pre-calculate lighting values */
GLshort i;
Material* material = _glActiveMaterial();
if(mask & AMBIENT_MASK) {
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LIGHTS[i].ambientMaterial[0] = LIGHTS[i].ambient[0] * MATERIAL.ambient[0];
LIGHTS[i].ambientMaterial[1] = LIGHTS[i].ambient[1] * MATERIAL.ambient[1];
LIGHTS[i].ambientMaterial[2] = LIGHTS[i].ambient[2] * MATERIAL.ambient[2];
LIGHTS[i].ambientMaterial[3] = LIGHTS[i].ambient[3] * MATERIAL.ambient[3];
LightSource* light = _glLightAt(i);
light->ambientMaterial[0] = light->ambient[0] * material->ambient[0];
light->ambientMaterial[1] = light->ambient[1] * material->ambient[1];
light->ambientMaterial[2] = light->ambient[2] * material->ambient[2];
light->ambientMaterial[3] = light->ambient[3] * material->ambient[3];
}
}
if(mask & DIFFUSE_MASK) {
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LIGHTS[i].diffuseMaterial[0] = LIGHTS[i].diffuse[0] * MATERIAL.diffuse[0];
LIGHTS[i].diffuseMaterial[1] = LIGHTS[i].diffuse[1] * MATERIAL.diffuse[1];
LIGHTS[i].diffuseMaterial[2] = LIGHTS[i].diffuse[2] * MATERIAL.diffuse[2];
LIGHTS[i].diffuseMaterial[3] = LIGHTS[i].diffuse[3] * MATERIAL.diffuse[3];
LightSource* light = _glLightAt(i);
light->diffuseMaterial[0] = light->diffuse[0] * material->diffuse[0];
light->diffuseMaterial[1] = light->diffuse[1] * material->diffuse[1];
light->diffuseMaterial[2] = light->diffuse[2] * material->diffuse[2];
light->diffuseMaterial[3] = light->diffuse[3] * material->diffuse[3];
}
}
if(mask & SPECULAR_MASK) {
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LIGHTS[i].specularMaterial[0] = LIGHTS[i].specular[0] * MATERIAL.specular[0];
LIGHTS[i].specularMaterial[1] = LIGHTS[i].specular[1] * MATERIAL.specular[1];
LIGHTS[i].specularMaterial[2] = LIGHTS[i].specular[2] * MATERIAL.specular[2];
LIGHTS[i].specularMaterial[3] = LIGHTS[i].specular[3] * MATERIAL.specular[3];
LightSource* light = _glLightAt(i);
light->specularMaterial[0] = light->specular[0] * material->specular[0];
light->specularMaterial[1] = light->specular[1] * material->specular[1];
light->specularMaterial[2] = light->specular[2] * material->specular[2];
light->specularMaterial[3] = light->specular[3] * material->specular[3];
}
}
/* If ambient or emission are updated, we need to update
* the base colour. */
if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) {
MATERIAL.baseColour[0] = MATH_fmac(SCENE_AMBIENT[0], MATERIAL.ambient[0], MATERIAL.emissive[0]);
MATERIAL.baseColour[1] = MATH_fmac(SCENE_AMBIENT[1], MATERIAL.ambient[1], MATERIAL.emissive[1]);
MATERIAL.baseColour[2] = MATH_fmac(SCENE_AMBIENT[2], MATERIAL.ambient[2], MATERIAL.emissive[2]);
MATERIAL.baseColour[3] = MATH_fmac(SCENE_AMBIENT[3], MATERIAL.ambient[3], MATERIAL.emissive[3]);
GLfloat* scene_ambient = _glLightModelSceneAmbient();
material->baseColour[0] = MATH_fmac(scene_ambient[0], material->ambient[0], material->emissive[0]);
material->baseColour[1] = MATH_fmac(scene_ambient[1], material->ambient[1], material->emissive[1]);
material->baseColour[2] = MATH_fmac(scene_ambient[2], material->ambient[2], material->emissive[2]);
material->baseColour[3] = MATH_fmac(scene_ambient[3], material->ambient[3], material->emissive[3]);
}
}
void _glInitLights() {
Material* material = _glActiveMaterial();
static GLfloat ONE [] = {1.0f, 1.0f, 1.0f, 1.0f};
static GLfloat ZERO [] = {0.0f, 0.0f, 0.0f, 1.0f};
static GLfloat PARTIAL [] = {0.2f, 0.2f, 0.2f, 1.0f};
static GLfloat MOSTLY [] = {0.8f, 0.8f, 0.8f, 1.0f};
memcpy(material->ambient, PARTIAL, sizeof(GLfloat) * 4);
memcpy(material->diffuse, MOSTLY, sizeof(GLfloat) * 4);
memcpy(material->specular, ZERO, sizeof(GLfloat) * 4);
memcpy(material->emissive, ZERO, sizeof(GLfloat) * 4);
material->exponent = 0.0f;
GLubyte i;
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LightSource* light = _glLightAt(i);
memcpy(light->ambient, ZERO, sizeof(GLfloat) * 4);
memcpy(light->diffuse, ONE, sizeof(GLfloat) * 4);
memcpy(light->specular, ONE, sizeof(GLfloat) * 4);
if(i > 0) {
memcpy(light->diffuse, ZERO, sizeof(GLfloat) * 4);
memcpy(light->specular, ZERO, sizeof(GLfloat) * 4);
}
light->position[0] = light->position[1] = light->position[3] = 0.0f;
light->position[2] = 1.0f;
light->isDirectional = GL_TRUE;
light->isEnabled = GL_FALSE;
light->spot_direction[0] = light->spot_direction[1] = 0.0f;
light->spot_direction[2] = -1.0f;
light->spot_exponent = 0.0f;
light->spot_cutoff = 180.0f;
light->constant_attenuation = 1.0f;
light->linear_attenuation = 0.0f;
light->quadratic_attenuation = 0.0f;
}
_glPrecalcLightingValues(~0);
_glRecalcEnabledLights();
}
/*
 * Scalar form of glLightModelfv: sets a single-valued light model parameter.
 *
 * pname: the light model parameter to set.
 * param: the new value.
 *
 * GL_LIGHT_MODEL_AMBIENT takes four components, and glLightModelfv reads
 * four floats for it. Forwarding the address of the single `param` would
 * therefore read past the end of the argument, so that pname is rejected
 * here with GL_INVALID_ENUM. Use glLightModelfv to set the scene ambient.
 */
void APIENTRY glLightModelf(GLenum pname, const GLfloat param) {
    if(pname == GL_LIGHT_MODEL_AMBIENT) {
        _glKosThrowError(GL_INVALID_ENUM, __func__);
        return;
    }

    glLightModelfv(pname, &param);
}
@ -143,11 +124,13 @@ void APIENTRY glLightModeli(GLenum pname, const GLint param) {
void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) {
switch(pname) {
case GL_LIGHT_MODEL_AMBIENT: {
for(int i = 0; i < 4; ++i) SCENE_AMBIENT[i] = params[i];
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
if(memcmp(_glGetLightModelSceneAmbient(), params, sizeof(float) * 4) != 0) {
_glSetLightModelSceneAmbient(params);
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
}
} break;
case GL_LIGHT_MODEL_LOCAL_VIEWER:
VIEWER_IN_EYE_COORDINATES = (*params) ? GL_TRUE : GL_FALSE;
_glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE);
break;
case GL_LIGHT_MODEL_TWO_SIDE:
/* Not implemented */
@ -159,10 +142,10 @@ void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) {
void APIENTRY glLightModeliv(GLenum pname, const GLint* params) {
switch(pname) {
case GL_LIGHT_MODEL_COLOR_CONTROL:
COLOR_CONTROL = *params;
_glSetLightModelColorControl(*params);
break;
case GL_LIGHT_MODEL_LOCAL_VIEWER:
VIEWER_IN_EYE_COORDINATES = (*params) ? GL_TRUE : GL_FALSE;
_glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE);
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
@ -173,6 +156,7 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
GLubyte idx = light & 0xF;
if(idx >= MAX_GLDC_LIGHTS) {
_glKosThrowError(GL_INVALID_VALUE, __func__);
return;
}
@ -180,33 +164,46 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
(pname == GL_DIFFUSE) ? DIFFUSE_MASK :
(pname == GL_SPECULAR) ? SPECULAR_MASK : 0;
LightSource* l = _glLightAt(idx);
GLboolean rebuild = GL_FALSE;
switch(pname) {
case GL_AMBIENT:
memcpy(LIGHTS[idx].ambient, params, sizeof(GLfloat) * 4);
rebuild = memcmp(l->ambient, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->ambient, params, sizeof(GLfloat) * 4);
}
break;
case GL_DIFFUSE:
memcpy(LIGHTS[idx].diffuse, params, sizeof(GLfloat) * 4);
rebuild = memcmp(l->diffuse, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->diffuse, params, sizeof(GLfloat) * 4);
}
break;
case GL_SPECULAR:
memcpy(LIGHTS[idx].specular, params, sizeof(GLfloat) * 4);
rebuild = memcmp(l->specular, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->specular, params, sizeof(GLfloat) * 4);
}
break;
case GL_POSITION: {
_glMatrixLoadModelView();
memcpy(LIGHTS[idx].position, params, sizeof(GLfloat) * 4);
memcpy(l->position, params, sizeof(GLfloat) * 4);
LIGHTS[idx].isDirectional = params[3] == 0.0f;
l->isDirectional = params[3] == 0.0f;
if(LIGHTS[idx].isDirectional) {
if(l->isDirectional) {
//FIXME: Do we need to rotate directional lights?
} else {
TransformVec3(LIGHTS[idx].position);
_glMatrixLoadModelView();
TransformVec3(l->position);
}
}
break;
case GL_SPOT_DIRECTION: {
LIGHTS[idx].spot_direction[0] = params[0];
LIGHTS[idx].spot_direction[1] = params[1];
LIGHTS[idx].spot_direction[2] = params[2];
l->spot_direction[0] = params[0];
l->spot_direction[1] = params[1];
l->spot_direction[2] = params[2];
} break;
case GL_CONSTANT_ATTENUATION:
case GL_LINEAR_ATTENUATION:
@ -220,31 +217,36 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
return;
}
_glPrecalcLightingValues(mask);
if(rebuild) {
_glPrecalcLightingValues(mask);
}
}
void APIENTRY glLightf(GLenum light, GLenum pname, GLfloat param) {
GLubyte idx = light & 0xF;
if(idx >= MAX_GLDC_LIGHTS) {
_glKosThrowError(GL_INVALID_VALUE, __func__);
return;
}
LightSource* l = _glLightAt(idx);
switch(pname) {
case GL_CONSTANT_ATTENUATION:
LIGHTS[idx].constant_attenuation = param;
l->constant_attenuation = param;
break;
case GL_LINEAR_ATTENUATION:
LIGHTS[idx].linear_attenuation = param;
l->linear_attenuation = param;
break;
case GL_QUADRATIC_ATTENUATION:
LIGHTS[idx].quadratic_attenuation = param;
l->quadratic_attenuation = param;
break;
case GL_SPOT_EXPONENT:
LIGHTS[idx].spot_exponent = param;
l->spot_exponent = param;
break;
case GL_SPOT_CUTOFF:
LIGHTS[idx].spot_cutoff = param;
l->spot_cutoff = param;
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
@ -257,7 +259,7 @@ void APIENTRY glMaterialf(GLenum face, GLenum pname, const GLfloat param) {
return;
}
MATERIAL.exponent = _MIN(param, 128); /* 128 is the max according to the GL spec */
_glActiveMaterial()->exponent = _MIN(param, 128); /* 128 is the max according to the GL spec */
}
void APIENTRY glMateriali(GLenum face, GLenum pname, const GLint param) {
@ -270,25 +272,49 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
return;
}
Material* material = _glActiveMaterial();
GLboolean rebuild = GL_FALSE;
switch(pname) {
case GL_SHININESS:
glMaterialf(face, pname, *params);
rebuild = GL_TRUE;
break;
case GL_AMBIENT:
vec4cpy(MATERIAL.ambient, params);
break;
case GL_AMBIENT: {
if(memcmp(material->ambient, params, sizeof(float) * 4) != 0) {
vec4cpy(material->ambient, params);
rebuild = GL_TRUE;
}
} break;
case GL_DIFFUSE:
vec4cpy(MATERIAL.diffuse, params);
if(memcmp(material->diffuse, params, sizeof(float) * 4) != 0) {
vec4cpy(material->diffuse, params);
rebuild = GL_TRUE;
}
break;
case GL_SPECULAR:
vec4cpy(MATERIAL.specular, params);
if(memcmp(material->specular, params, sizeof(float) * 4) != 0) {
vec4cpy(material->specular, params);
rebuild = GL_TRUE;
}
break;
case GL_EMISSION:
vec4cpy(MATERIAL.emissive, params);
if(memcmp(material->emissive, params, sizeof(float) * 4) != 0) {
vec4cpy(material->emissive, params);
rebuild = GL_TRUE;
}
break;
case GL_AMBIENT_AND_DIFFUSE: {
vec4cpy(MATERIAL.ambient, params);
vec4cpy(MATERIAL.diffuse, params);
rebuild = (
memcmp(material->ambient, params, sizeof(float) * 4) != 0 ||
memcmp(material->diffuse, params, sizeof(float) * 4) != 0
);
if(rebuild) {
vec4cpy(material->ambient, params);
vec4cpy(material->diffuse, params);
}
} break;
case GL_COLOR_INDEXES:
default: {
@ -297,13 +323,15 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
}
}
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
(pname == GL_SPECULAR) ? SPECULAR_MASK:
(pname == GL_EMISSION) ? EMISSION_MASK:
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
if(rebuild) {
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
(pname == GL_SPECULAR) ? SPECULAR_MASK:
(pname == GL_EMISSION) ? EMISSION_MASK:
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
_glPrecalcLightingValues(updateMask);
_glPrecalcLightingValues(updateMask);
}
}
void APIENTRY glColorMaterial(GLenum face, GLenum mode) {
@ -318,12 +346,13 @@ void APIENTRY glColorMaterial(GLenum face, GLenum mode) {
return;
}
COLOR_MATERIAL_MASK = (mode == GL_AMBIENT) ? AMBIENT_MASK:
GLenum mask = (mode == GL_AMBIENT) ? AMBIENT_MASK:
(mode == GL_DIFFUSE) ? DIFFUSE_MASK:
(mode == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK:
(mode == GL_EMISSION) ? EMISSION_MASK : SPECULAR_MASK;
COLOR_MATERIAL_MODE = mode;
_glSetColorMaterialMask(mask);
_glSetColorMaterialMode(mode);
}
GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) {
@ -336,44 +365,68 @@ GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) {
}
void _glUpdateColourMaterialA(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(MATERIAL.ambient, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
vec4cpy(material->ambient, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
}
void _glUpdateColourMaterialD(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(MATERIAL.diffuse, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
vec4cpy(material->diffuse, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
}
void _glUpdateColourMaterialE(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(MATERIAL.emissive, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
vec4cpy(material->emissive, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
}
void _glUpdateColourMaterialAD(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(MATERIAL.ambient, colour);
vec4cpy(MATERIAL.diffuse, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
vec4cpy(material->ambient, colour);
vec4cpy(material->diffuse, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
}
GL_FORCE_INLINE GLboolean isDiffuseColorMaterial() {
return (COLOR_MATERIAL_MODE == GL_DIFFUSE || COLOR_MATERIAL_MODE == GL_AMBIENT_AND_DIFFUSE);
GLenum mode = _glColorMaterialMode();
return (
mode == GL_DIFFUSE ||
mode == GL_AMBIENT_AND_DIFFUSE
);
}
GL_FORCE_INLINE GLboolean isAmbientColorMaterial() {
return (COLOR_MATERIAL_MODE == GL_AMBIENT || COLOR_MATERIAL_MODE == GL_AMBIENT_AND_DIFFUSE);
GLenum mode = _glColorMaterialMode();
return (
mode == GL_AMBIENT ||
mode == GL_AMBIENT_AND_DIFFUSE
);
}
GL_FORCE_INLINE GLboolean isSpecularColorMaterial() {
return (COLOR_MATERIAL_MODE == GL_SPECULAR);
GLenum mode = _glColorMaterialMode();
return (mode == GL_SPECULAR);
}
/*
@ -408,12 +461,15 @@ GL_FORCE_INLINE void _glLightVertexDirectional(
float* final, uint8_t lid,
float LdotN, float NdotH) {
float FI = (MATERIAL.exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f;
Material* material = _glActiveMaterial();
LightSource* light = _glLightAt(lid);
float FI = (material->exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, material->exponent) : 1.0f;
#define _PROCESS_COMPONENT(X) \
final[X] += (LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
+ (FI * LIGHTS[lid].specularMaterial[X]); \
final[X] += (LdotN * light->diffuseMaterial[X] + light->ambientMaterial[X]) \
+ (FI * light->specularMaterial[X]); \
_PROCESS_COMPONENT(0);
_PROCESS_COMPONENT(1);
@ -426,12 +482,15 @@ GL_FORCE_INLINE void _glLightVertexPoint(
float* final, uint8_t lid,
float LdotN, float NdotH, float att) {
float FI = (MATERIAL.exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f;
Material* material = _glActiveMaterial();
LightSource* light = _glLightAt(lid);
float FI = (material->exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, material->exponent) : 1.0f;
#define _PROCESS_COMPONENT(X) \
final[X] += ((LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
+ (FI * LIGHTS[lid].specularMaterial[X])) * att; \
final[X] += ((LdotN * light->diffuseMaterial[X] + light->ambientMaterial[X]) \
+ (FI * light->specularMaterial[X])) * att; \
_PROCESS_COMPONENT(0);
_PROCESS_COMPONENT(1);
@ -444,6 +503,8 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
GLubyte i;
GLuint j;
Material* material = _glActiveMaterial();
Vertex* vertex = vertices;
EyeSpaceData* data = es;
@ -451,7 +512,8 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
void (*updateColourMaterial)(const GLubyte*) = NULL;
if(_glIsColorMaterialEnabled()) {
switch(COLOR_MATERIAL_MODE) {
GLenum mode = _glColorMaterialMode();
switch(mode) {
case GL_AMBIENT:
updateColourMaterial = _glUpdateColourMaterialA;
break;
@ -474,10 +536,10 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
}
/* Copy the base colour across */
vec4cpy(data->finalColour, MATERIAL.baseColour);
vec4cpy(data->finalColour, material->baseColour);
}
if(!ENABLED_LIGHT_COUNT) {
if(!_glEnabledLightCount()) {
return;
}
@ -495,15 +557,17 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
const float Nz = data->n[2];
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
if(!LIGHTS[i].isEnabled) {
LightSource* light = _glLightAt(i);
if(!light->isEnabled) {
continue;
}
float Lx = LIGHTS[i].position[0] - vertex->xyz[0];
float Ly = LIGHTS[i].position[1] - vertex->xyz[1];
float Lz = LIGHTS[i].position[2] - vertex->xyz[2];
float Lx = light->position[0] - vertex->xyz[0];
float Ly = light->position[1] - vertex->xyz[1];
float Lz = light->position[2] - vertex->xyz[2];
if(LIGHTS[i].isDirectional) {
if(light->isDirectional) {
float Hx = (Lx + 0);
float Hy = (Ly + 0);
float Hz = (Lz + 1);
@ -532,9 +596,9 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
VEC3_LENGTH(Lx, Ly, Lz, D);
float att = (
LIGHTS[i].constant_attenuation + (
LIGHTS[i].linear_attenuation * D
) + (LIGHTS[i].quadratic_attenuation * D * D)
light->constant_attenuation + (
light->linear_attenuation * D
) + (light->quadratic_attenuation * D * D)
);
/* Anything over the attenuation threshold will

View File

@ -47,12 +47,10 @@ void SceneListBegin(GPUList list) {
pvr_list_begin(list);
}
__attribute__((optimize("O3", "fast-math")))
/*
 * Return 1 / sqrt(x * x), i.e. the reciprocal of the magnitude of x; the
 * sign of x is discarded.
 * NOTE(review): presumably written this way so the compiler can emit a
 * reciprocal-square-root instruction - confirm against the build flags.
 */
GL_FORCE_INLINE float _glFastInvert(float x) {
    const float magnitude = __builtin_sqrtf(x * x);
    return (1.f / magnitude);
}
__attribute__((optimize("O3", "fast-math")))
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
const float f = _glFastInvert(vertex->w);
@ -74,9 +72,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
}
static uint32_t *d; // SQ target
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) {
#ifndef NDEBUG
gl_assert(!isnan(v->xyz[2]));
gl_assert(!isnan(v->w));
@ -100,7 +96,7 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
d += 8;
}
static struct {
static struct __attribute__((aligned(32))) {
Vertex* v;
int visible;
} triangle[3];
@ -108,41 +104,36 @@ static struct {
static int tri_count = 0;
static int strip_count = 0;
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
const int MASK1 = 0x00FF00FF;
const int MASK2 = 0xFF00FF00;
GL_FORCE_INLINE void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) {
const static uint32_t MASK1 = 0x00FF00FF;
const static uint32_t MASK2 = 0xFF00FF00;
const int f2 = 256 * t;
const int f1 = 256 - f2;
const uint32_t f2 = 256 * t;
const uint32_t f1 = 256 - f2;
const uint32_t a = *(uint32_t*) v1;
const uint32_t b = *(uint32_t*) v2;
*((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) |
(((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2);
/* Blend the two packed colours lane by lane with weights f1 + f2 == 256.
 * Bytes 0 and 2 go through MASK1 and are shifted down after the multiply.
 * Bytes 1 and 3 go through MASK2 and must be shifted down by 8 *before* the
 * multiply: a 0xFF00FF00-masked value times up to 256 does not fit in 32 bits,
 * and the overflow made the top byte of the result always zero. */
*out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) |
       (((((*a & MASK2) >> 8) * f1) + (((*b & MASK2) >> 8) * f2)) & MASK2);
}
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
/* Clipping time! */
const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2];
const float sign = ((2.0f * (d1 < d0)) - 1.0f);
const float epsilon = -0.00001f * sign;
const float n = (d0 - d1);
const float r = (1.f / sqrtf(n * n)) * sign;
float t = fmaf(r, d0, epsilon);
const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f;
vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = fmaf(v2->w - v1->w, t, v1->w);
float t = MATH_Fast_Divide(d0, (d0 - d1)) + epsilon;
vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
t = (t > 1.0f) ? 1.0f : t;
t = (t < 0.0f) ? 0.0f : t;
vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);
vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra);
}
GL_FORCE_INLINE void ClearTriangle() {
@ -182,26 +173,27 @@ GL_FORCE_INLINE void ShiftRotateTriangle() {
#define SPAN_SORT_CFG 0x005F8030
void SceneListSubmit(void* src, int n) {
/* Do everything, everywhere, all at once */
const float h = GetVideoMode()->height;
PVR_SET(SPAN_SORT_CFG, 0x0);
/* Prep store queues */
d = (uint32_t*) SQ_BASE_ADDRESS;
uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS;
*PVR_LMMODE0 = 0x0; /* Enable 64bit mode */
Vertex __attribute__((aligned(32))) tmp;
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
const float h = GetVideoMode()->height;
if(!_glNearZClippingEnabled()) {
/* Prep store queues */
if(!ZNEAR_CLIPPING_ENABLED) {
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(vertex);
_glSubmitHeaderOrVertex(d, vertex);
}
/* Wait for both store queues to complete */
@ -219,25 +211,22 @@ void SceneListSubmit(void* src, int n) {
#endif
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
bool is_last_in_strip = glIsLastVertex(vertex->flags);
PREFETCH(vertex + 12);
/* Wait until we fill the triangle */
if(tri_count < 3) {
if(likely(glIsVertex(vertex->flags))) {
if(glIsVertex(vertex->flags)) {
++strip_count;
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
tri_count++;
strip_count++;
if(++tri_count < 3) {
continue;
}
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(vertex);
}
if(tri_count < 3) {
_glSubmitHeaderOrVertex(d, vertex);
continue;
}
}
@ -248,199 +237,189 @@ void SceneListSubmit(void* src, int n) {
/* If we got here, then triangle contains 3 vertices */
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
if(visible_mask == 7) {
#if CLIP_DEBUG
printf("Visible\n");
#endif
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(vertex - 2);
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(vertex);
tri_count = 0;
strip_count = 0;
}
/* Clipping time!
ShiftRotateTriangle();
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
} else if(visible_mask) {
/* Clipping time!
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
#define SUBMIT_QUEUED() \
if(strip_count > 3) { \
tmp = *(vertex - 2); \
/* If we had triangles ahead of this one, submit and finalize */ \
_glPerspectiveDivideVertex(&tmp, h); \
_glSubmitHeaderOrVertex(d, &tmp); \
tmp = *(vertex - 1); \
tmp.flags = GPU_CMD_VERTEX_EOL; \
_glPerspectiveDivideVertex(&tmp, h); \
_glSubmitHeaderOrVertex(d, &tmp); \
}
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
#if CLIP_DEBUG
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
Vertex tmp;
if(strip_count > 3) {
#if CLIP_DEBUG
printf("Flush\n");
#endif
tmp = *(vertex - 2);
/* If we had triangles ahead of this one, submit and finalize */
bool is_last_in_strip = glIsLastVertex(vertex->flags);
switch(visible_mask) {
case 1: {
SUBMIT_QUEUED();
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glSubmitHeaderOrVertex(d, &tmp);
tmp = *(vertex - 1);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
}
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 2: {
SUBMIT_QUEUED();
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
switch(visible_mask) {
case 1: {
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 3: {
SUBMIT_QUEUED();
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 2: {
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 3: {
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 4: {
SUBMIT_QUEUED();
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 5: {
SUBMIT_QUEUED();
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 4: {
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 5: {
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 6: {
SUBMIT_QUEUED();
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 6: {
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 7: {
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(d, vertex - 2);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(d, vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(d, vertex);
tri_count = 0;
strip_count = 0;
}
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
default:
break;
}
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
}
} else {
/* Invisible? Move to the next in the strip */
continue;
} break;
case 0:
default:
break;
}
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
}
strip_count = 2;
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
}
}

View File

@ -26,6 +26,34 @@
#define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr)))
/* Copies `len` bytes from `src` to `dest` one byte at a time using an SH4
 * inline-assembly loop, and returns `dest`.
 * NOTE(review): the copy runs forward from the first byte, so the regions are
 * presumably expected not to overlap (as with memcpy) - confirm against callers. */
GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) {
/* The loop decrements the counter before testing it, so a zero length must
 * never reach the assembly below. */
if(!len) {
return dest;
}
const uint8_t *s = (uint8_t *)src;
uint8_t *d = (uint8_t *)dest;
/* Distance from the (already post-incremented) source pointer to the matching
 * destination byte; the store below addresses memory as source + diff. */
uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
// Underflow would be like adding a negative offset
// Can use 'd' as a scratch reg now
asm volatile (
"clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
".align 2\n"
"0:\n\t"
"dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
"mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
"bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2)
" mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
: [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
: [offset] "z" (diff) // inputs
: "t", "memory" // clobbers
);
/* `d` was reused as the scratch register above, so return the untouched `dest`. */
return dest;
}
/* We use sq_cpy if the src and size is properly aligned. We control that the
* destination is properly aligned so we assert that. */
#define FASTCPY(dst, src, bytes) \
@ -34,11 +62,12 @@
gl_assert(((uintptr_t) dst) % 32 == 0); \
sq_cpy(dst, src, bytes); \
} else { \
memcpy(dst, src, bytes); \
memcpy_fast(dst, src, bytes); \
} \
} while(0)
#define MEMCPY4(dst, src, bytes) memcpy4(dst, src, bytes)
#define MEMCPY4(dst, src, bytes) memcpy_fast(dst, src, bytes)
#define MEMSET4(dst, v, size) memset4((dst), (v), (size))

View File

@ -251,7 +251,7 @@ typedef struct {
* when a realloc could invalidate pointers. This structure holds all the information
* we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.)
*/
typedef struct {
typedef struct __attribute__((aligned(32))) {
PolyList* output;
uint32_t header_offset; // The offset of the header in the output list
uint32_t start_offset; // The offset into the output list
@ -306,7 +306,6 @@ Matrix4x4* _glGetModelViewMatrix();
void _glWipeTextureOnFramebuffers(GLuint texture);
PolyContext* _glGetPVRContext();
GLubyte _glInitTextures();
void _glUpdatePVRTextureContext(PolyContext* context, GLshort textureUnit);
@ -354,26 +353,27 @@ void _glSetInternalPaletteFormat(GLenum val);
GLboolean _glIsSharedTexturePaletteEnabled();
void _glApplyColorTable(TexturePalette *palette);
extern GLboolean BLEND_ENABLED;
extern GLboolean ALPHA_TEST_ENABLED;
extern GLboolean AUTOSORT_ENABLED;
GL_FORCE_INLINE GLboolean _glIsBlendingEnabled() {
return BLEND_ENABLED;
}
GL_FORCE_INLINE GLboolean _glIsAlphaTestEnabled() {
return ALPHA_TEST_ENABLED;
}
GLboolean _glIsBlendingEnabled();
GLboolean _glIsAlphaTestEnabled();
GLboolean _glIsCullingEnabled();
GLboolean _glIsDepthTestEnabled();
GLboolean _glIsDepthWriteEnabled();
GLboolean _glIsScissorTestEnabled();
GLboolean _glIsFogEnabled();
GLenum _glGetDepthFunc();
GLenum _glGetCullFace();
GLenum _glGetFrontFace();
GLenum _glGetBlendSourceFactor();
GLenum _glGetBlendDestFactor();
extern PolyList OP_LIST;
extern PolyList PT_LIST;
extern PolyList TR_LIST;
GL_FORCE_INLINE PolyList* _glActivePolyList() {
if(BLEND_ENABLED) {
if(_glIsBlendingEnabled()) {
return &TR_LIST;
} else if(ALPHA_TEST_ENABLED) {
} else if(_glIsAlphaTestEnabled()) {
return &PT_LIST;
} else {
return &OP_LIST;
@ -383,13 +383,9 @@ GL_FORCE_INLINE PolyList* _glActivePolyList() {
GLboolean _glIsMipmapComplete(const TextureObject* obj);
GLubyte* _glGetMipmapLocation(const TextureObject* obj, GLuint level);
GLuint _glGetMipmapLevelCount(const TextureObject* obj);
extern GLboolean ZNEAR_CLIPPING_ENABLED;
extern GLboolean LIGHTING_ENABLED;
GLboolean _glIsLightingEnabled();
void _glEnableLight(GLubyte light, unsigned char value);
void _glEnableLight(GLubyte light, GLboolean value);
GLboolean _glIsColorMaterialEnabled();
GLboolean _glIsNormalizeEnabled();
@ -513,10 +509,35 @@ GLuint _glUsedTextureMemory();
GLuint _glFreeContiguousTextureMemory();
void _glApplyScissor(bool force);
void _glSetColorMaterialMask(GLenum mask);
void _glSetColorMaterialMode(GLenum mode);
GLenum _glColorMaterialMode();
Material* _glActiveMaterial();
void _glSetLightModelViewerInEyeCoordinates(GLboolean v);
void _glSetLightModelSceneAmbient(const GLfloat* v);
void _glSetLightModelColorControl(GLint v);
GLuint _glEnabledLightCount();
void _glRecalcEnabledLights();
GLfloat* _glLightModelSceneAmbient();
GLfloat* _glGetLightModelSceneAmbient();
LightSource* _glLightAt(GLuint i);
GLboolean _glNearZClippingEnabled();
GLboolean _glGPUStateIsDirty();
void _glGPUStateMarkClean();
void _glGPUStateMarkDirty();
#define MAX_GLDC_TEXTURE_UNITS 2
#define MAX_GLDC_LIGHTS 8
#define AMBIENT_MASK 1
#define DIFFUSE_MASK 2
#define EMISSION_MASK 4
#define SPECULAR_MASK 8
#define SCENE_AMBIENT_MASK 16
/* This is from KOS pvr_buffers.c */
#define PVR_MIN_Z 0.0001f

View File

@ -4,137 +4,229 @@
#include "private.h"
static PolyContext GL_CONTEXT;
PolyContext *_glGetPVRContext() {
return &GL_CONTEXT;
}
static struct {
GLboolean is_dirty;
/* We can't just use the GL_CONTEXT for this state as the two
* GL states are combined, so we store them separately and then
* calculate the appropriate PVR state from them. */
static GLenum CULL_FACE = GL_BACK;
static GLenum FRONT_FACE = GL_CCW;
static GLboolean CULLING_ENABLED = GL_FALSE;
static GLboolean COLOR_MATERIAL_ENABLED = GL_FALSE;
GLenum depth_func;
GLboolean depth_test_enabled;
GLenum cull_face;
GLenum front_face;
GLboolean culling_enabled;
GLboolean color_material_enabled;
GLboolean znear_clipping_enabled;
GLboolean lighting_enabled;
GLboolean shared_palette_enabled;
GLboolean alpha_test_enabled;
GLboolean polygon_offset_enabled;
GLboolean normalize_enabled;
GLboolean scissor_test_enabled;
GLboolean fog_enabled;
GLboolean depth_mask_enabled;
GLboolean ZNEAR_CLIPPING_ENABLED = GL_TRUE;
struct {
GLint x;
GLint y;
GLsizei width;
GLsizei height;
GLboolean applied;
} scissor_rect;
GLboolean LIGHTING_ENABLED = GL_FALSE;
GLenum blend_sfactor;
GLenum blend_dfactor;
GLboolean blend_enabled;
GLfloat offset_factor;
GLfloat offset_units;
/* Is the shared texture palette enabled? */
static GLboolean SHARED_PALETTE_ENABLED = GL_FALSE;
GLfloat scene_ambient[4];
GLboolean viewer_in_eye_coords;
GLenum color_control;
GLenum color_material_mode;
GLenum color_material_mask;
GLboolean ALPHA_TEST_ENABLED = GL_FALSE;
LightSource lights[MAX_GLDC_LIGHTS];
GLuint enabled_light_count;
Material material;
static GLboolean POLYGON_OFFSET_ENABLED = GL_FALSE;
static GLboolean NORMALIZE_ENABLED = GL_FALSE;
static struct {
GLint x;
GLint y;
GLsizei width;
GLsizei height;
GLboolean applied;
} SCISSOR_RECT = {
0, 0, 640, 480, false
GLenum shade_model;
} GPUState = {
.is_dirty = GL_TRUE,
.depth_func = GL_LESS,
.depth_test_enabled = GL_FALSE,
.cull_face = GL_BACK,
.front_face = GL_CCW,
.culling_enabled = GL_FALSE,
.color_material_enabled = GL_FALSE,
.znear_clipping_enabled = GL_TRUE,
.lighting_enabled = GL_FALSE,
.shared_palette_enabled = GL_FALSE,
.alpha_test_enabled = GL_FALSE,
.polygon_offset_enabled = GL_FALSE,
.normalize_enabled = GL_FALSE,
.scissor_test_enabled = GL_FALSE,
.fog_enabled = GL_FALSE,
.depth_mask_enabled = GL_FALSE,
.scissor_rect = {0, 0, 640, 480, false},
.blend_sfactor = GL_ONE,
.blend_dfactor = GL_ZERO,
.blend_enabled = GL_FALSE,
.offset_factor = 0.0f,
.offset_units = 0.0f,
.scene_ambient = {0.2f, 0.2f, 0.2f, 1.0f},
.viewer_in_eye_coords = GL_TRUE,
.color_control = GL_SINGLE_COLOR,
.color_material_mode = GL_AMBIENT_AND_DIFFUSE,
.color_material_mask = AMBIENT_MASK | DIFFUSE_MASK,
.lights = {0},
.enabled_light_count = 0,
.material = {0},
.shade_model = GL_SMOOTH
};
GLboolean _glIsSharedTexturePaletteEnabled() {
return SHARED_PALETTE_ENABLED;
/* Clears the GPUState dirty flag. */
void _glGPUStateMarkClean() {
GPUState.is_dirty = GL_FALSE;
}
void _glApplyScissor(bool force);
/* Sets the GPUState dirty flag. */
void _glGPUStateMarkDirty() {
GPUState.is_dirty = GL_TRUE;
}
static int _calc_pvr_face_culling() {
if(!CULLING_ENABLED) {
return GPU_CULLING_NONE;
} else {
if(CULL_FACE == GL_BACK) {
return (FRONT_FACE == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
} else {
return (FRONT_FACE == GL_CCW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
/* Returns the current value of the GPUState dirty flag. */
GLboolean _glGPUStateIsDirty() {
return GPUState.is_dirty;
}
/* Returns a pointer to the single Material stored inside GPUState. */
Material* _glActiveMaterial() {
return &GPUState.material;
}
/* Looks up light slot `i` in GPUState.lights.
 * The index is checked with assert() against MAX_GLDC_LIGHTS. */
LightSource* _glLightAt(GLuint i) {
    assert(i < MAX_GLDC_LIGHTS);

    LightSource* const slot = GPUState.lights + i;
    return slot;
}
/* Sets the enabled flag of light slot `light` to `value` and refreshes the
 * cached enabled-light count.
 *
 * The count returned by _glEnabledLightCount() is only updated by
 * _glRecalcEnabledLights(); without the recalculation here the cached count
 * would go stale whenever a light is toggled through this helper rather than
 * through glEnable(GL_LIGHTn). */
void _glEnableLight(GLubyte light, GLboolean value) {
    /* _glLightAt() asserts that the index is below MAX_GLDC_LIGHTS. */
    _glLightAt(light)->isEnabled = value;
    _glRecalcEnabledLights();
}
/* Returns the tracked depth-test enable flag. */
GLboolean _glIsDepthTestEnabled() {
return GPUState.depth_test_enabled;
}
/* Returns the tracked depth comparison function (a GL enum; initialised to GL_LESS). */
GLenum _glGetDepthFunc() {
return GPUState.depth_func;
}
/* Returns the tracked depth-mask flag (GPUState.depth_mask_enabled). */
GLboolean _glIsDepthWriteEnabled() {
return GPUState.depth_mask_enabled;
}
/* Returns the tracked shade model (a GL enum; initialised to GL_SMOOTH). */
GLenum _glGetShadeModel() {
return GPUState.shade_model;
}
/* Returns the cached number of enabled lights.
 * The cache is only refreshed by _glRecalcEnabledLights(). */
GLuint _glEnabledLightCount() {
return GPUState.enabled_light_count;
}
/* Returns a pointer to the 4-float scene ambient colour held in GPUState.
 * The pointer aliases the live state, it is not a copy. */
GLfloat* _glLightModelSceneAmbient() {
return GPUState.scene_ambient;
}
/* Returns the tracked blending enable flag. */
GLboolean _glIsBlendingEnabled() {
return GPUState.blend_enabled;
}
/* Returns the tracked alpha-test enable flag. */
GLboolean _glIsAlphaTestEnabled() {
return GPUState.alpha_test_enabled;
}
/* Returns the tracked face-culling enable flag (GPUState.culling_enabled),
 * which is separate from the face selector in GPUState.cull_face. */
GLboolean _glIsCullingEnabled() {
return GPUState.culling_enabled;
}
/* Returns the tracked cull-face selector (a GL enum; initialised to GL_BACK). */
GLenum _glGetCullFace() {
return GPUState.cull_face;
}
/* Returns the tracked front-face winding (a GL enum; initialised to GL_CCW). */
GLenum _glGetFrontFace() {
return GPUState.front_face;
}
/* Returns the tracked fog enable flag. */
GLboolean _glIsFogEnabled() {
return GPUState.fog_enabled;
}
/* Returns the tracked scissor-test enable flag. */
GLboolean _glIsScissorTestEnabled() {
return GPUState.scissor_test_enabled;
}
void _glRecalcEnabledLights() {
GPUState.enabled_light_count = 0;
for(GLubyte i = 0; i < MAX_GLDC_LIGHTS; ++i) {
if(_glLightAt(i)->isEnabled) {
GPUState.enabled_light_count++;
}
}
}
static GLenum DEPTH_FUNC = GL_LESS;
static GLboolean DEPTH_TEST_ENABLED = GL_FALSE;
static int _calc_pvr_depth_test() {
if(!DEPTH_TEST_ENABLED) {
return GPU_DEPTHCMP_ALWAYS;
}
switch(DEPTH_FUNC) {
case GL_NEVER:
return GPU_DEPTHCMP_NEVER;
case GL_LESS:
return GPU_DEPTHCMP_GREATER;
case GL_EQUAL:
return GPU_DEPTHCMP_EQUAL;
case GL_LEQUAL:
return GPU_DEPTHCMP_GEQUAL;
case GL_GREATER:
return GPU_DEPTHCMP_LESS;
case GL_NOTEQUAL:
return GPU_DEPTHCMP_NOTEQUAL;
case GL_GEQUAL:
return GPU_DEPTHCMP_LEQUAL;
break;
case GL_ALWAYS:
default:
return GPU_DEPTHCMP_ALWAYS;
}
/* Stores `v` as the light-model "viewer in eye coordinates" flag.
 * Does not touch the dirty flag. */
void _glSetLightModelViewerInEyeCoordinates(GLboolean v) {
GPUState.viewer_in_eye_coords = v;
}
static GLenum BLEND_SFACTOR = GL_ONE;
static GLenum BLEND_DFACTOR = GL_ZERO;
GLboolean BLEND_ENABLED = GL_FALSE;
/* Copies `v` into the scene ambient colour using vec4cpy.
 * NOTE(review): `v` presumably must point at four floats - confirm against vec4cpy. */
void _glSetLightModelSceneAmbient(const GLfloat* v) {
vec4cpy(GPUState.scene_ambient, v);
}
static GLfloat OFFSET_FACTOR = 0.0f;
static GLfloat OFFSET_UNITS = 0.0f;
/* Returns a pointer to the 4-float scene ambient colour held in GPUState
 * (same storage that _glLightModelSceneAmbient() returns). */
GLfloat* _glGetLightModelSceneAmbient() {
return GPUState.scene_ambient;
}
/* Stores `v` as the light-model colour control value (kept as a GLenum;
 * initialised to GL_SINGLE_COLOR). No validation is performed here. */
void _glSetLightModelColorControl(GLint v) {
GPUState.color_control = v;
}
/* Returns the tracked colour-material mask
 * (initialised to AMBIENT_MASK | DIFFUSE_MASK). */
GLenum _glColorMaterialMask() {
return GPUState.color_material_mask;
}
/* Replaces the tracked colour-material mask with `mask`. */
void _glSetColorMaterialMask(GLenum mask) {
GPUState.color_material_mask = mask;
}
/* Replaces the tracked colour-material mode with `mode`. */
void _glSetColorMaterialMode(GLenum mode) {
GPUState.color_material_mode = mode;
}
/* Returns the tracked colour-material mode
 * (initialised to GL_AMBIENT_AND_DIFFUSE). */
GLenum _glColorMaterialMode() {
return GPUState.color_material_mode;
}
/* Returns the tracked shared-texture-palette enable flag. */
GLboolean _glIsSharedTexturePaletteEnabled() {
return GPUState.shared_palette_enabled;
}
/* Returns the tracked near-Z clipping enable flag (initialised to GL_TRUE). */
GLboolean _glNearZClippingEnabled() {
return GPUState.znear_clipping_enabled;
}
void _glApplyScissor(bool force);
GLboolean _glIsNormalizeEnabled() {
return NORMALIZE_ENABLED;
return GPUState.normalize_enabled;
}
static int _calcPVRBlendFactor(GLenum factor) {
switch(factor) {
case GL_ZERO:
return GPU_BLEND_ZERO;
case GL_SRC_ALPHA:
return GPU_BLEND_SRCALPHA;
case GL_DST_COLOR:
return GPU_BLEND_DESTCOLOR;
case GL_DST_ALPHA:
return GPU_BLEND_DESTALPHA;
case GL_ONE_MINUS_DST_COLOR:
return GPU_BLEND_INVDESTCOLOR;
case GL_ONE_MINUS_SRC_ALPHA:
return GPU_BLEND_INVSRCALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return GPU_BLEND_INVDESTALPHA;
case GL_ONE:
return GPU_BLEND_ONE;
default:
fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor);
return GPU_BLEND_ONE;
}
/* Returns the tracked source blend factor (a GL enum; initialised to GL_ONE). */
GLenum _glGetBlendSourceFactor() {
return GPUState.blend_sfactor;
}
static void _updatePVRBlend(PolyContext* context) {
if(BLEND_ENABLED || ALPHA_TEST_ENABLED) {
context->gen.alpha = GPU_ALPHA_ENABLE;
} else {
context->gen.alpha = GPU_ALPHA_DISABLE;
}
context->blend.src = _calcPVRBlendFactor(BLEND_SFACTOR);
context->blend.dst = _calcPVRBlendFactor(BLEND_DFACTOR);
/* Returns the tracked destination blend factor (a GL enum; initialised to GL_ZERO). */
GLenum _glGetBlendDestFactor() {
return GPUState.blend_dfactor;
}
GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func) {
GLubyte found = 0;
while(*values != 0) {
@ -167,7 +259,7 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
return;
}
context->txr.alpha = (BLEND_ENABLED || ALPHA_TEST_ENABLED) ? GPU_TXRALPHA_ENABLE : GPU_TXRALPHA_DISABLE;
context->txr.alpha = (GPUState.blend_enabled || GPUState.alpha_test_enabled) ? GPU_TXRALPHA_ENABLE : GPU_TXRALPHA_DISABLE;
GLuint filter = GPU_FILTER_NEAREST;
GLboolean enableMipmaps = GL_FALSE;
@ -262,29 +354,22 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
}
GLboolean _glIsLightingEnabled() {
return LIGHTING_ENABLED;
return GPUState.lighting_enabled;
}
GLboolean _glIsColorMaterialEnabled() {
return COLOR_MATERIAL_ENABLED;
return GPUState.color_material_enabled;
}
static GLfloat CLEAR_COLOUR[3];
void _glInitContext() {
memset(&GL_CONTEXT, 0, sizeof(PolyContext));
GL_CONTEXT.list_type = GPU_LIST_OP_POLY;
GL_CONTEXT.fmt.color = GPU_CLRFMT_ARGBPACKED;
GL_CONTEXT.fmt.uv = GPU_UVFMT_32BIT;
GL_CONTEXT.gen.color_clamp = GPU_CLRCLAMP_DISABLE;
const VideoMode* mode = GetVideoMode();
SCISSOR_RECT.x = 0;
SCISSOR_RECT.y = 0;
SCISSOR_RECT.width = mode->width;
SCISSOR_RECT.height = mode->height;
GPUState.scissor_rect.x = 0;
GPUState.scissor_rect.y = 0;
GPUState.scissor_rect.width = mode->width;
GPUState.scissor_rect.height = mode->height;
glClearDepth(1.0f);
glDepthFunc(GL_LESS);
@ -312,40 +397,66 @@ void _glInitContext() {
GLAPI void APIENTRY glEnable(GLenum cap) {
switch(cap) {
case GL_TEXTURE_2D:
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE;
if(TEXTURES_ENABLED[_glGetActiveTexture()] != GL_TRUE) {
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_CULL_FACE: {
CULLING_ENABLED = GL_TRUE;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
/* Toggle the enable flag, not the face selector: cull_face holds the
 * glCullFace mode (initialised to GL_BACK) and _glIsCullingEnabled()
 * reads culling_enabled. */
if(GPUState.culling_enabled != GL_TRUE) {
    GPUState.culling_enabled = GL_TRUE;
    GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_DEPTH_TEST: {
DEPTH_TEST_ENABLED = GL_TRUE;
GL_CONTEXT.depth.comparison = _calc_pvr_depth_test();
if(GPUState.depth_test_enabled != GL_TRUE) {
GPUState.depth_test_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_BLEND: {
BLEND_ENABLED = GL_TRUE;
_updatePVRBlend(&GL_CONTEXT);
if(GPUState.blend_enabled != GL_TRUE) {
GPUState.blend_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_SCISSOR_TEST: {
GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_INSIDE;
_glApplyScissor(false);
if(GPUState.scissor_test_enabled != GL_TRUE) {
GPUState.scissor_test_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_LIGHTING: {
LIGHTING_ENABLED = GL_TRUE;
if(GPUState.lighting_enabled != GL_TRUE) {
GPUState.lighting_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_FOG:
GL_CONTEXT.gen.fog_type = GPU_FOG_TABLE;
if(GPUState.fog_enabled != GL_TRUE) {
GPUState.fog_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_COLOR_MATERIAL:
COLOR_MATERIAL_ENABLED = GL_TRUE;
if(GPUState.color_material_enabled != GL_TRUE) {
GPUState.color_material_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_SHARED_TEXTURE_PALETTE_EXT: {
SHARED_PALETTE_ENABLED = GL_TRUE;
if(GPUState.shared_palette_enabled != GL_TRUE) {
GPUState.shared_palette_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
}
break;
case GL_ALPHA_TEST: {
ALPHA_TEST_ENABLED = GL_TRUE;
_updatePVRBlend(&GL_CONTEXT);
if(GPUState.alpha_test_enabled != GL_TRUE) {
GPUState.alpha_test_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_LIGHT0:
case GL_LIGHT1:
@ -354,19 +465,33 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
case GL_LIGHT4:
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
_glEnableLight(cap & 0xF, GL_TRUE);
case GL_LIGHT7: {
LightSource* ptr = _glLightAt(cap & 0xF);
if(ptr->isEnabled != GL_TRUE) {
ptr->isEnabled = GL_TRUE;
_glRecalcEnabledLights();
}
}
break;
case GL_NEARZ_CLIPPING_KOS:
ZNEAR_CLIPPING_ENABLED = GL_TRUE;
if(GPUState.znear_clipping_enabled != GL_TRUE) {
GPUState.znear_clipping_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
POLYGON_OFFSET_ENABLED = GL_TRUE;
if(GPUState.polygon_offset_enabled != GL_TRUE) {
GPUState.polygon_offset_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_NORMALIZE:
NORMALIZE_ENABLED = GL_TRUE;
if(GPUState.normalize_enabled != GL_TRUE) {
GPUState.normalize_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
default:
break;
@ -375,39 +500,67 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
GLAPI void APIENTRY glDisable(GLenum cap) {
switch(cap) {
case GL_TEXTURE_2D: {
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE;
} break;
case GL_TEXTURE_2D:
if(TEXTURES_ENABLED[_glGetActiveTexture()] != GL_FALSE) {
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_CULL_FACE: {
CULLING_ENABLED = GL_FALSE;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
if(GPUState.cull_face != GL_FALSE) {
GPUState.cull_face = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_DEPTH_TEST: {
DEPTH_TEST_ENABLED = GL_FALSE;
GL_CONTEXT.depth.comparison = _calc_pvr_depth_test();
if(GPUState.depth_test_enabled != GL_FALSE) {
GPUState.depth_test_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_BLEND: {
if(GPUState.blend_enabled != GL_FALSE) {
GPUState.blend_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_BLEND:
BLEND_ENABLED = GL_FALSE;
_updatePVRBlend(&GL_CONTEXT);
break;
case GL_SCISSOR_TEST: {
GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_DISABLE;
if(GPUState.scissor_test_enabled != GL_FALSE) {
GPUState.scissor_test_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_LIGHTING: {
LIGHTING_ENABLED = GL_FALSE;
if(GPUState.lighting_enabled != GL_FALSE) {
GPUState.lighting_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_FOG:
GL_CONTEXT.gen.fog_type = GPU_FOG_DISABLE;
if(GPUState.fog_enabled != GL_FALSE) {
GPUState.fog_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_COLOR_MATERIAL:
COLOR_MATERIAL_ENABLED = GL_FALSE;
if(GPUState.color_material_enabled != GL_FALSE) {
GPUState.color_material_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_SHARED_TEXTURE_PALETTE_EXT: {
SHARED_PALETTE_ENABLED = GL_FALSE;
if(GPUState.shared_palette_enabled != GL_FALSE) {
GPUState.shared_palette_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
}
break;
case GL_ALPHA_TEST: {
ALPHA_TEST_ENABLED = GL_FALSE;
if(GPUState.alpha_test_enabled != GL_FALSE) {
GPUState.alpha_test_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_LIGHT0:
case GL_LIGHT1:
@ -417,18 +570,30 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
_glEnableLight(cap & 0xF, GL_FALSE);
if(GPUState.lights[cap & 0xF].isEnabled) {
_glEnableLight(cap & 0xF, GL_FALSE);
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_NEARZ_CLIPPING_KOS:
ZNEAR_CLIPPING_ENABLED = GL_FALSE;
if(GPUState.znear_clipping_enabled != GL_FALSE) {
GPUState.znear_clipping_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
POLYGON_OFFSET_ENABLED = GL_FALSE;
if(GPUState.polygon_offset_enabled != GL_FALSE) {
GPUState.polygon_offset_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_NORMALIZE:
NORMALIZE_ENABLED = GL_FALSE;
if(GPUState.normalize_enabled != GL_FALSE) {
GPUState.normalize_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
default:
break;
@ -477,12 +642,17 @@ GLAPI void APIENTRY glReadBuffer(GLenum mode) {
}
GLAPI void APIENTRY glDepthMask(GLboolean flag) {
GL_CONTEXT.depth.write = (flag == GL_TRUE) ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE;
if(GPUState.depth_mask_enabled != flag) {
GPUState.depth_mask_enabled = flag;
GPUState.is_dirty = GL_TRUE;
}
}
GLAPI void APIENTRY glDepthFunc(GLenum func) {
DEPTH_FUNC = func;
GL_CONTEXT.depth.comparison = _calc_pvr_depth_test();
if(GPUState.depth_func != func) {
GPUState.depth_func = func;
GPUState.is_dirty = GL_TRUE;
}
}
/* Hints */
@ -502,29 +672,34 @@ GLAPI void APIENTRY glPolygonMode(GLenum face, GLenum mode) {
/* Culling */
GLAPI void APIENTRY glFrontFace(GLenum mode) {
FRONT_FACE = mode;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
if(GPUState.front_face != mode) {
GPUState.front_face = mode;
GPUState.is_dirty = GL_TRUE;
}
}
GLAPI void APIENTRY glCullFace(GLenum mode) {
CULL_FACE = mode;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
}
GLenum _glGetShadeModel() {
return (GL_CONTEXT.gen.shading == GPU_SHADE_FLAT) ? GL_FLAT : GL_SMOOTH;
if(GPUState.cull_face != mode) {
GPUState.cull_face = mode;
GPUState.is_dirty = GL_TRUE;
}
}
/* Shading - Flat or Gouraud */
GLAPI void APIENTRY glShadeModel(GLenum mode) {
GL_CONTEXT.gen.shading = (mode == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT;
if(GPUState.shade_model != mode) {
GPUState.shade_model = mode;
GPUState.is_dirty = GL_TRUE;
}
}
/* Blending */
GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor) {
BLEND_SFACTOR = sfactor;
BLEND_DFACTOR = dfactor;
_updatePVRBlend(&GL_CONTEXT);
if(GPUState.blend_dfactor != dfactor || GPUState.blend_sfactor != sfactor) {
GPUState.blend_sfactor = sfactor;
GPUState.blend_dfactor = dfactor;
GPUState.is_dirty = GL_TRUE;
}
}
@ -547,8 +722,9 @@ void glLineWidth(GLfloat width) {
}
void glPolygonOffset(GLfloat factor, GLfloat units) {
OFFSET_FACTOR = factor;
OFFSET_UNITS = units;
GPUState.offset_factor = factor;
GPUState.offset_units = units;
GPUState.is_dirty = GL_TRUE;
}
void glGetTexParameterfv(GLenum target, GLenum pname, GLfloat *params) {
@ -577,18 +753,20 @@ void glPixelStorei(GLenum pname, GLint param) {
void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
if(SCISSOR_RECT.x == x &&
SCISSOR_RECT.y == y &&
SCISSOR_RECT.width == width &&
SCISSOR_RECT.height == height) {
if(GPUState.scissor_rect.x == x &&
GPUState.scissor_rect.y == y &&
GPUState.scissor_rect.width == width &&
GPUState.scissor_rect.height == height) {
return;
}
SCISSOR_RECT.x = x;
SCISSOR_RECT.y = y;
SCISSOR_RECT.width = width;
SCISSOR_RECT.height = height;
SCISSOR_RECT.applied = false;
GPUState.scissor_rect.x = x;
GPUState.scissor_rect.y = y;
GPUState.scissor_rect.width = width;
GPUState.scissor_rect.height = height;
GPUState.scissor_rect.applied = false;
GPUState.is_dirty = GL_TRUE; // FIXME: do we need this?
_glApplyScissor(false);
}
@ -618,12 +796,12 @@ void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
*/
void _glApplyScissor(bool force) {
/* Don't do anything if clipping is disabled */
if(GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_DISABLE) {
if(!GPUState.scissor_test_enabled) {
return;
}
/* Don't apply if we already applied - nothing changed */
if(SCISSOR_RECT.applied && !force) {
if(GPUState.scissor_rect.applied && !force) {
return;
}
@ -633,27 +811,31 @@ void _glApplyScissor(bool force) {
const VideoMode* vid_mode = GetVideoMode();
GLsizei scissor_width = MAX(MIN(SCISSOR_RECT.width, vid_mode->width), 0);
GLsizei scissor_height = MAX(MIN(SCISSOR_RECT.height, vid_mode->height), 0);
GLsizei scissor_width = MAX(MIN(GPUState.scissor_rect.width, vid_mode->width), 0);
GLsizei scissor_height = MAX(MIN(GPUState.scissor_rect.height, vid_mode->height), 0);
/* force the origin to the lower left-hand corner of the screen */
miny = (vid_mode->height - scissor_height) - SCISSOR_RECT.y;
maxx = (scissor_width + SCISSOR_RECT.x);
miny = (vid_mode->height - scissor_height) - GPUState.scissor_rect.y;
maxx = (scissor_width + GPUState.scissor_rect.x);
maxy = (scissor_height + miny);
/* load command structure while mapping screen coords to TA tiles */
c.flags = GPU_CMD_USERCLIP;
c.d1 = c.d2 = c.d3 = 0;
c.sx = CLAMP(SCISSOR_RECT.x / 32, 0, vid_mode->width / 32);
c.sy = CLAMP(miny / 32, 0, vid_mode->height / 32);
c.ex = CLAMP((maxx / 32) - 1, 0, vid_mode->width / 32);
c.ey = CLAMP((maxy / 32) - 1, 0, vid_mode->height / 32);
uint16_t vw = vid_mode->width >> 5;
uint16_t vh = vid_mode->height >> 5;
c.sx = CLAMP(GPUState.scissor_rect.x >> 5, 0, vw);
c.sy = CLAMP(miny >> 5, 0, vh);
c.ex = CLAMP((maxx >> 5) - 1, 0, vw);
c.ey = CLAMP((maxy >> 5) - 1, 0, vh);
aligned_vector_push_back(&_glOpaquePolyList()->vector, &c, 1);
aligned_vector_push_back(&_glPunchThruPolyList()->vector, &c, 1);
aligned_vector_push_back(&_glTransparentPolyList()->vector, &c, 1);
SCISSOR_RECT.applied = true;
GPUState.scissor_rect.applied = true;
}
void glStencilFunc(GLenum func, GLint ref, GLuint mask) {
@ -671,19 +853,19 @@ void glStencilOp(GLenum sfail, GLenum dpfail, GLenum dppass) {
GLboolean APIENTRY glIsEnabled(GLenum cap) {
switch(cap) {
case GL_DEPTH_TEST:
return DEPTH_TEST_ENABLED;
return GPUState.depth_test_enabled;
case GL_SCISSOR_TEST:
return GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_INSIDE;
return GPUState.scissor_test_enabled;
case GL_CULL_FACE:
return CULLING_ENABLED;
return GPUState.culling_enabled;
case GL_LIGHTING:
return LIGHTING_ENABLED;
return GPUState.lighting_enabled;
case GL_BLEND:
return BLEND_ENABLED;
return GPUState.blend_enabled;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
return POLYGON_OFFSET_ENABLED;
return GPUState.polygon_offset_enabled;
}
return GL_FALSE;
@ -738,10 +920,10 @@ void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) {
MEMCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
break;
case GL_POLYGON_OFFSET_FACTOR:
*params = OFFSET_FACTOR;
*params = GPUState.offset_factor;
break;
case GL_POLYGON_OFFSET_UNITS:
*params = OFFSET_UNITS;
*params = GPUState.offset_units;
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
@ -758,13 +940,13 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) {
*params = (_glGetBoundTexture()) ? _glGetBoundTexture()->index : 0;
break;
case GL_DEPTH_FUNC:
*params = DEPTH_FUNC;
*params = GPUState.depth_func;
break;
case GL_BLEND_SRC:
*params = BLEND_SFACTOR;
*params = GPUState.blend_sfactor;
break;
case GL_BLEND_DST:
*params = BLEND_DFACTOR;
*params = GPUState.blend_dfactor;
break;
case GL_MAX_TEXTURE_SIZE:
*params = MAX_TEXTURE_SIZE;

View File

@ -607,6 +607,8 @@ void APIENTRY glBindTexture(GLenum target, GLuint texture) {
} else {
TEXTURE_UNITS[ACTIVE_TEXTURE] = NULL;
}
_glGPUStateMarkDirty();
}
void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) {
@ -668,6 +670,8 @@ void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) {
default:
break;
}
_glGPUStateMarkDirty();
}
void APIENTRY glTexEnvf(GLenum target, GLenum pname, GLfloat param) {
@ -1215,12 +1219,15 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) {
GLuint size = active->baseDataSize;
/* Copy the data out of the pvr and back to ram */
GLubyte* temp = (GLubyte*) malloc(size);
memcpy(temp, active->data, size);
GLubyte* temp = NULL;
if(active->data) {
temp = (GLubyte*) malloc(size);
memcpy(temp, active->data, size);
/* Free the PVR data */
yalloc_free(YALLOC_BASE, active->data);
active->data = NULL;
/* Free the PVR data */
yalloc_free(YALLOC_BASE, active->data);
active->data = NULL;
}
/* Figure out how much room to allocate for mipmaps */
GLuint bytes = _glGetMipmapDataSize(active);
@ -1228,17 +1235,15 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) {
active->data = yalloc_alloc_and_defrag(bytes);
gl_assert(active->data);
if(!active->data) {
if(temp) {
/* If there was existing data, then copy it where it should go */
memcpy(_glGetMipmapLocation(active, 0), temp, size);
/* We no longer need this */
free(temp);
return;
}
/* If there was existing data, then copy it where it should go */
memcpy(_glGetMipmapLocation(active, 0), temp, size);
/* We no longer need this */
free(temp);
/* Set the data offset depending on whether or not this is a
* paletted texture */
active->baseDataOffset = _glGetMipmapDataOffset(active, 0);
@ -1593,6 +1598,7 @@ void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) {
break;
case GL_TEXTURE_WRAP_S:
switch(param) {
case GL_CLAMP_TO_EDGE:
case GL_CLAMP:
active->uv_clamp |= CLAMP_U;
break;
@ -1606,6 +1612,7 @@ void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) {
case GL_TEXTURE_WRAP_T:
switch(param) {
case GL_CLAMP_TO_EDGE:
case GL_CLAMP:
active->uv_clamp |= CLAMP_V;
break;
@ -1623,6 +1630,8 @@ void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param) {
break;
}
}
_glGPUStateMarkDirty();
}
void APIENTRY glTexParameterf(GLenum target, GLenum pname, GLfloat param) {
@ -1780,6 +1789,8 @@ GLAPI void APIENTRY glColorTableEXT(GLenum target, GLenum internalFormat, GLsize
}
_glApplyColorTable(palette);
_glGPUStateMarkDirty();
}
GLAPI void APIENTRY glColorSubTableEXT(GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data) {

View File

@ -13,4 +13,4 @@ typedef struct {
* but we're not using that for now, so having W here makes the code
* simpler */
float w;
} Vertex;
} __attribute__ ((aligned (32))) Vertex;

View File

@ -3,6 +3,7 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
@ -17,22 +18,6 @@ static inline void* memalign(size_t alignment, size_t size) {
#include <malloc.h>
#endif
#ifdef __DREAMCAST__
#include <kos/string.h>
#define AV_MEMCPY4 memcpy4
#else
#define AV_MEMCPY4 memcpy
#endif
typedef struct {
unsigned int size;
unsigned int capacity;
unsigned char* data;
unsigned int element_size;
} AlignedVector;
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
#ifdef __cplusplus
#define AV_FORCE_INLINE static inline
#else
@ -41,6 +26,54 @@ typedef struct {
#define AV_FORCE_INLINE static AV_INLINE_DEBUG
#endif
#ifdef __DREAMCAST__
#include <kos/string.h>
/*
 * Dreamcast-only implementation of AV_MEMCPY4 (this definition sits inside
 * #ifdef __DREAMCAST__; on other platforms the name is a macro for memcpy).
 *
 * Copies `len` bytes from `src` to `dest` and returns `dest`. A `len` of zero
 * returns immediately, which also keeps the decrement-and-test loop below from
 * being entered with a zero counter.
 *
 * Despite the "4" in the name, the loop moves one byte per iteration (mov.b),
 * so the code shown here does not itself require 4-byte alignment or a length
 * that is a multiple of four.
 *
 * Technique: instead of maintaining a second incrementing pointer for the
 * destination, the constant distance between the two buffers (`diff`) is
 * computed once and every store is addressed as (source pointer + diff).
 *
 * NOTE(review): the "z" constraint presumably pins `diff` to r0, which the
 * @(R0, Rn) indexed store form needs - confirm against the GCC SH constraints.
 * NOTE(review): the pointer -> uint32_t casts assume 32-bit pointers.
 */
AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
{
if(!len)
{
return dest;
}
const uint8_t *s = (uint8_t *)src;
uint8_t *d = (uint8_t *)dest;
uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
// Underflow would be like adding a negative offset
// Can use 'd' as a scratch reg now
asm volatile (
"clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
".align 2\n"
"0:\n\t"
"dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
"mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
"bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2)
" mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
: [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
: [offset] "z" (diff) // inputs
: "t", "memory" // clobbers
);
return dest;
}
#else
#define AV_MEMCPY4 memcpy
#endif
/*
 * Bookkeeping record for an aligned vector: one byte pointer followed by
 * three 32-bit counters.
 *
 * NOTE(review): size/capacity presumably count elements of `element_size`
 * bytes each - confirm against the aligned_vector_* functions.
 * NOTE(review): as written, aligned(32) qualifies the pointer member itself,
 * not the buffer it points to; if the intent is a 32-byte-aligned buffer that
 * has to come from the allocator - confirm.
 */
typedef struct {
uint8_t* __attribute__((aligned(32))) data;
uint32_t size;
uint32_t capacity;
uint32_t element_size;
} AlignedVector;
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
#define ROUND_TO_CHUNK_SIZE(v) \
((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)

View File

@ -665,6 +665,7 @@ GLAPI void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Fog Functions - client must enable GL_FOG for this to take effect */
GLAPI void APIENTRY glFogi(GLenum pname, GLint param);
GLAPI void APIENTRY glFogf(GLenum pname, GLfloat param);
GLAPI void APIENTRY glFogiv(GLenum pname, const GLint* params);
GLAPI void APIENTRY glFogfv(GLenum pname, const GLfloat *params);
/* Lighting Functions - client must enable GL_LIGHTING for this to take effect */

View File

@ -100,7 +100,7 @@ void do_frame() {
glKosSwapBuffers();
}
time_t start;
time_t begin;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 3, ppf * 3 * 60);
@ -113,8 +113,8 @@ void check_switch() {
now = time(NULL);
if(now >= (start + 5)) {
start = time(NULL);
if(now >= (begin + 5)) {
begin = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
@ -165,7 +165,7 @@ int main(int argc, char **argv) {
/* Start off with something obscene */
switch_tests(200000 / 60);
start = time(NULL);
begin = time(NULL);
for(;;) {
if(check_start())

View File

@ -112,7 +112,7 @@ void do_frame() {
glKosSwapBuffers();
}
time_t start;
time_t begin;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 2, ppf * 2 * 60);
@ -125,8 +125,8 @@ void check_switch() {
now = time(NULL);
if(now >= (start + 5)) {
start = time(NULL);
if(now >= (begin + 5)) {
begin = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
@ -184,7 +184,7 @@ int main(int argc, char **argv) {
/* Start off with something obscene */
switch_tests(200000 / 60);
start = time(NULL);
begin = time(NULL);
uint32_t iterations = 2000;

View File

@ -93,7 +93,7 @@ void do_frame() {
glKosSwapBuffers();
}
time_t start;
time_t begin;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 2, ppf * 2 * 60);
@ -106,8 +106,8 @@ void check_switch() {
now = time(NULL);
if(now >= (start + 5)) {
start = time(NULL);
if(now >= (begin + 5)) {
begin = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
@ -155,7 +155,7 @@ int main(int argc, char **argv) {
/* Start off with something obscene */
switch_tests(220000 / 60);
start = time(NULL);
begin = time(NULL);
for(;;) {
if(check_start())

View File

@ -49,7 +49,7 @@ ENDIF()
add_link_options(-L$ENV{KOS_BASE}/lib/dreamcast)
link_libraries(-Wl,--start-group -lstdc++ -lkallisti -lc -lgcc -Wl,--end-group m)
SET(CMAKE_EXECUTABLE_SUFFIX ".elf")
SET(CMAKE_EXECUTABLE_SUFFIX_C ".elf")
SET(CMAKE_EXECUTABLE_SUFFIX_CXX ".elf")
ADD_DEFINITIONS(