fixes: rollup of minor stuff
This commit is contained in:
parent
7aabea010d
commit
c222c23ae3
|
@ -91,9 +91,6 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
|
||||||
const Vertex* vertices = triangle->vertex;
|
const Vertex* vertices = triangle->vertex;
|
||||||
const VertexExtra* extras = triangle->extra;
|
const VertexExtra* extras = triangle->extra;
|
||||||
|
|
||||||
/* Used when flat shading is enabled */
|
|
||||||
uint32_t finalColour = *((uint32_t*) vertices[2].bgra);
|
|
||||||
|
|
||||||
for(i = 0; i < 4; ++i) {
|
for(i = 0; i < 4; ++i) {
|
||||||
uint8_t thisIndex = (i == 3) ? 0 : i;
|
uint8_t thisIndex = (i == 3) ? 0 : i;
|
||||||
|
|
||||||
|
@ -123,7 +120,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
|
||||||
interpolateVec2(ve1->st, ve2->st, t, veNext.st);
|
interpolateVec2(ve1->st, ve2->st, t, veNext.st);
|
||||||
|
|
||||||
if(flatShade) {
|
if(flatShade) {
|
||||||
*((uint32_t*) next.bgra) = finalColour;
|
*((uint32_t*) next.bgra) = *((uint32_t*) vertices[2].bgra);
|
||||||
} else {
|
} else {
|
||||||
interpolateColour(v1->bgra, v2->bgra, t, next.bgra);
|
interpolateColour(v1->bgra, v2->bgra, t, next.bgra);
|
||||||
}
|
}
|
||||||
|
|
49
GL/draw.c
49
GL/draw.c
|
@ -798,31 +798,34 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
|
||||||
|
|
||||||
sq_cpy(start, VERTEX_POINTER.ptr - ( sizeof(unsigned int) * 1 ), count * sizeof(Vertex) );
|
sq_cpy(start, VERTEX_POINTER.ptr - ( sizeof(unsigned int) * 1 ), count * sizeof(Vertex) );
|
||||||
|
|
||||||
ITERATE(count) {
|
if(start->flags == 0){
|
||||||
it->flags = PVR_CMD_VERTEX;
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
|
|
||||||
profiler_checkpoint("flags");
|
ITERATE(count) {
|
||||||
|
it->flags = PVR_CMD_VERTEX;
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
|
||||||
// Drawing arrays
|
profiler_checkpoint("flags");
|
||||||
switch(mode) {
|
|
||||||
case GL_TRIANGLES:
|
// Drawing arrays
|
||||||
genTriangles(start, count);
|
switch(mode) {
|
||||||
break;
|
case GL_TRIANGLES:
|
||||||
case GL_QUADS:
|
genTriangles(start, count);
|
||||||
genQuads(start, count);
|
break;
|
||||||
break;
|
case GL_QUADS:
|
||||||
case GL_TRIANGLE_FAN:
|
genQuads(start, count);
|
||||||
genTriangleFan(start, count);
|
break;
|
||||||
break;
|
case GL_TRIANGLE_FAN:
|
||||||
case GL_TRIANGLE_STRIP:
|
genTriangleFan(start, count);
|
||||||
genTriangleStrip(_glSubmissionTargetStart(target), count);
|
break;
|
||||||
break;
|
case GL_TRIANGLE_STRIP:
|
||||||
default:
|
genTriangleStrip(_glSubmissionTargetStart(target), count);
|
||||||
printf("mode: 0x%08x\n", mode);
|
break;
|
||||||
fflush(stdout);
|
default:
|
||||||
assert(0 && "Not Implemented");
|
printf("mode: 0x%08x\n", mode);
|
||||||
|
fflush(stdout);
|
||||||
|
assert(0 && "Not Implemented");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
profiler_checkpoint("quads");
|
profiler_checkpoint("quads");
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
#define DEG2RAD (0.01745329251994329576923690768489)
|
#define DEG2RAD (0.01745329251994329576923690768489)
|
||||||
|
|
||||||
/* Viewport mapping */
|
/* Viewport mapping */
|
||||||
static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
|
//static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
|
||||||
|
|
||||||
/* Depth range */
|
/* Depth range */
|
||||||
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
|
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
|
||||||
|
@ -78,8 +78,8 @@ void _glInitMatrices() {
|
||||||
stack_push(&MATRIX_STACKS[1], IDENTITY);
|
stack_push(&MATRIX_STACKS[1], IDENTITY);
|
||||||
stack_push(&MATRIX_STACKS[2], IDENTITY);
|
stack_push(&MATRIX_STACKS[2], IDENTITY);
|
||||||
|
|
||||||
memcpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
sq_cpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
||||||
memcpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
sq_cpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
||||||
|
|
||||||
glDepthRange(0.0f, 1.0f);
|
glDepthRange(0.0f, 1.0f);
|
||||||
glViewport(0, 0, vid_mode->width, vid_mode->height);
|
glViewport(0, 0, vid_mode->width, vid_mode->height);
|
||||||
|
@ -123,7 +123,7 @@ static void transpose(GLfloat* m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void recalculateNormalMatrix() {
|
static void recalculateNormalMatrix() {
|
||||||
memcpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
|
sq_cpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
|
||||||
inverse((GLfloat*) NORMAL_MATRIX);
|
inverse((GLfloat*) NORMAL_MATRIX);
|
||||||
transpose((GLfloat*) NORMAL_MATRIX);
|
transpose((GLfloat*) NORMAL_MATRIX);
|
||||||
}
|
}
|
||||||
|
|
62
GL/private.h
62
GL/private.h
|
@ -7,6 +7,8 @@
|
||||||
#include "../containers/aligned_vector.h"
|
#include "../containers/aligned_vector.h"
|
||||||
#include "../containers/named_array.h"
|
#include "../containers/named_array.h"
|
||||||
|
|
||||||
|
extern void* memcpy4 (void *dest, const void *src, size_t count);
|
||||||
|
|
||||||
#define TRACE_ENABLED 0
|
#define TRACE_ENABLED 0
|
||||||
#define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);}
|
#define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);}
|
||||||
|
|
||||||
|
@ -158,64 +160,14 @@ typedef struct {
|
||||||
float w;
|
float w;
|
||||||
} Vertex;
|
} Vertex;
|
||||||
|
|
||||||
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
|
/* swapVertex(a, b): exchange the two 32-byte Vertex records that the pointers a and b refer to, via a 32-byte-aligned temporary. a and b are Vertex pointers (the removed field-wise version dereferenced them as a->flags / b->flags), so they are handed to memcpy4 directly rather than by address. */
#define swapVertex(a, b) \
|
||||||
#define _SWAP32(x, y) \
|
|
||||||
do { \
|
do { \
|
||||||
uint32_t t = *((uint32_t*) &x); \
|
Vertex temp __attribute__((aligned(32))); \
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &y); \
|
memcpy4(&temp, (b), 32); /* temp = *b */ \
|
||||||
*((uint32_t*) &y) = t; \
|
memcpy4((b), (a), 32); /* *b = *a */ \
|
||||||
|
memcpy4((a), &temp, 32); /* *a = saved *b; the copy must land in a, not b, or nothing is swapped */ \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
/*
|
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
|
||||||
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
|
|
||||||
|
|
||||||
|
|
||||||
#define swapVertex(a, b) \
|
|
||||||
do { \
|
|
||||||
_SWAP32(a->flags, b->flags); \
|
|
||||||
_SWAP32(a->xyz[0], b->xyz[0]); \
|
|
||||||
_SWAP32(a->xyz[1], b->xyz[1]); \
|
|
||||||
_SWAP32(a->xyz[2], b->xyz[2]); \
|
|
||||||
_SWAP32(a->uv[0], b->uv[0]); \
|
|
||||||
_SWAP32(a->uv[1], b->uv[1]); \
|
|
||||||
_SWAP32(a->bgra, b->bgra); \
|
|
||||||
_SWAP32(a->w, b->w); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
|
|
||||||
inline void _SWAP32( void* x, void* y)
|
|
||||||
{
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
|
|
||||||
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
|
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
|
||||||
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
|
||||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define swapVertex(a, b) \
|
|
||||||
{ \
|
|
||||||
_SWAP32(&a->flags, &b->flags); \
|
|
||||||
_SWAP32(&a->xyz[0], &b->xyz[0]); \
|
|
||||||
_SWAP32(&a->xyz[1], &b->xyz[1]); \
|
|
||||||
_SWAP32(&a->xyz[2], &b->xyz[2]); \
|
|
||||||
_SWAP32(&a->uv[0], &b->uv[0]); \
|
|
||||||
_SWAP32(&a->uv[1], &b->uv[1]); \
|
|
||||||
_SWAP32(&a->bgra, &b->bgra); \
|
|
||||||
_SWAP32(&a->w, &b->w); \
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ClipVertex doesn't have room for these, so we need to parse them
|
/* ClipVertex doesn't have room for these, so we need to parse them
|
||||||
* out separately. Potentially 'w' will be housed here if we support oargb */
|
* out separately. Potentially 'w' will be housed here if we support oargb */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
10
GL/texture.c
10
GL/texture.c
|
@ -917,12 +917,12 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) {
|
||||||
* then free the original
|
* then free the original
|
||||||
*/
|
*/
|
||||||
GLubyte* temp = malloc(size);
|
GLubyte* temp = malloc(size);
|
||||||
memcpy(temp, active->data, size);
|
memcpy4(temp, active->data, size);
|
||||||
pvr_mem_free(active->data);
|
pvr_mem_free(active->data);
|
||||||
active->data = pvr_mem_malloc(_glGetMipmapDataSize(active));
|
active->data = pvr_mem_malloc(_glGetMipmapDataSize(active));
|
||||||
|
|
||||||
/* If there was existing data, then copy it where it should go */
|
/* If there was existing data, then copy it where it should go */
|
||||||
memcpy(_glGetMipmapLocation(active,0), temp, size);
|
memcpy4(_glGetMipmapLocation(active,0), temp, size);
|
||||||
free(temp);
|
free(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1122,11 +1122,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* No conversion? Just copy the data, and the pvr_format is correct */
|
/* No conversion? Just copy the data, and the pvr_format is correct */
|
||||||
if(bytes % 32 == 0){
|
FASTCPY(targetData, data, bytes);
|
||||||
sq_cpy(targetData, data, bytes);
|
|
||||||
} else {
|
|
||||||
memcpy(targetData, data, bytes);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -13,7 +13,7 @@ OBJS += containers/stack.o containers/named_array.o containers/aligned_vector.o
|
||||||
|
|
||||||
SUBDIRS =
|
SUBDIRS =
|
||||||
|
|
||||||
EXTRA_CFLAGS= -Wall -Wextra -Wstrict-aliasing=0
|
EXTRA_CFLAGS= -Wall -Wextra
|
||||||
KOS_CFLAGS += -ffast-math -O2 -funroll-loops -fsingle-precision-constant -Iinclude -funsafe-math-optimizations -DBUILD_LIBGL $(EXTRA_CFLAGS)
|
KOS_CFLAGS += -ffast-math -O2 -funroll-loops -fsingle-precision-constant -Iinclude -funsafe-math-optimizations -DBUILD_LIBGL $(EXTRA_CFLAGS)
|
||||||
#KOS_CFLAGS += -O1 -mlra -Iinclude -DBUILD_LIBGL -Wall -Wextra
|
#KOS_CFLAGS += -O1 -mlra -Iinclude -DBUILD_LIBGL -Wall -Wextra
|
||||||
#GCC5_FLAGS = -mfsca -mfsrra -mlra
|
#GCC5_FLAGS = -mfsca -mfsrra -mlra
|
||||||
|
|
|
@ -54,7 +54,10 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
|
||||||
assert(vector->data);
|
assert(vector->data);
|
||||||
|
|
||||||
if(original_data) {
|
if(original_data) {
|
||||||
memcpy(vector->data, original_data, original_byte_size);
|
if(vector->element_size == 32){
|
||||||
|
sq_cpy(vector->data, original_data, original_byte_size);
|
||||||
|
} else
|
||||||
|
memcpy4(vector->data, original_data, original_byte_size);
|
||||||
free(original_data);
|
free(original_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +77,10 @@ void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned
|
||||||
unsigned char* dest = vector->data + (vector->element_size * initial_size);
|
unsigned char* dest = vector->data + (vector->element_size * initial_size);
|
||||||
|
|
||||||
/* Copy the objects in */
|
/* Copy the objects in */
|
||||||
memcpy(dest, objs, vector->element_size * count);
|
if(vector->element_size == 32){
|
||||||
|
sq_cpy(dest, objs, vector->element_size * count);
|
||||||
|
} else
|
||||||
|
memcpy4(dest, objs, vector->element_size * count);
|
||||||
|
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
@ -139,7 +145,10 @@ void aligned_vector_shrink_to_fit(AlignedVector* vector) {
|
||||||
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
|
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
|
||||||
|
|
||||||
if(original_data) {
|
if(original_data) {
|
||||||
memcpy(vector->data, original_data, new_byte_size);
|
if(vector->element_size == 32){
|
||||||
|
sq_cpy(vector->data, original_data, new_byte_size);
|
||||||
|
} else
|
||||||
|
memcpy4(vector->data, original_data, new_byte_size);
|
||||||
free(original_data);
|
free(original_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user