fixes: rollup of minor stuff

This commit is contained in:
Hayden Kowalchuk 2019-10-04 21:32:54 -04:00
parent 7aabea010d
commit c222c23ae3
7 changed files with 54 additions and 97 deletions

View File

@ -91,9 +91,6 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
const Vertex* vertices = triangle->vertex; const Vertex* vertices = triangle->vertex;
const VertexExtra* extras = triangle->extra; const VertexExtra* extras = triangle->extra;
/* Used when flat shading is enabled */
uint32_t finalColour = *((uint32_t*) vertices[2].bgra);
for(i = 0; i < 4; ++i) { for(i = 0; i < 4; ++i) {
uint8_t thisIndex = (i == 3) ? 0 : i; uint8_t thisIndex = (i == 3) ? 0 : i;
@ -123,7 +120,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
interpolateVec2(ve1->st, ve2->st, t, veNext.st); interpolateVec2(ve1->st, ve2->st, t, veNext.st);
if(flatShade) { if(flatShade) {
*((uint32_t*) next.bgra) = finalColour; *((uint32_t*) next.bgra) = *((uint32_t*) vertices[2].bgra);
} else { } else {
interpolateColour(v1->bgra, v2->bgra, t, next.bgra); interpolateColour(v1->bgra, v2->bgra, t, next.bgra);
} }

View File

@ -798,31 +798,34 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
sq_cpy(start, VERTEX_POINTER.ptr - ( sizeof(unsigned int) * 1 ), count * sizeof(Vertex) ); sq_cpy(start, VERTEX_POINTER.ptr - ( sizeof(unsigned int) * 1 ), count * sizeof(Vertex) );
ITERATE(count) { if(start->flags == 0){
it->flags = PVR_CMD_VERTEX;
++it;
}
profiler_checkpoint("flags"); ITERATE(count) {
it->flags = PVR_CMD_VERTEX;
++it;
}
// Drawing arrays profiler_checkpoint("flags");
switch(mode) {
case GL_TRIANGLES: // Drawing arrays
genTriangles(start, count); switch(mode) {
break; case GL_TRIANGLES:
case GL_QUADS: genTriangles(start, count);
genQuads(start, count); break;
break; case GL_QUADS:
case GL_TRIANGLE_FAN: genQuads(start, count);
genTriangleFan(start, count); break;
break; case GL_TRIANGLE_FAN:
case GL_TRIANGLE_STRIP: genTriangleFan(start, count);
genTriangleStrip(_glSubmissionTargetStart(target), count); break;
break; case GL_TRIANGLE_STRIP:
default: genTriangleStrip(_glSubmissionTargetStart(target), count);
printf("mode: 0x%08x\n", mode); break;
fflush(stdout); default:
assert(0 && "Not Implemented"); printf("mode: 0x%08x\n", mode);
fflush(stdout);
assert(0 && "Not Implemented");
}
} }
profiler_checkpoint("quads"); profiler_checkpoint("quads");

View File

@ -13,7 +13,7 @@
#define DEG2RAD (0.01745329251994329576923690768489) #define DEG2RAD (0.01745329251994329576923690768489)
/* Viewport mapping */ /* Viewport mapping */
static GLfloat gl_viewport_scale[3], gl_viewport_offset[3]; //static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
/* Depth range */ /* Depth range */
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2; GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
@ -78,8 +78,8 @@ void _glInitMatrices() {
stack_push(&MATRIX_STACKS[1], IDENTITY); stack_push(&MATRIX_STACKS[1], IDENTITY);
stack_push(&MATRIX_STACKS[2], IDENTITY); stack_push(&MATRIX_STACKS[2], IDENTITY);
memcpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4)); sq_cpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
memcpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4)); sq_cpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
glDepthRange(0.0f, 1.0f); glDepthRange(0.0f, 1.0f);
glViewport(0, 0, vid_mode->width, vid_mode->height); glViewport(0, 0, vid_mode->width, vid_mode->height);
@ -123,7 +123,7 @@ static void transpose(GLfloat* m) {
} }
static void recalculateNormalMatrix() { static void recalculateNormalMatrix() {
memcpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4)); sq_cpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
inverse((GLfloat*) NORMAL_MATRIX); inverse((GLfloat*) NORMAL_MATRIX);
transpose((GLfloat*) NORMAL_MATRIX); transpose((GLfloat*) NORMAL_MATRIX);
} }

View File

@ -7,6 +7,8 @@
#include "../containers/aligned_vector.h" #include "../containers/aligned_vector.h"
#include "../containers/named_array.h" #include "../containers/named_array.h"
extern void* memcpy4 (void *dest, const void *src, size_t count);
#define TRACE_ENABLED 0 #define TRACE_ENABLED 0
#define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);} #define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);}
@ -158,64 +160,14 @@ typedef struct {
float w; float w;
} Vertex; } Vertex;
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */ #define swapVertex(a, b) \
#define _SWAP32(x, y) \
do { \ do { \
uint32_t t = *((uint32_t*) &x); \ Vertex temp __attribute__((aligned(32))); \
*((uint32_t*) &x) = *((uint32_t*) &y); \ memcpy4(&temp, &b, 32); \
*((uint32_t*) &y) = t; \ memcpy4(&b, &a, 32); \
memcpy4(&b, &temp, 32); \
} while(0) } while(0)
/*
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
#define swapVertex(a, b) \
do { \
_SWAP32(a->flags, b->flags); \
_SWAP32(a->xyz[0], b->xyz[0]); \
_SWAP32(a->xyz[1], b->xyz[1]); \
_SWAP32(a->xyz[2], b->xyz[2]); \
_SWAP32(a->uv[0], b->uv[0]); \
_SWAP32(a->uv[1], b->uv[1]); \
_SWAP32(a->bgra, b->bgra); \
_SWAP32(a->w, b->w); \
} while(0)
#if 0
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
inline void _SWAP32( void* x, void* y)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
#pragma GCC diagnostic pop
}
/*
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
#define swapVertex(a, b) \
{ \
_SWAP32(&a->flags, &b->flags); \
_SWAP32(&a->xyz[0], &b->xyz[0]); \
_SWAP32(&a->xyz[1], &b->xyz[1]); \
_SWAP32(&a->xyz[2], &b->xyz[2]); \
_SWAP32(&a->uv[0], &b->uv[0]); \
_SWAP32(&a->uv[1], &b->uv[1]); \
_SWAP32(&a->bgra, &b->bgra); \
_SWAP32(&a->w, &b->w); \
}
#endif
/* ClipVertex doesn't have room for these, so we need to parse them /* ClipVertex doesn't have room for these, so we need to parse them
* out separately. Potentially 'w' will be housed here if we support oargb */ * out separately. Potentially 'w' will be housed here if we support oargb */
typedef struct { typedef struct {

View File

@ -917,12 +917,12 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) {
* then free the original * then free the original
*/ */
GLubyte* temp = malloc(size); GLubyte* temp = malloc(size);
memcpy(temp, active->data, size); memcpy4(temp, active->data, size);
pvr_mem_free(active->data); pvr_mem_free(active->data);
active->data = pvr_mem_malloc(_glGetMipmapDataSize(active)); active->data = pvr_mem_malloc(_glGetMipmapDataSize(active));
/* If there was existing data, then copy it where it should go */ /* If there was existing data, then copy it where it should go */
memcpy(_glGetMipmapLocation(active,0), temp, size); memcpy4(_glGetMipmapLocation(active,0), temp, size);
free(temp); free(temp);
} }
@ -1122,11 +1122,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
} }
} else { } else {
/* No conversion? Just copy the data, and the pvr_format is correct */ /* No conversion? Just copy the data, and the pvr_format is correct */
if(bytes % 32 == 0){ FASTCPY(targetData, data, bytes);
sq_cpy(targetData, data, bytes);
} else {
memcpy(targetData, data, bytes);
}
} }
return; return;

View File

@ -13,7 +13,7 @@ OBJS += containers/stack.o containers/named_array.o containers/aligned_vector.o
SUBDIRS = SUBDIRS =
EXTRA_CFLAGS= -Wall -Wextra -Wstrict-aliasing=0 EXTRA_CFLAGS= -Wall -Wextra
KOS_CFLAGS += -ffast-math -O2 -funroll-loops -fsingle-precision-constant -Iinclude -funsafe-math-optimizations -DBUILD_LIBGL $(EXTRA_CFLAGS) KOS_CFLAGS += -ffast-math -O2 -funroll-loops -fsingle-precision-constant -Iinclude -funsafe-math-optimizations -DBUILD_LIBGL $(EXTRA_CFLAGS)
#KOS_CFLAGS += -O1 -mlra -Iinclude -DBUILD_LIBGL -Wall -Wextra #KOS_CFLAGS += -O1 -mlra -Iinclude -DBUILD_LIBGL -Wall -Wextra
#GCC5_FLAGS = -mfsca -mfsrra -mlra #GCC5_FLAGS = -mfsca -mfsrra -mlra

View File

@ -54,7 +54,10 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
assert(vector->data); assert(vector->data);
if(original_data) { if(original_data) {
memcpy(vector->data, original_data, original_byte_size); if(vector->element_size == 32){
sq_cpy(vector->data, original_data, original_byte_size);
} else
memcpy4(vector->data, original_data, original_byte_size);
free(original_data); free(original_data);
} }
@ -74,7 +77,10 @@ void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned
unsigned char* dest = vector->data + (vector->element_size * initial_size); unsigned char* dest = vector->data + (vector->element_size * initial_size);
/* Copy the objects in */ /* Copy the objects in */
memcpy(dest, objs, vector->element_size * count); if(vector->element_size == 32){
sq_cpy(dest, objs, vector->element_size * count);
} else
memcpy4(dest, objs, vector->element_size * count);
return dest; return dest;
} }
@ -139,7 +145,10 @@ void aligned_vector_shrink_to_fit(AlignedVector* vector) {
vector->data = (unsigned char*) memalign(0x20, new_byte_size); vector->data = (unsigned char*) memalign(0x20, new_byte_size);
if(original_data) { if(original_data) {
memcpy(vector->data, original_data, new_byte_size); if(vector->element_size == 32){
sq_cpy(vector->data, original_data, new_byte_size);
} else
memcpy4(vector->data, original_data, new_byte_size);
free(original_data); free(original_data);
} }