Merge branch 'dma_exploration' into 'master'

Fixes rollup

See merge request HaydenKow/GLdc!3
This commit is contained in:
Hayden Kowalchuk 2019-11-20 14:16:47 +00:00
commit 082c381667
7 changed files with 56 additions and 99 deletions

View File

@ -91,9 +91,6 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
const Vertex* vertices = triangle->vertex;
const VertexExtra* extras = triangle->extra;
/* Used when flat shading is enabled */
uint32_t finalColour = *((uint32_t*) vertices[2].bgra);
for(i = 0; i < 4; ++i) {
uint8_t thisIndex = (i == 3) ? 0 : i;
@ -123,7 +120,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
interpolateVec2(ve1->st, ve2->st, t, veNext.st);
if(flatShade) {
*((uint32_t*) next.bgra) = finalColour;
*((uint32_t*) next.bgra) = *((uint32_t*) vertices[2].bgra);
} else {
interpolateColour(v1->bgra, v2->bgra, t, next.bgra);
}

View File

@ -798,31 +798,34 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
sq_cpy(start, VERTEX_POINTER.ptr - ( sizeof(unsigned int) * 1 ), count * sizeof(Vertex) );
ITERATE(count) {
it->flags = PVR_CMD_VERTEX;
++it;
}
if(start->flags == 0){
profiler_checkpoint("flags");
ITERATE(count) {
it->flags = PVR_CMD_VERTEX;
++it;
}
// Drawing arrays
switch(mode) {
case GL_TRIANGLES:
genTriangles(start, count);
break;
case GL_QUADS:
genQuads(start, count);
break;
case GL_TRIANGLE_FAN:
genTriangleFan(start, count);
break;
case GL_TRIANGLE_STRIP:
genTriangleStrip(_glSubmissionTargetStart(target), count);
break;
default:
printf("mode: 0x%08x\n", mode);
fflush(stdout);
assert(0 && "Not Implemented");
profiler_checkpoint("flags");
// Drawing arrays
switch(mode) {
case GL_TRIANGLES:
genTriangles(start, count);
break;
case GL_QUADS:
genQuads(start, count);
break;
case GL_TRIANGLE_FAN:
genTriangleFan(start, count);
break;
case GL_TRIANGLE_STRIP:
genTriangleStrip(_glSubmissionTargetStart(target), count);
break;
default:
printf("mode: 0x%08x\n", mode);
fflush(stdout);
assert(0 && "Not Implemented");
}
}
profiler_checkpoint("quads");

View File

@ -13,7 +13,7 @@
#define DEG2RAD (0.01745329251994329576923690768489)
/* Viewport mapping */
static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
//static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
/* Depth range */
/* Start-up value of the depth-range multiplier, written as (far - near) / 2
 * for near = 0 and far = 1. Float literals are required here: the integer
 * expression (1 - 0) / 2 truncates to 0 before it is converted to GLfloat. */
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1.0f - 0.0f) / 2.0f;
@ -78,8 +78,8 @@ void _glInitMatrices() {
stack_push(&MATRIX_STACKS[1], IDENTITY);
stack_push(&MATRIX_STACKS[2], IDENTITY);
memcpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
memcpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
sq_cpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
sq_cpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
glDepthRange(0.0f, 1.0f);
glViewport(0, 0, vid_mode->width, vid_mode->height);
@ -123,7 +123,7 @@ static void transpose(GLfloat* m) {
}
/* Rebuilds NORMAL_MATRIX from the matrix currently on top of the modelview
 * stack: the matrix is copied into NORMAL_MATRIX, then inverted and
 * transposed in place. */
static void recalculateNormalMatrix() {
/* Plain copy of the top-of-stack modelview matrix into NORMAL_MATRIX. */
memcpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
/* Same destination, source and size as the memcpy above, so one of the two
 * copies is redundant.
 * NOTE(review): sq_cpy is presumably the KallistiOS store-queue copy, which
 * has alignment/size requirements on its arguments - confirm NORMAL_MATRIX
 * and the stack storage satisfy them before dropping the memcpy. */
sq_cpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
/* Inverse followed by transpose, both applied in place to NORMAL_MATRIX. */
inverse((GLfloat*) NORMAL_MATRIX);
transpose((GLfloat*) NORMAL_MATRIX);
}

View File

@ -7,6 +7,8 @@
#include "../containers/aligned_vector.h"
#include "../containers/named_array.h"
extern void* memcpy4 (void *dest, const void *src, size_t count);
/* Compile-time switch for function-entry tracing (0 = off, non-zero = on). */
#define TRACE_ENABLED 0

/* Prints the name of the enclosing function to stderr when TRACE_ENABLED is
 * non-zero. Wrapped in do { } while(0) so that `TRACE();` is exactly one
 * statement and can sit in an unbraced if/else without detaching the else. */
#define TRACE() \
    do { \
        if(TRACE_ENABLED) { \
            fprintf(stderr, "%s\n", __func__); \
        } \
    } while(0)
@ -158,64 +160,14 @@ typedef struct {
float w;
} Vertex;
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
/* Exchanges the 32-bit storage of two lvalues, whatever their declared type. */
#define _SWAP32(x, y) \
do { \
    uint32_t t = *((uint32_t*) &x); \
    *((uint32_t*) &x) = *((uint32_t*) &y); \
    *((uint32_t*) &y) = t; \
} while(0)

/* Exchanges the contents of two Vertex objects through an aligned temporary:
 * temp = b, b = a, a = temp. The final copy has to target `a`; copying the
 * temporary back into `b` would leave both vertices unchanged.
 *
 * `a` and `b` are taken by address, so they must be Vertex lvalues.
 * NOTE(review): the field-wise swapVertex variant elsewhere in this header
 * dereferences its arguments with `->`; call sites that still pass Vertex
 * pointers need to pass `*ptr` here - confirm against the callers. */
#define swapVertex(a, b) \
do { \
    Vertex swapVertex_tmp_ __attribute__((aligned(32))); \
    memcpy4(&swapVertex_tmp_, &(b), sizeof(Vertex)); \
    memcpy4(&(b), &(a), sizeof(Vertex)); \
    memcpy4(&(a), &swapVertex_tmp_, sizeof(Vertex)); \
} while(0)
/*
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
#define swapVertex(a, b) \
do { \
_SWAP32(a->flags, b->flags); \
_SWAP32(a->xyz[0], b->xyz[0]); \
_SWAP32(a->xyz[1], b->xyz[1]); \
_SWAP32(a->xyz[2], b->xyz[2]); \
_SWAP32(a->uv[0], b->uv[0]); \
_SWAP32(a->uv[1], b->uv[1]); \
_SWAP32(a->bgra, b->bgra); \
_SWAP32(a->w, b->w); \
} while(0)
#if 0
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
inline void _SWAP32( void* x, void* y)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
#pragma GCC diagnostic pop
}
/*
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
#define swapVertex(a, b) \
{ \
_SWAP32(&a->flags, &b->flags); \
_SWAP32(&a->xyz[0], &b->xyz[0]); \
_SWAP32(&a->xyz[1], &b->xyz[1]); \
_SWAP32(&a->xyz[2], &b->xyz[2]); \
_SWAP32(&a->uv[0], &b->uv[0]); \
_SWAP32(&a->uv[1], &b->uv[1]); \
_SWAP32(&a->bgra, &b->bgra); \
_SWAP32(&a->w, &b->w); \
}
#endif
/* ClipVertex doesn't have room for these, so we need to parse them
* out separately. Potentially 'w' will be housed here if we support oargb */
typedef struct {

View File

@ -358,7 +358,7 @@ static void _glInitializeTextureObject(TextureObject* txr, unsigned int id) {
txr->width = txr->height = 0;
txr->mipmap = 0;
txr->uv_clamp = 0;
txr->env = PVR_TXRENV_MODULATE;
txr->env = PVR_TXRENV_MODULATEALPHA;
txr->data = NULL;
txr->mipmapCount = 0;
txr->minFilter = GL_NEAREST;
@ -474,7 +474,7 @@ void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) {
switch(param) {
case GL_MODULATE:
active->env = PVR_TXRENV_MODULATE;
active->env = PVR_TXRENV_MODULATEALPHA;
break;
case GL_DECAL:
active->env = PVR_TXRENV_DECAL;
@ -917,12 +917,12 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) {
* then free the original
*/
GLubyte* temp = malloc(size);
memcpy(temp, active->data, size);
memcpy4(temp, active->data, size);
pvr_mem_free(active->data);
active->data = pvr_mem_malloc(_glGetMipmapDataSize(active));
/* If there was existing data, then copy it where it should go */
memcpy(_glGetMipmapLocation(active,0), temp, size);
memcpy4(_glGetMipmapLocation(active,0), temp, size);
free(temp);
}
@ -1122,11 +1122,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
}
} else {
/* No conversion? Just copy the data, and the pvr_format is correct */
if(bytes % 32 == 0){
sq_cpy(targetData, data, bytes);
} else {
memcpy(targetData, data, bytes);
}
FASTCPY(targetData, data, bytes);
}
return;

View File

@ -13,7 +13,7 @@ OBJS += containers/stack.o containers/named_array.o containers/aligned_vector.o
SUBDIRS =
EXTRA_CFLAGS= -Wall -Wextra -Wstrict-aliasing=0
EXTRA_CFLAGS= -Wall -Wextra
KOS_CFLAGS += -ffast-math -O2 -funroll-loops -fsingle-precision-constant -Iinclude -funsafe-math-optimizations -DBUILD_LIBGL $(EXTRA_CFLAGS)
#KOS_CFLAGS += -O1 -mlra -Iinclude -DBUILD_LIBGL -Wall -Wextra
#GCC5_FLAGS = -mfsca -mfsrra -mlra

View File

@ -54,7 +54,10 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
assert(vector->data);
if(original_data) {
memcpy(vector->data, original_data, original_byte_size);
if(vector->element_size == 32){
sq_cpy(vector->data, original_data, original_byte_size);
} else
memcpy4(vector->data, original_data, original_byte_size);
free(original_data);
}
@ -74,7 +77,10 @@ void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned
unsigned char* dest = vector->data + (vector->element_size * initial_size);
/* Copy the objects in */
memcpy(dest, objs, vector->element_size * count);
if(vector->element_size == 32){
sq_cpy(dest, objs, vector->element_size * count);
} else
memcpy4(dest, objs, vector->element_size * count);
return dest;
}
@ -139,7 +145,10 @@ void aligned_vector_shrink_to_fit(AlignedVector* vector) {
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
if(original_data) {
memcpy(vector->data, original_data, new_byte_size);
if(vector->element_size == 32){
sq_cpy(vector->data, original_data, new_byte_size);
} else
memcpy4(vector->data, original_data, new_byte_size);
free(original_data);
}