Merge branch 'dma_exploration' into 'master'
Fixes rollup. See merge request HaydenKow/GLdc!3.
This commit is contained in:
commit
082c381667
|
@ -91,9 +91,6 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
|
|||
const Vertex* vertices = triangle->vertex;
|
||||
const VertexExtra* extras = triangle->extra;
|
||||
|
||||
/* Used when flat shading is enabled */
|
||||
uint32_t finalColour = *((uint32_t*) vertices[2].bgra);
|
||||
|
||||
for(i = 0; i < 4; ++i) {
|
||||
uint8_t thisIndex = (i == 3) ? 0 : i;
|
||||
|
||||
|
@ -123,7 +120,7 @@ void _glClipTriangle(const Triangle* triangle, const uint8_t visible, Submission
|
|||
interpolateVec2(ve1->st, ve2->st, t, veNext.st);
|
||||
|
||||
if(flatShade) {
|
||||
*((uint32_t*) next.bgra) = finalColour;
|
||||
*((uint32_t*) next.bgra) = *((uint32_t*) vertices[2].bgra);
|
||||
} else {
|
||||
interpolateColour(v1->bgra, v2->bgra, t, next.bgra);
|
||||
}
|
||||
|
|
49
GL/draw.c
49
GL/draw.c
|
@ -798,31 +798,34 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
|
|||
|
||||
sq_cpy(start, VERTEX_POINTER.ptr - ( sizeof(unsigned int) * 1 ), count * sizeof(Vertex) );
|
||||
|
||||
ITERATE(count) {
|
||||
it->flags = PVR_CMD_VERTEX;
|
||||
++it;
|
||||
}
|
||||
if(start->flags == 0){
|
||||
|
||||
profiler_checkpoint("flags");
|
||||
ITERATE(count) {
|
||||
it->flags = PVR_CMD_VERTEX;
|
||||
++it;
|
||||
}
|
||||
|
||||
// Drawing arrays
|
||||
switch(mode) {
|
||||
case GL_TRIANGLES:
|
||||
genTriangles(start, count);
|
||||
break;
|
||||
case GL_QUADS:
|
||||
genQuads(start, count);
|
||||
break;
|
||||
case GL_TRIANGLE_FAN:
|
||||
genTriangleFan(start, count);
|
||||
break;
|
||||
case GL_TRIANGLE_STRIP:
|
||||
genTriangleStrip(_glSubmissionTargetStart(target), count);
|
||||
break;
|
||||
default:
|
||||
printf("mode: 0x%08x\n", mode);
|
||||
fflush(stdout);
|
||||
assert(0 && "Not Implemented");
|
||||
profiler_checkpoint("flags");
|
||||
|
||||
// Drawing arrays
|
||||
switch(mode) {
|
||||
case GL_TRIANGLES:
|
||||
genTriangles(start, count);
|
||||
break;
|
||||
case GL_QUADS:
|
||||
genQuads(start, count);
|
||||
break;
|
||||
case GL_TRIANGLE_FAN:
|
||||
genTriangleFan(start, count);
|
||||
break;
|
||||
case GL_TRIANGLE_STRIP:
|
||||
genTriangleStrip(_glSubmissionTargetStart(target), count);
|
||||
break;
|
||||
default:
|
||||
printf("mode: 0x%08x\n", mode);
|
||||
fflush(stdout);
|
||||
assert(0 && "Not Implemented");
|
||||
}
|
||||
}
|
||||
|
||||
profiler_checkpoint("quads");
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#define DEG2RAD (0.01745329251994329576923690768489)
|
||||
|
||||
/* Viewport mapping */
|
||||
static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
|
||||
//static GLfloat gl_viewport_scale[3], gl_viewport_offset[3];
|
||||
|
||||
/* Depth range */
|
||||
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
|
||||
|
@ -78,8 +78,8 @@ void _glInitMatrices() {
|
|||
stack_push(&MATRIX_STACKS[1], IDENTITY);
|
||||
stack_push(&MATRIX_STACKS[2], IDENTITY);
|
||||
|
||||
memcpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
||||
memcpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
||||
sq_cpy(NORMAL_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
||||
sq_cpy(SCREENVIEW_MATRIX, IDENTITY, sizeof(Matrix4x4));
|
||||
|
||||
glDepthRange(0.0f, 1.0f);
|
||||
glViewport(0, 0, vid_mode->width, vid_mode->height);
|
||||
|
@ -123,7 +123,7 @@ static void transpose(GLfloat* m) {
|
|||
}
|
||||
|
||||
static void recalculateNormalMatrix() {
|
||||
memcpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
|
||||
sq_cpy(NORMAL_MATRIX, stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)), sizeof(Matrix4x4));
|
||||
inverse((GLfloat*) NORMAL_MATRIX);
|
||||
transpose((GLfloat*) NORMAL_MATRIX);
|
||||
}
|
||||
|
|
62
GL/private.h
62
GL/private.h
|
@ -7,6 +7,8 @@
|
|||
#include "../containers/aligned_vector.h"
|
||||
#include "../containers/named_array.h"
|
||||
|
||||
extern void* memcpy4 (void *dest, const void *src, size_t count);
|
||||
|
||||
#define TRACE_ENABLED 0
|
||||
#define TRACE() if(TRACE_ENABLED) {fprintf(stderr, "%s\n", __func__);}
|
||||
|
||||
|
@ -158,64 +160,14 @@ typedef struct {
|
|||
float w;
|
||||
} Vertex;
|
||||
|
||||
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
|
||||
#define _SWAP32(x, y) \
|
||||
#define swapVertex(a, b) \
|
||||
do { \
|
||||
uint32_t t = *((uint32_t*) &x); \
|
||||
*((uint32_t*) &x) = *((uint32_t*) &y); \
|
||||
*((uint32_t*) &y) = t; \
|
||||
Vertex temp __attribute__((aligned(32))); \
|
||||
memcpy4(&temp, &b, 32); \
|
||||
memcpy4(&b, &a, 32); \
|
||||
memcpy4(&a, &temp, 32); /* a <- saved copy of b; writing to b here would undo the swap */ \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
||||
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
|
||||
|
||||
|
||||
#define swapVertex(a, b) \
|
||||
do { \
|
||||
_SWAP32(a->flags, b->flags); \
|
||||
_SWAP32(a->xyz[0], b->xyz[0]); \
|
||||
_SWAP32(a->xyz[1], b->xyz[1]); \
|
||||
_SWAP32(a->xyz[2], b->xyz[2]); \
|
||||
_SWAP32(a->uv[0], b->uv[0]); \
|
||||
_SWAP32(a->uv[1], b->uv[1]); \
|
||||
_SWAP32(a->bgra, b->bgra); \
|
||||
_SWAP32(a->w, b->w); \
|
||||
} while(0)
|
||||
|
||||
#if 0
|
||||
/* FIXME: SH4 has a swap.w instruction, we should leverage it here! */
|
||||
inline void _SWAP32( void* x, void* y)
|
||||
{
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
|
||||
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
|
||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y);
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
|
||||
/*
|
||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
||||
*((uint32_t*) &y) = *((uint32_t*) &x) ^ *((uint32_t*) &y); \
|
||||
*((uint32_t*) &x) = *((uint32_t*) &x) ^ *((uint32_t*) &y); */
|
||||
|
||||
|
||||
|
||||
#define swapVertex(a, b) \
|
||||
{ \
|
||||
_SWAP32(&a->flags, &b->flags); \
|
||||
_SWAP32(&a->xyz[0], &b->xyz[0]); \
|
||||
_SWAP32(&a->xyz[1], &b->xyz[1]); \
|
||||
_SWAP32(&a->xyz[2], &b->xyz[2]); \
|
||||
_SWAP32(&a->uv[0], &b->uv[0]); \
|
||||
_SWAP32(&a->uv[1], &b->uv[1]); \
|
||||
_SWAP32(&a->bgra, &b->bgra); \
|
||||
_SWAP32(&a->w, &b->w); \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ClipVertex doesn't have room for these, so we need to parse them
|
||||
* out separately. Potentially 'w' will be housed here if we support oargb */
|
||||
typedef struct {
|
||||
|
|
14
GL/texture.c
14
GL/texture.c
|
@ -358,7 +358,7 @@ static void _glInitializeTextureObject(TextureObject* txr, unsigned int id) {
|
|||
txr->width = txr->height = 0;
|
||||
txr->mipmap = 0;
|
||||
txr->uv_clamp = 0;
|
||||
txr->env = PVR_TXRENV_MODULATE;
|
||||
txr->env = PVR_TXRENV_MODULATEALPHA;
|
||||
txr->data = NULL;
|
||||
txr->mipmapCount = 0;
|
||||
txr->minFilter = GL_NEAREST;
|
||||
|
@ -474,7 +474,7 @@ void APIENTRY glTexEnvi(GLenum target, GLenum pname, GLint param) {
|
|||
|
||||
switch(param) {
|
||||
case GL_MODULATE:
|
||||
active->env = PVR_TXRENV_MODULATE;
|
||||
active->env = PVR_TXRENV_MODULATEALPHA;
|
||||
break;
|
||||
case GL_DECAL:
|
||||
active->env = PVR_TXRENV_DECAL;
|
||||
|
@ -917,12 +917,12 @@ void _glAllocateSpaceForMipmaps(TextureObject* active) {
|
|||
* then free the original
|
||||
*/
|
||||
GLubyte* temp = malloc(size);
|
||||
memcpy(temp, active->data, size);
|
||||
memcpy4(temp, active->data, size);
|
||||
pvr_mem_free(active->data);
|
||||
active->data = pvr_mem_malloc(_glGetMipmapDataSize(active));
|
||||
|
||||
/* If there was existing data, then copy it where it should go */
|
||||
memcpy(_glGetMipmapLocation(active,0), temp, size);
|
||||
memcpy4(_glGetMipmapLocation(active,0), temp, size);
|
||||
free(temp);
|
||||
}
|
||||
|
||||
|
@ -1122,11 +1122,7 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
|
|||
}
|
||||
} else {
|
||||
/* No conversion? Just copy the data, and the pvr_format is correct */
|
||||
if(bytes % 32 == 0){
|
||||
sq_cpy(targetData, data, bytes);
|
||||
} else {
|
||||
memcpy(targetData, data, bytes);
|
||||
}
|
||||
FASTCPY(targetData, data, bytes);
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
2
Makefile
2
Makefile
|
@ -13,7 +13,7 @@ OBJS += containers/stack.o containers/named_array.o containers/aligned_vector.o
|
|||
|
||||
SUBDIRS =
|
||||
|
||||
EXTRA_CFLAGS= -Wall -Wextra -Wstrict-aliasing=0
|
||||
EXTRA_CFLAGS= -Wall -Wextra
|
||||
KOS_CFLAGS += -ffast-math -O2 -funroll-loops -fsingle-precision-constant -Iinclude -funsafe-math-optimizations -DBUILD_LIBGL $(EXTRA_CFLAGS)
|
||||
#KOS_CFLAGS += -O1 -mlra -Iinclude -DBUILD_LIBGL -Wall -Wextra
|
||||
#GCC5_FLAGS = -mfsca -mfsrra -mlra
|
||||
|
|
|
@ -54,7 +54,10 @@ void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
|
|||
assert(vector->data);
|
||||
|
||||
if(original_data) {
|
||||
memcpy(vector->data, original_data, original_byte_size);
|
||||
if(vector->element_size == 32){
|
||||
sq_cpy(vector->data, original_data, original_byte_size);
|
||||
} else
|
||||
memcpy4(vector->data, original_data, original_byte_size);
|
||||
free(original_data);
|
||||
}
|
||||
|
||||
|
@ -74,7 +77,10 @@ void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned
|
|||
unsigned char* dest = vector->data + (vector->element_size * initial_size);
|
||||
|
||||
/* Copy the objects in */
|
||||
memcpy(dest, objs, vector->element_size * count);
|
||||
if(vector->element_size == 32){
|
||||
sq_cpy(dest, objs, vector->element_size * count);
|
||||
} else
|
||||
memcpy4(dest, objs, vector->element_size * count);
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
@ -139,7 +145,10 @@ void aligned_vector_shrink_to_fit(AlignedVector* vector) {
|
|||
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
|
||||
|
||||
if(original_data) {
|
||||
memcpy(vector->data, original_data, new_byte_size);
|
||||
if(vector->element_size == 32){
|
||||
sq_cpy(vector->data, original_data, new_byte_size);
|
||||
} else
|
||||
memcpy4(vector->data, original_data, new_byte_size);
|
||||
free(original_data);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user