Compare commits

..

No commits in common. "master" and "super-clip" have entirely different histories.

60 changed files with 3553 additions and 14160 deletions

2
.gitignore vendored
View File

@ -9,5 +9,3 @@ dc-build.sh
build/*
builddir/*
version.[c|h]
pcbuild/*
dcbuild/*

View File

@ -1,6 +1,5 @@
stages:
- build
- test
build:sh4-gcc:
stage: build
@ -18,28 +17,11 @@ build:sh4-gcc:
build:x86-gcc:
stage: build
image: fedora:38
image: fedora:34
before_script:
- sudo dnf install -y cmake gcc gcc-c++ SDL2.i686 SDL2-devel.x86_64 glibc-devel glibc-devel.i686 SDL2-devel.i686 pkgconf-pkg-config.i686 pkgconf-pkg-config.x86_64
- sudo dnf install -y cmake gcc gcc-c++ SDL2-devel glibc-devel pkgconf-pkg-config glibc-devel.i686 SDL2-devel.i686
script:
- mkdir builddir
- cd builddir
- cmake -DCMAKE_BUILD_TYPE=Release ..
- make
artifacts:
paths:
- builddir/tests/gldc_tests
test:x86-gcc:
stage: test
image: fedora:38
dependencies:
- build:x86-gcc
before_script:
- sudo dnf install -y cmake gcc gcc-c++ SDL2.i686 SDL2-devel glibc-devel pkgconf-pkg-config glibc-devel.i686 SDL2-devel.i686 pkgconf-pkg-config.i686
script:
- cd builddir/tests/
- SDL_VIDEODRIVER=dummy ./gldc_tests --junit-xml=report.xml
artifacts:
reports:
junit: builddir/tests/report.xml

View File

@ -1,8 +1,6 @@
cmake_minimum_required(VERSION 3.9)
cmake_minimum_required(VERSION 3.0)
project(GLdc)
set(CMAKE_VERBOSE_MAKEFILE ON)
# set the default backend
if(PLATFORM_DREAMCAST)
set(BACKEND "kospvr" CACHE STRING "Backend to use")
@ -10,9 +8,6 @@ else()
set(BACKEND "software" CACHE STRING "Backend to use")
endif()
include(CheckIPOSupported)
check_ipo_supported(RESULT FLTO_SUPPORTED OUTPUT FLTO_ERROR)
# List of possible backends
set_property(CACHE BACKEND PROPERTY STRINGS kospvr software)
@ -22,46 +17,16 @@ string(TOUPPER ${BACKEND} BACKEND_UPPER)
add_definitions(-DBACKEND_${BACKEND_UPPER})
set(CMAKE_C_STANDARD 99)
set(CMAKE_CXX_STANDARD 11)
include_directories(include)
if(NOT PLATFORM_DREAMCAST)
set(FIND_LIBRARY_USE_LIB32_PATHS true)
set(FIND_LIBRARY_USE_LIB64_PATHS false)
else()
include(CheckCCompilerFlag)
check_c_compiler_flag("-mfsrra" COMPILER_HAS_FSRRA)
check_c_compiler_flag("-mfsca" COMPILER_HAS_FSCA)
if(COMPILER_HAS_FSRRA)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsrra")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfsrra")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -mfsrra")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -mfsrra")
endif()
if(COMPILER_HAS_FSCA)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsca")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfsca")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -mfsca")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -mfsca")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -ffp-contract=fast -ffast-math")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -ffp-contract=fast -ffast-math")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 --fast-math -fexpensive-optimizations -funroll-all-loops")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
set(
SOURCES
@ -80,7 +45,7 @@ set(
GL/state.c
GL/texture.c
GL/util.c
GL/alloc/alloc.c
GL/yalloc/yalloc.c
${CMAKE_CURRENT_BINARY_DIR}/version.c
)
@ -111,10 +76,6 @@ endif()
add_library(GLdc STATIC ${SOURCES})
if(FLTO_SUPPORTED)
set_property(TARGET GLdc PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
if(NOT PLATFORM_DREAMCAST)
set_target_properties(GLdc PROPERTIES
COMPILE_OPTIONS "-m32"
@ -138,13 +99,6 @@ function(gen_sample sample)
add_executable(${sample} ${SAMPLE_SRCS})
if(FLTO_SUPPORTED)
# FIXME: Cubes + LTO causes an ICE
if(NOT ${sample} MATCHES "cubes")
set_property(TARGET ${sample} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
endif()
if(PLATFORM_DREAMCAST)
if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk")
message("Generating romdisk for sample: ${sample}")
@ -175,8 +129,6 @@ function(gen_sample sample)
endif()
endfunction()
add_subdirectory(tests)
gen_sample(blend_test samples/blend_test/main.c)
gen_sample(depth_funcs samples/depth_funcs/main.c)
gen_sample(depth_funcs_alpha_testing samples/depth_funcs_alpha_testing/main.c samples/depth_funcs_alpha_testing/gl_png.c)
@ -207,14 +159,11 @@ gen_sample(zclip_triangle samples/zclip_triangle/main.c)
gen_sample(zclip_trianglestrip samples/zclip_trianglestrip/main.c)
gen_sample(scissor samples/scissor/main.c)
gen_sample(polymark samples/polymark/main.c)
gen_sample(cubes samples/cubes/main.cpp)
gen_sample(zclip_test tests/zclip/main.cpp)
if(PLATFORM_DREAMCAST)
gen_sample(trimark samples/trimark/main.c)
gen_sample(quadmark samples/quadmark/main.c samples/profiler.c)
gen_sample(prof_texture_upload samples/prof_texture_upload/main.c samples/profiler.c)
else()
gen_sample(quadmark samples/quadmark/main.c)
gen_sample(prof_texture_upload samples/prof_texture_upload/main.c)
endif()

View File

@ -1,534 +0,0 @@
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "alloc.h"
/* This allocator is designed so that ideally all allocations larger
* than 2k, fall on a 2k boundary. Smaller allocations will
* never cross a 2k boundary.
*
* House keeping is stored in RAM to avoid reading back from the
* VRAM to check for usage. Headers can't be easily stored in the
* blocks anyway as they have to be 2k aligned (so you'd need to
* store them in reverse or something)
*
* Defragmenting the pool will move larger allocations first, then
* smaller ones, recursively until you tell it to stop, or until things
* stop moving.
*
* The maximum pool size is 8M, made up of:
*
* - 4096 blocks of 2k
* - each with 8 sub-blocks of 256 bytes
*
* Why?
*
* The PVR performs better if textures don't cross 2K memory
* addresses, so we try to avoid that. Obviously we can't
* if the allocation is > 2k, but in that case we can at least
* align with 2k and the VQ codebook (which is usually 2k) will
* be in its own page.
*
* The smallest PVR texture allowed is 8x8 at 16 bit (so 128 bytes)
* but we're unlikely to use too many of those, so having a min sub-block
* size of 256 should be OK (a 16x16 image is 512, so two sub-blocks).
*
* We could go down to 128 bytes if wastage is an issue, but then we have
* to store double the number of usage markers.
*
* FIXME:
*
* - Only operates on one pool (ignores what you pass)
*/
#include <assert.h>
#include <stdio.h>
#define EIGHT_MEG (8 * 1024 * 1024)
#define TWO_KILOBYTES (2 * 1024)
#define BLOCK_COUNT (EIGHT_MEG / TWO_KILOBYTES)
#define ALLOC_DEBUG 0
#if ALLOC_DEBUG
#define DBG_MSG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
#else
#define DBG_MSG(fmt, ...) do {} while (0)
#endif
/* Round n up to the nearest multiple of `multiple`.
 * Values already on a boundary are returned unchanged.
 *
 * FIX: the divisor guard used to sit *after* the `n % multiple` check, so a
 * zero `multiple` hit undefined behaviour (division by zero) before the
 * assert could fire. Validate first. */
static inline intptr_t round_up(intptr_t n, int multiple)
{
    assert(multiple);

    if((n % multiple) == 0) {
        return n;
    }

    /* Add (multiple - 1) then truncate back down to the boundary. */
    return ((n + multiple - 1) / multiple) * multiple;
}
/* Singly-linked list node recording one live allocation.
 * The list is kept ordered by size, descending (see alloc_malloc), so the
 * defragmenter can move large allocations before small ones. Nodes live in
 * regular RAM (malloc), never inside the managed pool. */
struct AllocEntry {
    void* pointer;           /* Address handed back to the caller */
    size_t size;             /* Requested size in bytes (not rounded up) */
    struct AllocEntry* next; /* Next entry, or NULL at the tail */
};
/* All allocator house-keeping. Kept in regular RAM so usage can be checked
 * without ever reading back from (slow) pool memory. */
typedef struct {
    /* This is a usage bitmask for each block. A block
     * is divided into 8 x 256 byte subblocks. If a block
     * is entirely used, it's value will be 255, if
     * it's entirely free then it will be 0.
     */
    uint8_t block_usage[BLOCK_COUNT];
    uint8_t* pool;          // Pointer to the memory pool
    size_t pool_size;       // Size of the memory pool
    uint8_t* base_address;  // First 2k aligned address in the pool
    size_t block_count;     // Number of 2k blocks in the pool

    /* It's frustrating that we need to do this dynamically
     * but we need to know the size allocated when we free()...
     * we could store it statically but it would take 64k if we had
     * an array of block_index -> block size where there would be 2 ** 32
     * entries of 16 bit block sizes. The drawback (aside the memory usage)
     * would be that we won't be able to order by size, so defragging will
     * take much more time.*/
    struct AllocEntry* allocations;
} PoolHeader;
/* The single global pool instance. The `pool` argument on the public API is
 * currently accepted but ignored (see the FIXME at the top of this file). */
static PoolHeader pool_header = {
    {0}, NULL, 0, NULL, 0, NULL
};
/* Return the first 2048-aligned address inside the initialised pool.
 * `pool` is ignored (single global pool). */
void* alloc_base_address(void* pool) {
    (void) pool;
    return pool_header.base_address;
}
/* Return the number of usable 2048-byte blocks in the pool.
 * `pool` is ignored (single global pool). */
size_t alloc_block_count(void* pool) {
    (void) pool;
    return pool_header.block_count;
}
/* Convert a position in the usage bitmap into a pool address.
 *
 * `block_usage_iterator` points at the bitmap byte containing the *last*
 * free subblock of a found run, `bit_offset` is that subblock's bit index
 * within the byte (0 = MSB = first subblock of the 2k block). The start of
 * the run is recovered by stepping back `required_subblocks - 1` subblocks.
 * The starting subblock index is optionally reported via
 * `start_subblock_out`. */
static inline void* calc_address(
    uint8_t* block_usage_iterator,
    int bit_offset,
    size_t required_subblocks,
    size_t* start_subblock_out
) {
    /* Global subblock index of this bitmap byte's first subblock. */
    uintptr_t offset = (block_usage_iterator - pool_header.block_usage) * 8;
    offset += (bit_offset + 1);   /* one past the run's final subblock */
    offset -= required_subblocks; /* back up to the run's first subblock */

    if(start_subblock_out) {
        *start_subblock_out = offset;
    }

    /* Each subblock is 256 bytes. */
    return pool_header.base_address + (offset * 256);
}
/* Forward declaration: the _ex variant also reports where the space was found. */
void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_subblock, size_t* required_subblocks);

/* Find (without claiming) the next available run of space that can hold
 * `required_size` bytes. Returns NULL when nothing fits. */
void* alloc_next_available(void* pool, size_t required_size) {
    return alloc_next_available_ex(pool, required_size, NULL, NULL);
}
/* Scan the usage bitmap for a run of free 256-byte subblocks large enough
 * for `required_size` bytes.
 *
 * Placement rules (see the header comment at the top of this file):
 *  - allocations >= 2048 bytes must start on a 2048 boundary;
 *  - smaller allocations must not straddle a 2048 boundary.
 * A run that fits but violates these rules is remembered as a fallback
 * ("poor option") and returned only if no well-placed run exists.
 *
 * Outputs (both optional): `start_subblock_out` receives the run's first
 * subblock index; `required_subblocks_out` receives the rounded-up subblock
 * count. Returns the candidate address, or NULL if the pool has no run big
 * enough. The bitmap is NOT modified. `pool` is ignored. */
void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_subblock_out, size_t* required_subblocks_out) {
    (void) pool;

    uint8_t* it = pool_header.block_usage;

    /* Round the byte size up to whole 256-byte subblocks. */
    uint32_t required_subblocks = (required_size / 256);
    if(required_size % 256) required_subblocks += 1;

    /* Anything gte to 2048 must be aligned to a 2048 boundary */
    bool requires_alignment = required_size >= 2048;

    if(required_subblocks_out) {
        *required_subblocks_out = required_subblocks;
    }

    /* This is a fallback option. If while we're searching we find a possible slot
     * but it's not aligned, or it's straddling a 2k boundary, then we store
     * it here and if we reach the end of the search and find nothing better
     * we use this instead */
    uint8_t* poor_option = NULL;
    size_t poor_start_subblock = 0;

    uint32_t found_subblocks = 0;
    uint32_t found_poor_subblocks = 0;

    for(size_t j = 0; j < pool_header.block_count; ++j, ++it) {
        /* We just need to find enough consecutive blocks */
        if(found_subblocks < required_subblocks) {
            uint8_t t = *it;

            /* Optimisation only. Skip over full blocks */
            if(t == 255) {
                found_subblocks = 0;
                found_poor_subblocks = 0;
            } else {
                /* Now let's see how many consecutive blocks we can find */
                for(int i = 0; i < 8; ++i) {
                    /* Bit 7 is the block's first subblock; a clear bit means free. */
                    if((t & 0x80) == 0) {
                        /* A small allocation's run would cross into a new 2k
                         * block here: it must restart at this boundary. */
                        bool block_overflow = (
                            required_size < 2048 && found_subblocks > 0 && i == 0
                        );
                        bool reset_subblocks = (
                            (requires_alignment && found_subblocks == 0 && i != 0) ||
                            block_overflow
                        );

                        if(reset_subblocks) {
                            // Ignore this subblock, because we want the first subblock to be aligned
                            // at a 2048 boundary and this one isn't (i != 0)
                            found_subblocks = 0;
                        } else {
                            found_subblocks++;
                        }

                        /* If we reset the subblocks due to an overflow, we still
                         * want to count this free subblock in our count */
                        if(block_overflow) {
                            found_subblocks++;
                        }

                        /* The "poor" count ignores alignment rules entirely. */
                        found_poor_subblocks++;

                        if(found_subblocks >= required_subblocks) {
                            /* We found space! Now calculate the address */
                            return calc_address(it, i, required_subblocks, start_subblock_out);
                        }

                        if(!poor_option && (found_poor_subblocks >= required_subblocks)) {
                            poor_option = calc_address(it, i, required_subblocks, &poor_start_subblock);
                        }
                    } else {
                        /* Used subblock breaks both runs. */
                        found_subblocks = 0;
                        found_poor_subblocks = 0;
                    }

                    /* Shift the next subblock's flag into bit 7. */
                    t <<= 1;
                }
            }
        }
    }

    if(poor_option) {
        if(start_subblock_out) {
            *start_subblock_out = poor_start_subblock;
        }
        return poor_option;
    } else {
        return NULL;
    }
}
/* Initialise the (single, global) allocator over `pool`.
 *
 * Returns 0 on success, -1 if `pool` is NULL, if the allocator is already
 * initialised, or if `size` exceeds the 8M maximum.
 *
 * FIX: reject a NULL pool up front — previously a NULL pool was accepted
 * and left the header pointing at rounded-up garbage. */
int alloc_init(void* pool, size_t size) {
    if(!pool) {
        return -1;
    }

    if(pool_header.pool) {
        return -1;
    }

    /* `>` (not `>=`, resolving the old FIXME) is correct: block_count is
     * derived below from the usable region at/after the first 2048-aligned
     * address, so even a full 8M pool yields at most BLOCK_COUNT blocks. */
    if(size > EIGHT_MEG) {
        return -1;
    }

    uint8_t* p = (uint8_t*) pool;

    memset(pool_header.block_usage, 0, BLOCK_COUNT);
    pool_header.pool = pool;
    pool_header.pool_size = size;

    /* Usable blocks start at the first 2048-aligned address. */
    intptr_t base_address = (intptr_t) pool_header.pool;
    base_address = round_up(base_address, 2048);

    pool_header.base_address = (uint8_t*) base_address;
    pool_header.block_count = ((p + size) - pool_header.base_address) / 2048;
    pool_header.allocations = NULL;

    assert(((uintptr_t) pool_header.base_address) % 2048 == 0);

    return 0;
}
/* Tear down the allocator, releasing all house-keeping nodes. Safe to call
 * when not initialised. The pool memory itself belongs to the caller and is
 * untouched. `pool` is ignored (single global pool). */
void alloc_shutdown(void* pool) {
    (void) pool;

    if(!pool_header.pool) {
        return;
    }

    /* Free the allocation-tracking list (held in regular RAM). */
    struct AllocEntry* it = pool_header.allocations;
    while(it) {
        struct AllocEntry* next = it->next;
        free(it);
        it = next;
    }

    /* Zeroing the whole header also clears `pool`, marking the allocator as
     * uninitialised — the explicit `pool_header.pool = NULL;` that used to
     * follow was dead code and has been removed. */
    memset(&pool_header, 0, sizeof(pool_header));
}
/* Number of 256-byte subblocks needed to hold `size` bytes (ceiling division). */
static inline uint32_t size_to_subblock_count(size_t size) {
    const uint32_t whole = size / 256;
    const int has_remainder = (size % 256) != 0;
    return has_remainder ? (whole + 1) : whole;
}
/* Map a pool address back to its global 256-byte subblock index.
 * `p` must be a pointer previously derived from base_address (i.e. at or
 * above it) or the subtraction underflows. */
static inline uint32_t subblock_from_pointer(void* p) {
    uint8_t* ptr = (uint8_t*) p;
    return (ptr - pool_header.base_address) / 256;
}
/* Split a global subblock index into its 2048-byte block index (*b) and the
 * subblock's position 0-7 within that block (*off). */
static inline void block_and_offset_from_subblock(size_t sb, size_t* b, uint8_t* off) {
    /* 8 subblocks per block, so this is just a divide/modulo by 8,
     * expressed as shifts since 8 is a power of two. */
    *off = (uint8_t)(sb & 7u);
    *b = sb >> 3;
}
/* Allocate `size` bytes from the pool.
 *
 * Finds space via alloc_next_available_ex, marks the covered subblocks used
 * in the bitmap (partial leading block, full middle blocks, partial trailing
 * block), then records the allocation in a list kept sorted by size
 * descending so the defragmenter can move big allocations first.
 * Returns the pool address, or NULL when nothing fits.
 *
 * NOTE(review): the malloc() of the tracking node is not NULL-checked. */
void* alloc_malloc(void* pool, size_t size) {
    DBG_MSG("Allocating: %d\n", size);

    size_t start_subblock, required_subblocks;
    void* ret = alloc_next_available_ex(pool, size, &start_subblock, &required_subblocks);

    if(ret) {
        size_t block;
        uint8_t offset;

        block_and_offset_from_subblock(start_subblock, &block, &offset);

        uint8_t mask = 0;

        DBG_MSG("Alloc: size: %d, rs: %d, sb: %d, b: %d, off: %d\n", size, required_subblocks, start_subblock, start_subblock / 8, start_subblock % 8);

        /* Toggle any bits for the first block */
        /* Placement rules guarantee offset + c <= 8 here: runs >= 2048 bytes
         * start at offset 0, smaller runs never straddle a 2k boundary. */
        int c = (required_subblocks < 8) ? required_subblocks : 8;
        for(int i = 0; i < c; ++i) {
            mask |= (1 << (7 - (offset + i)));  /* bit 7 = first subblock */
            required_subblocks--;
        }

        if(mask) {
            pool_header.block_usage[block++] |= mask;
        }

        /* Fill any full blocks in the middle of the allocation */
        while(required_subblocks > 8) {
            pool_header.block_usage[block++] = 255;
            required_subblocks -= 8;
        }

        /* Fill out any trailing subblocks */
        mask = 0;
        for(size_t i = 0; i < required_subblocks; ++i) {
            mask |= (1 << (7 - i));
        }

        if(mask) {
            pool_header.block_usage[block++] |= mask;
        }

        /* Insert allocations in the list by size descending so that when we
         * defrag we can move the larger blocks before the smaller ones without
         * much effort */
        struct AllocEntry* new_entry = (struct AllocEntry*) malloc(sizeof(struct AllocEntry));
        new_entry->pointer = ret;
        new_entry->size = size;
        new_entry->next = NULL;

        struct AllocEntry* it = pool_header.allocations;
        struct AllocEntry* last = NULL;

        if(!it) {
            /* First allocation ever. */
            pool_header.allocations = new_entry;
        } else {
            while(it) {
                if(it->size < size) {
                    /* Insert before the first strictly-smaller entry. */
                    if(last) {
                        last->next = new_entry;
                    } else {
                        pool_header.allocations = new_entry;
                    }

                    new_entry->next = it;
                    break;
                } else if(!it->next) {
                    /* Smallest so far: append at the tail. */
                    it->next = new_entry;
                    new_entry->next = NULL;
                    break;
                }

                last = it;
                it = it->next;
            }
        }
    }

    DBG_MSG("Alloc done\n");

    return ret;
}
/* Clear the usage bits covering entry `it`'s allocation.
 * Mirrors the marking logic in alloc_malloc exactly: a partial leading
 * block, any number of full middle blocks, then a partial trailing block.
 * Does NOT touch the allocation list — callers unlink/retarget the entry
 * themselves (see alloc_free and alloc_run_defrag). */
static void alloc_release_blocks(struct AllocEntry* it) {
    size_t used_subblocks = size_to_subblock_count(it->size);
    size_t subblock = subblock_from_pointer(it->pointer);
    size_t block;
    uint8_t offset;
    block_and_offset_from_subblock(subblock, &block, &offset);

    uint8_t mask = 0;

    DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);

    /* Wipe out any leading subblocks */
    int c = (used_subblocks < 8) ? used_subblocks : 8;
    for(int i = 0; i < c; ++i) {
        mask |= (1 << (7 - (offset + i)));  /* bit 7 = first subblock */
        used_subblocks--;
    }

    if(mask) {
        pool_header.block_usage[block++] &= ~mask;
    }

    /* Clear any full blocks in the middle of the allocation */
    while(used_subblocks > 8) {
        pool_header.block_usage[block++] = 0;
        used_subblocks -= 8;
    }

    /* Wipe out any trailing subblocks */
    mask = 0;
    for(size_t i = 0; i < used_subblocks; ++i) {
        mask |= (1 << (7 - i));
    }

    if(mask) {
        pool_header.block_usage[block++] &= ~mask;
    }
}
/* Release the allocation previously returned for `p`: clears its usage bits
 * and unlinks its tracking node. Pointers not found in the allocation list
 * are silently ignored. `pool` is ignored (single global pool).
 *
 * FIX: the debug trace used to reference `used_subblocks`/`block`/`offset`,
 * which are locals of alloc_release_blocks — ALLOC_DEBUG=1 builds failed to
 * compile. Log only what is in scope here. */
void alloc_free(void* pool, void* p) {
    (void) pool;

    struct AllocEntry* it = pool_header.allocations;
    struct AllocEntry* last = NULL;
    while(it) {
        if(it->pointer == p) {
            alloc_release_blocks(it);

            /* Unlink from the singly-linked allocation list. */
            if(last) {
                last->next = it->next;
            } else {
                assert(it == pool_header.allocations);
                pool_header.allocations = it->next;
            }

            DBG_MSG("Freed: size: %d\n", (int) it->size);

            free(it);
            break;
        }

        last = it;
        it = it->next;
    }

    DBG_MSG("Free done\n");
}
/* Compact the pool: repeatedly move each allocation to the lowest available
 * address, for up to `max_iterations` passes or until a pass moves nothing.
 * `callback(old_addr, new_addr, user_data)` is invoked for every move so the
 * caller can fix up its pointers.
 *
 * NOTE(review): alloc_malloc() inserts a *new* AllocEntry for the
 * destination while the existing entry is kept and re-pointed below — this
 * looks like it leaves two list entries describing the same address; verify
 * free() behaviour after a defrag.
 * NOTE(review): the alloc_malloc() result is not NULL-checked before the
 * memcpy. */
void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data) {
    for(int i = 0; i < max_iterations; ++i) {
        bool move_occurred = false;

        struct AllocEntry* it = pool_header.allocations;

        if(!it) {
            /* Nothing allocated: nothing to compact. */
            return;
        }

        while(it) {
            void* potential_dest = alloc_next_available(pool, it->size);
            /* Only worth moving if the candidate is earlier in the pool. */
            if(potential_dest < it->pointer) {
                potential_dest = alloc_malloc(pool, it->size);
                memcpy(potential_dest, it->pointer, it->size);

                /* Mark this block as now free, but don't fiddle with the
                 * allocation list */
                alloc_release_blocks(it);

                callback(it->pointer, potential_dest, user_data);

                it->pointer = potential_dest;
                move_occurred = true;
            }

            it = it->next;
        }

        if(!move_occurred) {
            /* Stable: no pass will move anything further. */
            return;
        }
    }
}
/* Population count of a byte via a 4-bit nibble lookup table. */
static inline uint8_t count_ones(uint8_t byte) {
    static const uint8_t NIBBLE_LOOKUP[16] = {
        0, 1, 1, 2, 1, 2, 2, 3,
        1, 2, 2, 3, 2, 3, 3, 4
    };

    const uint8_t low_nibble = byte & 0x0F;
    const uint8_t high_nibble = byte >> 4;

    return NIBBLE_LOOKUP[low_nibble] + NIBBLE_LOOKUP[high_nibble];
}
/* Return the total number of free bytes in the pool.
 *
 * FIX: a set bit in block_usage marks a *used* subblock (PoolHeader doc:
 * fully-used block == 255; the free-test in alloc_next_available_ex is
 * `(t & 0x80) == 0`), yet this function summed count_ones(*it) — i.e. it
 * reported *used* bytes. Count the clear bits instead. */
size_t alloc_count_free(void* pool) {
    (void) pool;

    uint8_t* it = pool_header.block_usage;
    uint8_t* end = it + pool_header.block_count;

    size_t total_free = 0;

    while(it < end) {
        /* 8 subblocks of 256 bytes per block; clear bits are free. */
        total_free += (8 - count_ones(*it)) * 256;
        ++it;
    }

    return total_free;
}
/* Return the size in bytes of the largest contiguous free region found in
 * the usage bitmap.
 *
 * NOTE(review): the partially-used branch looks suspect — `(7 - i) * 256`
 * adds the subblocks *preceding* bit i whether or not they were free, free
 * subblocks after the last set bit of a byte never carry into the next
 * byte's run, and `current_block` is reset only when a new maximum is
 * recorded. Verify against a known layout before trusting the result. */
size_t alloc_count_continuous(void* pool) {
    (void) pool;

    size_t largest_block = 0;

    uint8_t* it = pool_header.block_usage;
    uint8_t* end = it + pool_header.block_count;
    size_t current_block = 0;
    while(it < end) {
        uint8_t t = *it++;
        if(!t) {
            /* Whole 2k block free: extend the current run. */
            current_block += 2048;
        } else {
            /* Walk subblocks in order (bit 7 = first subblock). */
            for(int i = 7; i >= 0; --i) {
                bool bitset = (t & (1 << i));
                if(bitset) {
                    current_block += (7 - i) * 256;
                    if(largest_block < current_block) {
                        largest_block = current_block;
                        current_block = 0;
                    }
                }
            }
        }
    }
    return largest_block;
}

View File

@ -1,29 +0,0 @@
#pragma once

/* Public interface of the 2k-aligned pool allocator (see alloc.c).
 * NOTE: the implementation currently manages a single global pool; the
 * `pool` parameter is accepted but ignored by the implementation. */

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Initialise the allocator over `pool` (8M maximum). Returns 0 on success,
 * -1 on failure. */
int alloc_init(void* pool, size_t size);
/* Release all internal book-keeping; the pool memory itself is untouched. */
void alloc_shutdown(void* pool);

/* Allocate / free memory inside the pool. */
void *alloc_malloc(void* pool, size_t size);
void alloc_free(void* pool, void* p);

/* Callback invoked once per moved allocation: (old_addr, new_addr, user_data). */
typedef void (defrag_address_move)(void*, void*, void*);
/* Compact the pool, running at most `max_iterations` passes. */
void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data);

/* Introspection helpers. */
size_t alloc_count_free(void* pool);
size_t alloc_count_continuous(void* pool);
void* alloc_next_available(void* pool, size_t required_size);
void* alloc_base_address(void* pool);
size_t alloc_block_count(void* pool);

#ifdef __cplusplus
}
#endif

592
GL/draw.c
View File

@ -3,48 +3,20 @@
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include "private.h"
#include "platform.h"
/* Convert a 32-bit float to an IEEE-754 half-float (binary16) bit pattern.
 * Values too small for a normal half flush to zero, overflow maps to
 * +/-infinity (0x7c00) and NaN maps to 0x7e00, via the branchless clamps. */
GLushort _quantize( GLfloat v ) {
    /* Type-pun the float to its raw bits. */
    union { GLfloat f; GLuint ui; } u = {v};
    GLuint ui = u.ui;

    int s = (ui >> 16) & 0x8000;  /* sign bit, moved to the half position */
    int em = ui & 0x7fffffff;     /* exponent + mantissa, sign stripped */

    /* Re-bias the exponent (127 -> 15) and shift the mantissa down,
     * rounding via the (1 << 12) addend. */
    int h = (em - (112 << 23) + (1 << 12)) >> 13;
    h = (em < (113 << 23)) ? 0 : h;       /* too small: flush to zero */
    h = (em >= (143 << 23)) ? 0x7c00 : h; /* overflow: +/- infinity */
    h = (em > (255 << 23)) ? 0x7e00 : h;  /* NaN */

    return (GLushort)(s | h);
}
/* Convert an IEEE-754 half-float (binary16) bit pattern back to a 32-bit
 * float. Half denormals flush to zero; inf/NaN are widened to the float
 * exponent range. */
GLfloat _dequantize( GLushort h ) {
    GLuint s = (GLuint) (h & 0x8000) << 16;  /* sign bit, float position */
    int em = h & 0x7fff;                     /* exponent + mantissa */

    /* Re-bias the exponent (15 -> 127) and shift the mantissa up. */
    int r = (em + (112 << 10)) << 13;
    r = (em < (1 << 10)) ? 0 : r;              /* denormal/zero: flush to 0 */
    r += (em >= (31 << 10)) ? (112 << 23) : 0; /* inf/NaN: exponent to 255 */

    /* Type-pun the assembled bits back to a float. */
    union { GLfloat f; GLuint ui; } u;
    u.ui = s | r;
    return u.f;
}
AttribPointerList ATTRIB_POINTERS;
GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
GLuint FAST_PATH_ENABLED = GL_FALSE;
static GLubyte ACTIVE_CLIENT_TEXTURE = 0;
static const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
extern inline GLuint _glRecalcFastPath();
extern GLboolean AUTOSORT_ENABLED;
#define ITERATE(count) \
GLuint i = count; \
while(i--)
@ -88,7 +60,6 @@ GL_FORCE_INLINE GLsizei byte_size(GLenum type) {
case GL_INT: return sizeof(GLint);
case GL_UNSIGNED_INT: return sizeof(GLuint);
case GL_DOUBLE: return sizeof(GLdouble);
case GL_HALF_FLOAT: return sizeof(GLhalf);
case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLuint);
case GL_FLOAT:
default: return sizeof(GLfloat);
@ -105,7 +76,7 @@ static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restr
// 10:10:10:2REV format
static void _readVertexData1i3f(const GLubyte* in, GLubyte* out) {
static const float MULTIPLIER = 1.0f / 1023.0f;
const static float MULTIPLIER = 1.0f / 1023.0f;
GLfloat* output = (GLfloat*) out;
@ -135,15 +106,6 @@ static void _readVertexData3us3f(const GLubyte* in, GLubyte* out) {
output[2] = input[2];
}
/* Expand three quantized (half-float bit pattern) ushorts from `in` into a
 * 3-component float vector at `out`. */
static void _readVertexData3usq3f(const GLubyte* in, GLubyte* out) {
    const GLushort* src = (const GLushort*) in;
    float* dst = (float*) out;

    for(int i = 0; i < 3; ++i) {
        dst[i] = _dequantize(src[i]);
    }
}
static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) {
const GLuint* input = (const GLuint*) in;
float* output = (float*) out;
@ -155,6 +117,8 @@ static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) {
static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
float* output = (float*) out;
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
@ -162,15 +126,6 @@ static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) {
output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE;
}
/* Read three GLhalf components from `in` into floats at `out`.
 * NOTE(review): this assigns the GLhalf values directly; if GLhalf is an
 * integer bit-pattern type (cf. _readVertexData3usq3f, which decodes via
 * _dequantize) this copies raw bits as numbers instead of decoding them —
 * confirm GLhalf's definition before relying on this path. */
static void _readVertexData3f16_3f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;
    output[0] = input[0];
    output[1] = input[1];
    output[2] = input[2];
}
/* Copy a 2-component float attribute straight through (vec2cpy is a
 * project helper; presumably a plain two-float copy — confirm in private.h). */
static void _readVertexData2f2f(const GLubyte* in, GLubyte* out) {
    vec2cpy(out, in);
}
@ -184,6 +139,8 @@ static void _readVertexData2f3f(const GLubyte* in, GLubyte* out) {
}
static void _readVertexData2ub3f(const GLubyte* input, GLubyte* out) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
float* output = (float*) out;
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
@ -204,25 +161,8 @@ static void _readVertexData2us2f(const GLubyte* in, GLubyte* out) {
const GLushort* input = (const GLushort*) in;
float* output = (float*) out;
output[0] = (float)input[0] / SHRT_MAX;
output[1] = (float)input[1] / SHRT_MAX;
}
/* Expand two quantized (half-float bit pattern) ushorts from `in` into a
 * 3-component float vector at `out`, zero-filling the third component. */
static void _readVertexData2usq3f(const GLubyte* in, GLubyte* out) {
    const GLushort* src = (const GLushort*) in;
    float* dst = (float*) out;

    dst[0] = _dequantize(src[0]);
    dst[1] = _dequantize(src[1]);
    dst[2] = 0.0f;
}
static void _readVertexData2usq2f(const GLubyte* in, GLubyte* out) {
const GLushort* input = (const GLushort*) in;
float* output = (float*) out;
output[0] = _dequantize(input[0]);
output[1] = _dequantize(input[1]);
output[0] = input[0];
output[1] = input[1];
}
static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) {
@ -234,20 +174,13 @@ static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) {
}
static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) {
const float ONE_OVER_TWO_FIVE_FIVE = 1.0f / 255.0f;
float* output = (float*) out;
output[0] = input[0] * ONE_OVER_TWO_FIVE_FIVE;
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
}
/* Read two GLhalf components from `in` into floats at `out`.
 * NOTE(review): assigns GLhalf values directly without _dequantize — see
 * the matching note on _readVertexData3f16_3f; confirm GLhalf's type. */
static void _readVertexData2f16_2f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;
    output[0] = input[0];
    output[1] = input[1];
}
static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) {
const GLuint* input = (const GLuint*) in;
float* output = (float*) out;
@ -257,15 +190,6 @@ static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) {
output[2] = 0.0f;
}
/* Read two GLhalf components from `in` into a 3-component float vector at
 * `out`, zero-filling the third component.
 * NOTE(review): assigns GLhalf values directly without _dequantize — see
 * the matching note on _readVertexData3f16_3f; confirm GLhalf's type. */
static void _readVertexData2f16_3f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;
    output[0] = input[0];
    output[1] = input[1];
    output[2] = 0.0f;
}
static void _readVertexData4ubARGB(const GLubyte* input, GLubyte* output) {
output[R8IDX] = input[0];
output[G8IDX] = input[1];
@ -318,7 +242,7 @@ static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restri
float x, y, z;
} V;
static const V NegZ = {0.0f, 0.0f, -1.0f};
const static V NegZ = {0.0f, 0.0f, -1.0f};
*((V*) out) = NegZ;
}
@ -336,37 +260,37 @@ static void _fillZero2f(const GLubyte* __restrict__ input, GLubyte* __restrict__
static void _readVertexData3usARGB(const GLubyte* input, GLubyte* output) {
_GL_UNUSED(input);
_GL_UNUSED(output);
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
static void _readVertexData3uiARGB(const GLubyte* input, GLubyte* output) {
_GL_UNUSED(input);
_GL_UNUSED(output);
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
static void _readVertexData4usARGB(const GLubyte* input, GLubyte* output) {
_GL_UNUSED(input);
_GL_UNUSED(output);
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
static void _readVertexData4uiARGB(const GLubyte* input, GLubyte* output) {
_GL_UNUSED(input);
_GL_UNUSED(output);
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
static void _readVertexData4usRevARGB(const GLubyte* input, GLubyte* output) {
_GL_UNUSED(input);
_GL_UNUSED(output);
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
static void _readVertexData4uiRevARGB(const GLubyte* input, GLubyte* output) {
_GL_UNUSED(input);
_GL_UNUSED(output);
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
GLuint* _glGetEnabledAttributes() {
@ -470,12 +394,12 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
}
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
gl_assert(target->header_offset < aligned_vector_size(&target->output->vector));
assert(target->header_offset < target->output->vector.size);
return aligned_vector_at(&target->output->vector, target->header_offset);
}
GL_INLINE_DEBUG Vertex* _glSubmissionTargetStart(SubmissionTarget* target) {
gl_assert(target->start_offset < aligned_vector_size(&target->output->vector));
assert(target->start_offset < target->output->vector.size);
return aligned_vector_at(&target->output->vector, target->start_offset);
}
@ -514,7 +438,7 @@ GL_FORCE_INLINE void genTriangleStrip(Vertex* output, GLuint count) {
}
static void genTriangleFan(Vertex* output, GLuint count) {
gl_assert(count <= 255);
assert(count <= 255);
Vertex* dst = output + (((count - 2) * 3) - 1);
Vertex* src = output + (count - 1);
@ -571,17 +495,14 @@ ReadPositionFunc calcReadPositionFunc() {
case GL_FLOAT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f3f:
_readVertexData2f3f;
case GL_HALF_FLOAT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f16_3f:
_readVertexData2f16_3f;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ub3f:
_readVertexData2ub3f;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3usq3f:
_readVertexData2usq3f;
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3us3f:
_readVertexData2us3f;
case GL_INT:
case GL_UNSIGNED_INT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ui3f:
@ -599,14 +520,12 @@ ReadUVFunc calcReadUVFunc() {
case GL_DOUBLE:
case GL_FLOAT:
return _readVertexData2f2f;
case GL_HALF_FLOAT:
return _readVertexData2f16_2f;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
return _readVertexData2ub2f;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return _readVertexData2usq2f;
return _readVertexData2us2f;
case GL_INT:
case GL_UNSIGNED_INT:
return _readVertexData2ui2f;
@ -623,14 +542,12 @@ ReadUVFunc calcReadSTFunc() {
case GL_DOUBLE:
case GL_FLOAT:
return _readVertexData2f2f;
case GL_HALF_FLOAT:
return _readVertexData2f16_2f;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
return _readVertexData2ub2f;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return _readVertexData2usq2f;
return _readVertexData2us2f;
case GL_INT:
case GL_UNSIGNED_INT:
return _readVertexData2ui2f;
@ -647,8 +564,6 @@ ReadNormalFunc calcReadNormalFunc() {
case GL_DOUBLE:
case GL_FLOAT:
return _readVertexData3f3f;
case GL_HALF_FLOAT:
return _readVertexData3f16_3f;
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
@ -656,7 +571,7 @@ ReadNormalFunc calcReadNormalFunc() {
break;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return _readVertexData3usq3f;
return _readVertexData3us3f;
break;
case GL_INT:
case GL_UNSIGNED_INT:
@ -668,57 +583,74 @@ ReadNormalFunc calcReadNormalFunc() {
}
}
static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GLuint count, Vertex* it) {
static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLsizei vstride = ATTRIB_POINTERS.vertex.stride;
const GLubyte* vptr = ((GLubyte*) ATTRIB_POINTERS.vertex.ptr + (first * vstride));
float pos[3];
GLubyte* out = (GLubyte*) output[0].xyz;
uint32_t* flags;
ITERATE(count) {
PREFETCH(vptr + vstride);
func(vptr, (GLubyte*) pos);
it->flags = GPU_CMD_VERTEX;
func(vptr, out);
vptr += vstride;
++it;
/* Set the flags which are 4 bytes before the position. Doing it here saves
* an additional loop */
flags = (uint32_t*) out - 1;
*flags = GPU_CMD_VERTEX;
out += sizeof(Vertex);
}
}
static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, Vertex* it) {
static void _readUVData(ReadUVFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLsizei uvstride = ATTRIB_POINTERS.uv.stride;
const GLubyte* uvptr = ((GLubyte*) ATTRIB_POINTERS.uv.ptr + (first * uvstride));
GLubyte* out = (GLubyte*) output[0].uv;
ITERATE(count) {
PREFETCH(uvptr + uvstride);
func(uvptr, (GLubyte*) it->uv);
func(uvptr, out);
uvptr += uvstride;
++it;
out += sizeof(Vertex);
}
}
static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, VertexExtra* it) {
static void _readSTData(ReadUVFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) {
const GLsizei ststride = ATTRIB_POINTERS.st.stride;
const GLubyte* stptr = ((GLubyte*) ATTRIB_POINTERS.st.ptr + (first * ststride));
GLubyte* out = (GLubyte*) extra[0].st;
ITERATE(count) {
PREFETCH(stptr + ststride);
func(stptr, (GLubyte*) it->st);
func(stptr, out);
stptr += ststride;
++it;
out += sizeof(VertexExtra);
}
}
static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuint count, VertexExtra* it) {
static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuint count, const VertexExtra* extra) {
const GLsizei nstride = ATTRIB_POINTERS.normal.stride;
const GLubyte* nptr = ((GLubyte*) ATTRIB_POINTERS.normal.ptr + (first * nstride));
ITERATE(count) {
func(nptr, (GLubyte*) it->nxyz);
nptr += nstride;
GLubyte* out = (GLubyte*) extra[0].nxyz;
if(_glIsNormalizeEnabled()) {
GLfloat* n = (GLfloat*) it->nxyz;
ITERATE(count) {
func(nptr, out);
nptr += nstride;
out += sizeof(VertexExtra);
}
if(_glIsNormalizeEnabled()) {
GLubyte* ptr = (GLubyte*) extra->nxyz;
ITERATE(count) {
GLfloat* n = (GLfloat*) ptr;
float temp = n[0] * n[0];
temp = MATH_fmac(n[1], n[1], temp);
temp = MATH_fmac(n[2], n[2], temp);
@ -727,9 +659,9 @@ static void _readNormalData(ReadNormalFunc func, const GLuint first, const GLuin
n[0] *= ilength;
n[1] *= ilength;
n[2] *= ilength;
}
++it;
ptr += sizeof(VertexExtra);
}
}
}
@ -737,15 +669,18 @@ GL_FORCE_INLINE GLuint diffusePointerSize() {
return (ATTRIB_POINTERS.colour.size == GL_BGRA) ? 4 : ATTRIB_POINTERS.colour.size;
}
static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLuint count, Vertex* it) {
static void _readDiffuseData(ReadDiffuseFunc func, const GLuint first, const GLuint count, const Vertex* output) {
const GLuint cstride = ATTRIB_POINTERS.colour.stride;
const GLubyte* cptr = ((GLubyte*) ATTRIB_POINTERS.colour.ptr) + (first * cstride);
GLubyte* out = (GLubyte*) output[0].bgra;
ITERATE(count) {
PREFETCH(cptr + cstride);
func(cptr, it->bgra);
func(cptr, out);
cptr += cstride;
++it;
out += sizeof(Vertex);
}
}
@ -813,7 +748,9 @@ typedef struct {
} Float2;
static const Float3 F3Z = {0.0f, 0.0f, 1.0f};
static const Float3 F3ZERO = {0.0f, 0.0f, 0.0f};
static const Float2 F2ZERO = {0.0f, 0.0f};
static const uint32_t U4ONE = ~0;
static void generateElementsFastPath(
SubmissionTarget* target, const GLsizei first, const GLuint count,
@ -900,15 +837,17 @@ static void generateElementsFastPath(
#define POLYMODE QUADS
#define PROCESS_VERTEX_FLAGS(it, i) { \
it->flags = GPU_CMD_VERTEX; \
if(((i + 1) % 4) == 0) { \
Vertex t = *it; \
*it = *(it - 1); \
*(it - 1) = t; \
if((i + 1) % 4 == 0) { \
Vertex* prev = ((it) - 1); \
Vertex t = (*prev); \
*(prev) = *((it)); \
*((it)) = t; \
prev->flags = GPU_CMD_VERTEX; \
it->flags = GPU_CMD_VERTEX_EOL; \
} else { \
it->flags = GPU_CMD_VERTEX; \
} \
}
#include "draw_fastpath.inc"
#undef PROCESS_VERTEX_FLAGS
#undef POLYMODE
@ -925,11 +864,11 @@ static void generateArrays(SubmissionTarget* target, const GLsizei first, const
Vertex* start = _glSubmissionTargetStart(target);
VertexExtra* ve = aligned_vector_at(target->extras, 0);
const ReadPositionFunc pfunc = calcReadPositionFunc();
const ReadDiffuseFunc dfunc = calcReadDiffuseFunc();
const ReadUVFunc uvfunc = calcReadUVFunc();
const ReadNormalFunc nfunc = calcReadNormalFunc();
const ReadUVFunc stfunc = calcReadSTFunc();
ReadPositionFunc pfunc = calcReadPositionFunc();
ReadDiffuseFunc dfunc = calcReadDiffuseFunc();
ReadUVFunc uvfunc = calcReadUVFunc();
ReadNormalFunc nfunc = calcReadNormalFunc();
ReadUVFunc stfunc = calcReadSTFunc();
_readPositionData(pfunc, first, count, start);
_readDiffuseData(dfunc, first, count, start);
@ -947,15 +886,14 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
if(indices) {
generateElementsFastPath(target, first, count, indices, type);
} else {
switch(mode) {
case GL_QUADS:
generateArraysFastPath_QUADS(target, first, count);
return; // Don't need to do any more processing
case GL_TRIANGLES:
generateArraysFastPath_TRIS(target, first, count);
return; // Don't need to do any more processing
default:
generateArraysFastPath_ALL(target, first, count);
if(mode == GL_QUADS) {
generateArraysFastPath_QUADS(target, first, count);
return; // Don't need to do any more processing
} else if(mode == GL_TRIANGLES) {
generateArraysFastPath_TRIS(target, first, count);
return; // Don't need to do any more processing
} else {
generateArraysFastPath_ALL(target, first, count);
}
}
} else {
@ -982,7 +920,7 @@ static void generate(SubmissionTarget* target, const GLenum mode, const GLsizei
genTriangleStrip(it, count);
break;
default:
gl_assert(0 && "Not Implemented");
assert(0 && "Not Implemented");
}
}
@ -995,6 +933,24 @@ static void transform(SubmissionTarget* target) {
TransformVertices(vertex, target->count);
}
static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
const uint8_t* dataIn = (const uint8_t*) xyz;
uint8_t* dataOut = (uint8_t*) xyzOut;
ITERATE(count) {
const float* in = (const float*) dataIn;
float* out = (float*) dataOut;
TransformVec3NoMod(
in,
out
);
dataIn += inStride;
dataOut += outStride;
}
}
static void mat_transform_normal3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
const uint8_t* dataIn = (const uint8_t*) xyz;
uint8_t* dataOut = (uint8_t*) xyzOut;
@ -1062,143 +1018,40 @@ GL_FORCE_INLINE void divide(SubmissionTarget* target) {
}
}
GL_FORCE_INLINE int _calc_pvr_face_culling() {
if(!_glIsCullingEnabled()) {
return GPU_CULLING_SMALL;
} else {
if(_glGetCullFace() == GL_BACK) {
return (_glGetFrontFace() == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
} else {
return (_glGetFrontFace() == GL_CCW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
}
}
}
GL_FORCE_INLINE int _calc_pvr_depth_test() {
if(!_glIsDepthTestEnabled()) {
return GPU_DEPTHCMP_ALWAYS;
}
switch(_glGetDepthFunc()) {
case GL_NEVER:
return GPU_DEPTHCMP_NEVER;
case GL_LESS:
return GPU_DEPTHCMP_GREATER;
case GL_EQUAL:
return GPU_DEPTHCMP_EQUAL;
case GL_LEQUAL:
return GPU_DEPTHCMP_GEQUAL;
case GL_GREATER:
return GPU_DEPTHCMP_LESS;
case GL_NOTEQUAL:
return GPU_DEPTHCMP_NOTEQUAL;
case GL_GEQUAL:
return GPU_DEPTHCMP_LEQUAL;
break;
case GL_ALWAYS:
default:
return GPU_DEPTHCMP_ALWAYS;
}
}
GL_FORCE_INLINE int _calcPVRBlendFactor(GLenum factor) {
switch(factor) {
case GL_ZERO:
return GPU_BLEND_ZERO;
case GL_SRC_ALPHA:
return GPU_BLEND_SRCALPHA;
case GL_DST_COLOR:
return GPU_BLEND_DESTCOLOR;
case GL_DST_ALPHA:
return GPU_BLEND_DESTALPHA;
case GL_ONE_MINUS_DST_COLOR:
return GPU_BLEND_INVDESTCOLOR;
case GL_ONE_MINUS_SRC_ALPHA:
return GPU_BLEND_INVSRCALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return GPU_BLEND_INVDESTALPHA;
case GL_ONE:
return GPU_BLEND_ONE;
default:
fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor);
return GPU_BLEND_ONE;
}
}
GL_FORCE_INLINE void _updatePVRBlend(PolyContext* context) {
if(_glIsBlendingEnabled() || _glIsAlphaTestEnabled()) {
context->gen.alpha = GPU_ALPHA_ENABLE;
} else {
context->gen.alpha = GPU_ALPHA_DISABLE;
}
context->blend.src = _calcPVRBlendFactor(_glGetBlendSourceFactor());
context->blend.dst = _calcPVRBlendFactor(_glGetBlendDestFactor());
}
GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) {
GL_FORCE_INLINE void push(PolyHeader* header, GLboolean multiTextureHeader, PolyList* activePolyList, GLshort textureUnit) {
TRACE();
// Compile the header
PolyContext ctx;
memset(&ctx, 0, sizeof(PolyContext));
PolyContext cxt = *_glGetPVRContext();
cxt.list_type = activePolyList->list_type;
ctx.list_type = activePolyList->list_type;
ctx.fmt.color = GPU_CLRFMT_ARGBPACKED;
ctx.fmt.uv = GPU_UVFMT_32BIT;
ctx.gen.color_clamp = GPU_CLRCLAMP_DISABLE;
ctx.gen.culling = _calc_pvr_face_culling();
ctx.depth.comparison = _calc_pvr_depth_test();
ctx.depth.write = _glIsDepthWriteEnabled() ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE;
ctx.gen.shading = (_glGetShadeModel() == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT;
if(_glIsScissorTestEnabled()) {
ctx.gen.clip_mode = GPU_USERCLIP_INSIDE;
} else {
ctx.gen.clip_mode = GPU_USERCLIP_DISABLE;
}
if(_glIsFogEnabled()) {
ctx.gen.fog_type = GPU_FOG_TABLE;
} else {
ctx.gen.fog_type = GPU_FOG_DISABLE;
}
_updatePVRBlend(&ctx);
if(ctx.list_type == GPU_LIST_OP_POLY) {
if(cxt.list_type == GPU_LIST_OP_POLY) {
/* Opaque polys are always one/zero */
ctx.blend.src = GPU_BLEND_ONE;
ctx.blend.dst = GPU_BLEND_ZERO;
} else if(ctx.list_type == GPU_LIST_PT_POLY) {
cxt.blend.src = GPU_BLEND_ONE;
cxt.blend.dst = GPU_BLEND_ZERO;
} else if(cxt.list_type == GPU_LIST_PT_POLY) {
/* Punch-through polys require fixed blending and depth modes */
ctx.blend.src = GPU_BLEND_SRCALPHA;
ctx.blend.dst = GPU_BLEND_INVSRCALPHA;
ctx.depth.comparison = GPU_DEPTHCMP_LEQUAL;
} else if(ctx.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) {
cxt.blend.src = GPU_BLEND_SRCALPHA;
cxt.blend.dst = GPU_BLEND_INVSRCALPHA;
cxt.depth.comparison = GPU_DEPTHCMP_LEQUAL;
} else if(cxt.list_type == GPU_LIST_TR_POLY && AUTOSORT_ENABLED) {
/* Autosort mode requires this mode for transparent polys */
ctx.depth.comparison = GPU_DEPTHCMP_GEQUAL;
cxt.depth.comparison = GPU_DEPTHCMP_GEQUAL;
}
_glUpdatePVRTextureContext(&ctx, textureUnit);
_glUpdatePVRTextureContext(&cxt, textureUnit);
if(multiTextureHeader) {
gl_assert(ctx.list_type == GPU_LIST_TR_POLY);
assert(cxt.list_type == GPU_LIST_TR_POLY);
ctx.gen.alpha = GPU_ALPHA_ENABLE;
ctx.txr.alpha = GPU_TXRALPHA_ENABLE;
ctx.blend.src = GPU_BLEND_ZERO;
ctx.blend.dst = GPU_BLEND_DESTCOLOR;
ctx.depth.comparison = GPU_DEPTHCMP_EQUAL;
cxt.gen.alpha = GPU_ALPHA_ENABLE;
cxt.txr.alpha = GPU_TXRALPHA_ENABLE;
cxt.blend.src = GPU_BLEND_ZERO;
cxt.blend.dst = GPU_BLEND_DESTCOLOR;
cxt.depth.comparison = GPU_DEPTHCMP_EQUAL;
}
CompilePolyHeader(header, &ctx);
/* Force bits 18 and 19 on to switch to 6 triangle strips */
header->cmd |= 0xC0000;
CompilePolyHeader(header, &cxt);
/* Post-process the vertex list */
/*
@ -1216,29 +1069,7 @@ GL_FORCE_INLINE void apply_poly_header(PolyHeader* header, GLboolean multiTextur
#define DEBUG_CLIPPING 0
static AlignedVector VERTEX_EXTRAS;
static SubmissionTarget SUBMISSION_TARGET;
void _glInitSubmissionTarget() {
SubmissionTarget* target = &SUBMISSION_TARGET;
target->extras = NULL;
target->count = 0;
target->output = NULL;
target->header_offset = target->start_offset = 0;
aligned_vector_init(&VERTEX_EXTRAS, sizeof(VertexExtra));
target->extras = &VERTEX_EXTRAS;
}
GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) {
SubmissionTarget* const target = &SUBMISSION_TARGET;
AlignedVector* const extras = target->extras;
TRACE();
/* Do nothing if vertices aren't enabled */
@ -1251,59 +1082,55 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
return;
}
/* Polygons are treated as triangle fans, the only time this would be a
* problem is if we supported glPolygonMode(..., GL_LINE) but we don't.
* We optimise the triangle and quad cases.
*/
if(mode == GL_POLYGON) {
switch(count) {
case 2:
mode = GL_LINES;
break;
case 3:
mode = GL_TRIANGLES;
break;
case 4:
mode = GL_QUADS;
break;
default:
mode = GL_TRIANGLE_FAN;
}
}
if(mode == GL_LINE_STRIP || mode == GL_LINES) {
fprintf(stderr, "Line drawing is currently unsupported\n");
return;
}
static SubmissionTarget* target = NULL;
static AlignedVector extras;
/* Initialization of the target and extras */
if(!target) {
target = (SubmissionTarget*) malloc(sizeof(SubmissionTarget));
target->extras = NULL;
target->count = 0;
target->output = NULL;
target->header_offset = target->start_offset = 0;
aligned_vector_init(&extras, sizeof(VertexExtra));
target->extras = &extras;
}
/* Polygons are treated as triangle fans, the only time this would be a
* problem is if we supported glPolygonMode(..., GL_LINE) but we don't.
* We optimise the triangle and quad cases.
*/
if(mode == GL_POLYGON) {
if(count == 3) {
mode = GL_TRIANGLES;
} else if(count == 4) {
mode = GL_QUADS;
} else {
mode = GL_TRIANGLE_FAN;
}
}
// We don't handle this any further, so just make sure we never pass it down */
gl_assert(mode != GL_POLYGON);
assert(mode != GL_POLYGON);
target->output = _glActivePolyList();
gl_assert(target->output);
gl_assert(extras);
uint32_t vector_size = aligned_vector_size(&target->output->vector);
GLboolean header_required = (vector_size == 0) || _glGPUStateIsDirty();
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = vector_size;
target->start_offset = target->header_offset + (header_required ? 1 : 0);
target->header_offset = target->output->vector.size;
target->start_offset = target->header_offset + 1;
gl_assert(target->start_offset >= target->header_offset);
gl_assert(target->count);
assert(target->count);
/* Make sure we have enough room for all the "extra" data */
aligned_vector_resize(extras, target->count);
aligned_vector_resize(&extras, target->count);
/* Make room for the vertices and header */
aligned_vector_extend(&target->output->vector, target->count + (header_required));
if(header_required) {
apply_poly_header(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
_glGPUStateMarkClean();
}
aligned_vector_extend(&target->output->vector, target->count + 1);
/* If we're lighting, then we need to do some work in
* eye-space, so we only transform vertices by the modelview
@ -1312,7 +1139,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
* If we're not doing lighting though we can optimise by taking
* vertices straight to clip-space */
if(_glIsLightingEnabled()) {
if(LIGHTING_ENABLED) {
_glMatrixLoadModelView();
} else {
_glMatrixLoadModelViewProjection();
@ -1327,7 +1154,7 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
transform(target);
}
if(_glIsLightingEnabled()){
if(LIGHTING_ENABLED){
light(target);
/* OK eye-space work done, now move into clip space */
@ -1335,48 +1162,51 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
transform(target);
}
// /*
// Now, if multitexturing is enabled, we want to send exactly the same vertices again, except:
// - We want to enable blending, and send them to the TR list
// - We want to set the depth func to GL_EQUAL
// - We want to set the second texture ID
// - We want to set the uv coordinates to the passed st ones
// */
push(_glSubmissionTargetHeader(target), GL_FALSE, target->output, 0);
// if(!TEXTURES_ENABLED[1]) {
// /* Multitexture actively disabled */
// return;
// }
/*
Now, if multitexturing is enabled, we want to send exactly the same vertices again, except:
- We want to enable blending, and send them to the TR list
- We want to set the depth func to GL_EQUAL
- We want to set the second texture ID
- We want to set the uv coordinates to the passed st ones
*/
// TextureObject* texture1 = _glGetTexture1();
if(!TEXTURES_ENABLED[1]) {
/* Multitexture actively disabled */
return;
}
// /* Multitexture implicitly disabled */
// if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) {
// /* Multitexture actively disabled */
// return;
// }
TextureObject* texture1 = _glGetTexture1();
// /* Push back a copy of the list to the transparent poly list, including the header
// (hence the + 1)
// */
// Vertex* vertex = aligned_vector_push_back(
// &_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1
// );
/* Multitexture implicitly disabled */
if(!texture1 || ((ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) != ST_ENABLED_FLAG)) {
/* Multitexture actively disabled */
return;
}
// gl_assert(vertex);
/* Push back a copy of the list to the transparent poly list, including the header
(hence the + 1)
*/
Vertex* vertex = aligned_vector_push_back(
&_glTransparentPolyList()->vector, (Vertex*) _glSubmissionTargetHeader(target), target->count + 1
);
// PolyHeader* mtHeader = (PolyHeader*) vertex++;
// /* Send the buffer again to the transparent list */
// apply_poly_header(mtHeader, GL_TRUE, _glTransparentPolyList(), 1);
assert(vertex);
// /* Replace the UV coordinates with the ST ones */
// VertexExtra* ve = aligned_vector_at(target->extras, 0);
// ITERATE(target->count) {
// vertex->uv[0] = ve->st[0];
// vertex->uv[1] = ve->st[1];
// ++vertex;
// ++ve;
// }
PolyHeader* mtHeader = (PolyHeader*) vertex++;
/* Replace the UV coordinates with the ST ones */
VertexExtra* ve = aligned_vector_at(target->extras, 0);
ITERATE(target->count) {
vertex->uv[0] = ve->st[0];
vertex->uv[1] = ve->st[1];
++vertex;
++ve;
}
/* Send the buffer again to the transparent list */
push(mtHeader, GL_TRUE, _glTransparentPolyList(), 1);
}
void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) {
@ -1480,8 +1310,6 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const G
return;
}
stride = (stride) ? stride : size * byte_size(type);
AttribPointer* tointer = (ACTIVE_CLIENT_TEXTURE == 0) ? &ATTRIB_POINTERS.uv : &ATTRIB_POINTERS.st;
if(_glComparePointers(tointer, size, type, stride, pointer)) {
@ -1490,7 +1318,7 @@ void APIENTRY glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const G
}
tointer->ptr = pointer;
tointer->stride = stride;
tointer->stride = (stride) ? stride : size * byte_size(type);
tointer->type = type;
tointer->size = size;
@ -1505,15 +1333,13 @@ void APIENTRY glVertexPointer(GLint size, GLenum type, GLsizei stride, const G
return;
}
stride = (stride) ? stride : (size * byte_size(ATTRIB_POINTERS.vertex.type));
if(_glComparePointers(&ATTRIB_POINTERS.vertex, size, type, stride, pointer)) {
// No Change
return;
}
ATTRIB_POINTERS.vertex.ptr = pointer;
ATTRIB_POINTERS.vertex.stride = stride;
ATTRIB_POINTERS.vertex.stride = (stride) ? stride : (size * byte_size(ATTRIB_POINTERS.vertex.type));
ATTRIB_POINTERS.vertex.type = type;
ATTRIB_POINTERS.vertex.size = size;
@ -1528,8 +1354,6 @@ void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const G
return;
}
stride = (stride) ? stride : ((size == GL_BGRA) ? 4 : size) * byte_size(type);
if(_glComparePointers(&ATTRIB_POINTERS.colour, size, type, stride, pointer)) {
// No Change
return;
@ -1538,7 +1362,7 @@ void APIENTRY glColorPointer(GLint size, GLenum type, GLsizei stride, const G
ATTRIB_POINTERS.colour.ptr = pointer;
ATTRIB_POINTERS.colour.type = type;
ATTRIB_POINTERS.colour.size = size;
ATTRIB_POINTERS.colour.stride = stride;
ATTRIB_POINTERS.colour.stride = (stride) ? stride : ((size == GL_BGRA) ? 4 : size) * byte_size(type);
_glRecalcFastPath();
}
@ -1561,8 +1385,6 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin
return;
}
stride = (stride) ? stride : ATTRIB_POINTERS.normal.size * byte_size(type);
if(_glComparePointers(&ATTRIB_POINTERS.normal, 3, type, stride, pointer)) {
// No Change
return;
@ -1570,7 +1392,7 @@ void APIENTRY glNormalPointer(GLenum type, GLsizei stride, const GLvoid * poin
ATTRIB_POINTERS.normal.ptr = pointer;
ATTRIB_POINTERS.normal.size = (type == GL_UNSIGNED_INT_2_10_10_10_REV) ? 1 : 3;
ATTRIB_POINTERS.normal.stride = stride;
ATTRIB_POINTERS.normal.stride = (stride) ? stride : ATTRIB_POINTERS.normal.size * byte_size(type);
ATTRIB_POINTERS.normal.type = type;
_glRecalcFastPath();

View File

@ -5,123 +5,75 @@
MAKE_FUNC(POLYMODE)
{
static const float w = 1.0f;
if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
const Vertex* const start = _glSubmissionTargetStart(target);
const VertexExtra* const ve_start = aligned_vector_at(target->extras, 0);
const GLuint vstride = ATTRIB_POINTERS.vertex.stride;
GLuint uvstride = ATTRIB_POINTERS.uv.stride;
GLuint ststride = ATTRIB_POINTERS.st.stride;
GLuint dstride = ATTRIB_POINTERS.colour.stride;
GLuint nstride = ATTRIB_POINTERS.normal.stride;
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? ATTRIB_POINTERS.vertex.ptr + (first * vstride) : NULL;
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + (first * uvstride) : NULL;
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (first * dstride) : NULL;
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (first * ststride) : NULL;
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (first * nstride) : NULL;
const float w = 1.0f;
if(!pos) {
/* If we don't have vertices, do nothing */
return;
}
/* This is the best value we have. PROCESS_VERTEX_FLAGS needs to operate on quads and tris and so
this need to be divisible by 4 and 3. Even though we should be able to go much higher than this
and still be cache-local, trial and error says otherwise... */
if(!col) {
col = (GLubyte*) &U4ONE;
dstride = 0;
}
#define BATCH_SIZE 60
if(!uv) {
uv = (GLubyte*) &F2ZERO;
uvstride = 0;
}
GLuint min = 0;
GLuint stride;
const GLubyte* ptr;
Vertex* it;
VertexExtra* ve;
if(!st) {
st = (GLubyte*) &F2ZERO;
ststride = 0;
}
if(!n) {
n = (GLubyte*) &F3Z;
nstride = 0;
}
for(min = 0; min < count; min += BATCH_SIZE) {
const Vertex* start = ((Vertex*) _glSubmissionTargetStart(target)) + min;
const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE;
const int offset = (first + min);
VertexExtra* ve = (VertexExtra*) ve_start;
Vertex* it = (Vertex*) start;
stride = ATTRIB_POINTERS.uv.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + ((first + min) * stride) : NULL;
it = (Vertex*) start;
for(int_fast32_t i = 0; i < count; ++i) {
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
pos += vstride;
PREFETCH(pos);
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
it->uv[0] = ((float*) ptr)[0];
it->uv[1] = ((float*) ptr)[1];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
it->uv[0] = 0;
it->uv[1] = 0;
}
}
*((Float2*) it->uv) = *((Float2*) uv);
uv += uvstride;
PREFETCH(uv);
stride = ATTRIB_POINTERS.colour.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (offset * stride) : NULL;
it = (Vertex*) start;
*((uint32_t*) it->bgra) = *((uint32_t*) col);
col += dstride;
PREFETCH(col);
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
it->bgra[0] = ptr[0];
it->bgra[1] = ptr[1];
it->bgra[2] = ptr[2];
it->bgra[3] = ptr[3];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
*((uint32_t*) it->bgra) = ~0;
}
}
*((Float2*) ve->st) = *((Float2*) st);
st += ststride;
PREFETCH(st);
stride = ATTRIB_POINTERS.vertex.stride;
ptr = ATTRIB_POINTERS.vertex.ptr + (offset * stride);
it = (Vertex*) start;
*((Float3*) ve->nxyz) = *((Float3*) n);
n += nstride;
PREFETCH(n);
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
TransformVertex((const float*) ptr, &w, it->xyz, &it->w);
PROCESS_VERTEX_FLAGS(it, min + i);
ptr += stride;
}
PROCESS_VERTEX_FLAGS(it, i);
start = aligned_vector_at(target->extras, min);
stride = ATTRIB_POINTERS.st.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (offset * stride) : NULL;
ve = (VertexExtra*) start;
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
PREFETCH(ptr + stride);
ve->st[0] = ((float*) ptr)[0];
ve->st[1] = ((float*) ptr)[1];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
ve->st[0] = 0;
ve->st[1] = 0;
}
}
stride = ATTRIB_POINTERS.normal.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (offset * stride) : NULL;
ve = (VertexExtra*) start;
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
PREFETCH(ptr + stride);
ve->nxyz[0] = ((float*) ptr)[0];
ve->nxyz[1] = ((float*) ptr)[1];
ve->nxyz[2] = ((float*) ptr)[2];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
ve->nxyz[0] = 0;
ve->nxyz[1] = 0;
ve->nxyz[2] = 0;
}
}
++it;
++ve;
}
}

View File

@ -46,22 +46,10 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
config->initial_pt_capacity = 512 * 3;
config->initial_tr_capacity = 1024 * 3;
config->initial_immediate_capacity = 1024 * 3;
// RGBA4444 is the fastest general format - 8888 will cause a perf issue
config->internal_palette_format = GL_RGBA4;
config->texture_twiddle = GL_TRUE;
config->internal_palette_format = GL_RGBA8;
}
static bool _initialized = false;
void APIENTRY glKosInitEx(GLdcConfig* config) {
if(_initialized) {
return;
}
_initialized = true;
TRACE();
printf("\nWelcome to GLdc! Git revision: %s\n\n", GLDC_VERSION);
@ -70,7 +58,6 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
AUTOSORT_ENABLED = config->autosort_enabled;
_glInitSubmissionTarget();
_glInitMatrices();
_glInitAttributePointers();
_glInitContext();
@ -82,10 +69,6 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
_glInitTextures();
if(config->texture_twiddle) {
glEnable(GL_TEXTURE_TWIDDLE_KOS);
}
OP_LIST.list_type = GPU_LIST_OP_POLY;
PT_LIST.list_type = GPU_LIST_PT_POLY;
TR_LIST.list_type = GPU_LIST_TR_POLY;
@ -99,12 +82,6 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
aligned_vector_reserve(&TR_LIST.vector, config->initial_tr_capacity);
}
void APIENTRY glKosShutdown() {
aligned_vector_clear(&OP_LIST.vector);
aligned_vector_clear(&PT_LIST.vector);
aligned_vector_clear(&TR_LIST.vector);
}
void APIENTRY glKosInit() {
GLdcConfig config;
glKosInitConfig(&config);
@ -115,23 +92,17 @@ void APIENTRY glKosSwapBuffers() {
TRACE();
SceneBegin();
if(aligned_vector_header(&OP_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_OP_POLY);
SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector));
SceneListFinish();
}
SceneListBegin(GPU_LIST_OP_POLY);
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
SceneListFinish();
if(aligned_vector_header(&PT_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_PT_POLY);
SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector));
SceneListFinish();
}
SceneListBegin(GPU_LIST_PT_POLY);
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
SceneListFinish();
if(aligned_vector_header(&TR_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_TR_POLY);
SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector));
SceneListFinish();
}
SceneListBegin(GPU_LIST_TR_POLY);
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
SceneListFinish();
SceneFinish();
aligned_vector_clear(&OP_LIST.vector);
@ -139,4 +110,4 @@ void APIENTRY glKosSwapBuffers() {
aligned_vector_clear(&TR_LIST.vector);
_glApplyScissor(true);
}
}

View File

@ -1,4 +1,5 @@
#include <stdio.h>
#include <assert.h>
#include "private.h"
@ -196,7 +197,7 @@ static GL_NO_INSTRUMENT GLboolean _glCalculateAverageTexel(GLuint pvrFormat, con
*d1 = PACK_ARGB4444(a, r, g, b);
} else {
gl_assert(format == ARGB1555);
assert(format == ARGB1555);
GLushort* s1 = (GLushort*) src1;
GLushort* s2 = (GLushort*) src2;
@ -245,8 +246,8 @@ GLboolean _glGenerateMipmapTwiddled(const GLuint pvrFormat, const GLubyte* prevD
const GLubyte* s4 = s3 + stride;
GLubyte* t = &thisData[j * stride];
gl_assert(s4 < prevData + (lastHeight * lastWidth * stride));
gl_assert(t < thisData + (thisHeight * thisWidth * stride));
assert(s4 < prevData + (lastHeight * lastWidth * stride));
assert(t < thisData + (thisHeight * thisWidth * stride));
_glCalculateAverageTexel(pvrFormat, s1, s2, s3, s4, t);
}
@ -254,7 +255,7 @@ GLboolean _glGenerateMipmapTwiddled(const GLuint pvrFormat, const GLubyte* prevD
return GL_TRUE;
}
void APIENTRY glGenerateMipmap(GLenum target) {
void APIENTRY glGenerateMipmapEXT(GLenum target) {
if(target != GL_TEXTURE_2D) {
_glKosThrowError(GL_INVALID_OPERATION, __func__);
return;
@ -322,7 +323,7 @@ void APIENTRY glGenerateMipmap(GLenum target) {
prevHeight = thisHeight;
}
gl_assert(_glIsMipmapComplete(tex));
assert(_glIsMipmapComplete(tex));
}
/* generate mipmaps for any image provided by the user and then pass them to OpenGL */
@ -334,7 +335,7 @@ GLAPI GLvoid APIENTRY gluBuild2DMipmaps(GLenum target, GLint internalFormat,
unsigned byte data, and finally the data itself. */
glTexImage2D(GL_TEXTURE_2D, 0, 3, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data);
glGenerateMipmap(GL_TEXTURE_2D);
glGenerateMipmapEXT(GL_TEXTURE_2D);
}
GLenum APIENTRY glCheckFramebufferStatusEXT(GLenum target) {

View File

@ -1,20 +0,0 @@
#ifndef NDEBUG
/* We're debugging, use normal assert */
#include <assert.h>
#define gl_assert assert
#else
/* Release mode, use our custom assert */
#include <stdio.h>
#include <stdlib.h>
#define gl_assert(x) \
do {\
if(!(x)) {\
fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
exit(1);\
}\
} while(0); \
#endif

View File

@ -17,10 +17,10 @@ extern inline GLuint _glRecalcFastPath();
GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
static GLfloat __attribute__((aligned(32))) NORMAL[3] = {0.0f, 0.0f, 1.0f};
static GLubyte __attribute__((aligned(32))) COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
static GLfloat __attribute__((aligned(32))) UV_COORD[2] = {0.0f, 0.0f};
static GLfloat __attribute__((aligned(32))) ST_COORD[2] = {0.0f, 0.0f};
static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f};
static GLubyte COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
static GLfloat UV_COORD[2] = {0.0f, 0.0f};
static GLfloat ST_COORD[2] = {0.0f, 0.0f};
static AlignedVector VERTICES;
static AttribPointerList IM_ATTRIBS;
@ -30,7 +30,7 @@ static AttribPointerList IM_ATTRIBS;
can be applied faster */
static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
typedef struct __attribute__((aligned(32))) {
typedef struct {
GLfloat x;
GLfloat y;
GLfloat z;
@ -50,7 +50,7 @@ void _glInitImmediateMode(GLuint initial_size) {
aligned_vector_init(&VERTICES, sizeof(IMVertex));
aligned_vector_reserve(&VERTICES, initial_size);
IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES);
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.vertex.size = 3;
IM_ATTRIBS.vertex.type = GL_FLOAT;
IM_ATTRIBS.vertex.stride = sizeof(IMVertex);
@ -161,27 +161,31 @@ void APIENTRY glColor3fv(const GLfloat* v) {
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
unsigned int cap = VERTICES.capacity;
IMVertex* vert = aligned_vector_extend(&VERTICES, 1);
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12;
IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8;
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8;
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4;
if(cap != VERTICES.capacity) {
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t);
}
uint32_t* dest = (uint32_t*) &vert->x;
*(dest++) = *((uint32_t*) &x);
*(dest++) = *((uint32_t*) &y);
*(dest++) = *((uint32_t*) &z);
*(dest++) = *((uint32_t*) &UV_COORD[0]);
*(dest++) = *((uint32_t*) &UV_COORD[1]);
*(dest++) = *((uint32_t*) &ST_COORD[0]);
*(dest++) = *((uint32_t*) &ST_COORD[1]);
*(dest++) = *((uint32_t*) COLOR);
*(dest++) = *((uint32_t*) &NORMAL[0]);
*(dest++) = *((uint32_t*) &NORMAL[1]);
*(dest++) = *((uint32_t*) &NORMAL[2]);
vert->x = x;
vert->y = y;
vert->z = z;
vert->u = UV_COORD[0];
vert->v = UV_COORD[1];
vert->s = ST_COORD[0];
vert->t = ST_COORD[1];
*((uint32_t*) vert->bgra) = *((uint32_t*) COLOR);
vert->nx = NORMAL[0];
vert->ny = NORMAL[1];
vert->nz = NORMAL[2];
}
void APIENTRY glVertex3fv(const GLfloat* v) {
@ -271,13 +275,13 @@ void APIENTRY glEnd() {
#ifndef NDEBUG
// Immediate mode should always activate the fast path
GLuint fastPathEnabled = _glRecalcFastPath();
gl_assert(fastPathEnabled);
assert(fastPathEnabled);
#else
/* If we're not debugging, set to true - we assume we haven't broken it! */
FAST_PATH_ENABLED = GL_TRUE;
#endif
glDrawArrays(ACTIVE_POLYGON_MODE, 0, aligned_vector_header(&VERTICES)->size);
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
ATTRIB_POINTERS = stashed_attrib_pointers;

View File

@ -1,3 +1,4 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
@ -12,107 +13,126 @@
* multiplier ends up less than this value */
#define ATTENUATION_THRESHOLD 100.0f
static GLfloat SCENE_AMBIENT [] = {0.2f, 0.2f, 0.2f, 1.0f};
static GLboolean VIEWER_IN_EYE_COORDINATES = GL_TRUE;
static GLenum COLOR_CONTROL = GL_SINGLE_COLOR;
void _glPrecalcLightingValues(GLuint mask) {
static GLenum COLOR_MATERIAL_MODE = GL_AMBIENT_AND_DIFFUSE;
#define AMBIENT_MASK 1
#define DIFFUSE_MASK 2
#define EMISSION_MASK 4
#define SPECULAR_MASK 8
#define SCENE_AMBIENT_MASK 16
static GLenum COLOR_MATERIAL_MASK = AMBIENT_MASK | DIFFUSE_MASK;
static LightSource LIGHTS[MAX_GLDC_LIGHTS];
static GLuint ENABLED_LIGHT_COUNT = 0;
static Material MATERIAL;
GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask);
static void recalcEnabledLights() {
GLubyte i;
ENABLED_LIGHT_COUNT = 0;
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
if(LIGHTS[i].isEnabled) {
ENABLED_LIGHT_COUNT++;
}
}
}
void _glInitLights() {
static GLfloat ONE [] = {1.0f, 1.0f, 1.0f, 1.0f};
static GLfloat ZERO [] = {0.0f, 0.0f, 0.0f, 1.0f};
static GLfloat PARTIAL [] = {0.2f, 0.2f, 0.2f, 1.0f};
static GLfloat MOSTLY [] = {0.8f, 0.8f, 0.8f, 1.0f};
memcpy(MATERIAL.ambient, PARTIAL, sizeof(GLfloat) * 4);
memcpy(MATERIAL.diffuse, MOSTLY, sizeof(GLfloat) * 4);
memcpy(MATERIAL.specular, ZERO, sizeof(GLfloat) * 4);
memcpy(MATERIAL.emissive, ZERO, sizeof(GLfloat) * 4);
MATERIAL.exponent = 0.0f;
GLubyte i;
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
memcpy(LIGHTS[i].ambient, ZERO, sizeof(GLfloat) * 4);
memcpy(LIGHTS[i].diffuse, ONE, sizeof(GLfloat) * 4);
memcpy(LIGHTS[i].specular, ONE, sizeof(GLfloat) * 4);
if(i > 0) {
memcpy(LIGHTS[i].diffuse, ZERO, sizeof(GLfloat) * 4);
memcpy(LIGHTS[i].specular, ZERO, sizeof(GLfloat) * 4);
}
LIGHTS[i].position[0] = LIGHTS[i].position[1] = LIGHTS[i].position[3] = 0.0f;
LIGHTS[i].position[2] = 1.0f;
LIGHTS[i].isDirectional = GL_TRUE;
LIGHTS[i].isEnabled = GL_FALSE;
LIGHTS[i].spot_direction[0] = LIGHTS[i].spot_direction[1] = 0.0f;
LIGHTS[i].spot_direction[2] = -1.0f;
LIGHTS[i].spot_exponent = 0.0f;
LIGHTS[i].spot_cutoff = 180.0f;
LIGHTS[i].constant_attenuation = 1.0f;
LIGHTS[i].linear_attenuation = 0.0f;
LIGHTS[i].quadratic_attenuation = 0.0f;
}
_glPrecalcLightingValues(~0);
recalcEnabledLights();
}
void _glEnableLight(GLubyte light, GLboolean value) {
LIGHTS[light].isEnabled = value;
recalcEnabledLights();
}
GL_FORCE_INLINE void _glPrecalcLightingValues(GLuint mask) {
/* Pre-calculate lighting values */
GLshort i;
Material* material = _glActiveMaterial();
if(mask & AMBIENT_MASK) {
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LightSource* light = _glLightAt(i);
light->ambientMaterial[0] = light->ambient[0] * material->ambient[0];
light->ambientMaterial[1] = light->ambient[1] * material->ambient[1];
light->ambientMaterial[2] = light->ambient[2] * material->ambient[2];
light->ambientMaterial[3] = light->ambient[3] * material->ambient[3];
LIGHTS[i].ambientMaterial[0] = LIGHTS[i].ambient[0] * MATERIAL.ambient[0];
LIGHTS[i].ambientMaterial[1] = LIGHTS[i].ambient[1] * MATERIAL.ambient[1];
LIGHTS[i].ambientMaterial[2] = LIGHTS[i].ambient[2] * MATERIAL.ambient[2];
LIGHTS[i].ambientMaterial[3] = LIGHTS[i].ambient[3] * MATERIAL.ambient[3];
}
}
if(mask & DIFFUSE_MASK) {
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LightSource* light = _glLightAt(i);
light->diffuseMaterial[0] = light->diffuse[0] * material->diffuse[0];
light->diffuseMaterial[1] = light->diffuse[1] * material->diffuse[1];
light->diffuseMaterial[2] = light->diffuse[2] * material->diffuse[2];
light->diffuseMaterial[3] = light->diffuse[3] * material->diffuse[3];
LIGHTS[i].diffuseMaterial[0] = LIGHTS[i].diffuse[0] * MATERIAL.diffuse[0];
LIGHTS[i].diffuseMaterial[1] = LIGHTS[i].diffuse[1] * MATERIAL.diffuse[1];
LIGHTS[i].diffuseMaterial[2] = LIGHTS[i].diffuse[2] * MATERIAL.diffuse[2];
LIGHTS[i].diffuseMaterial[3] = LIGHTS[i].diffuse[3] * MATERIAL.diffuse[3];
}
}
if(mask & SPECULAR_MASK) {
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LightSource* light = _glLightAt(i);
light->specularMaterial[0] = light->specular[0] * material->specular[0];
light->specularMaterial[1] = light->specular[1] * material->specular[1];
light->specularMaterial[2] = light->specular[2] * material->specular[2];
light->specularMaterial[3] = light->specular[3] * material->specular[3];
LIGHTS[i].specularMaterial[0] = LIGHTS[i].specular[0] * MATERIAL.specular[0];
LIGHTS[i].specularMaterial[1] = LIGHTS[i].specular[1] * MATERIAL.specular[1];
LIGHTS[i].specularMaterial[2] = LIGHTS[i].specular[2] * MATERIAL.specular[2];
LIGHTS[i].specularMaterial[3] = LIGHTS[i].specular[3] * MATERIAL.specular[3];
}
}
/* If ambient or emission are updated, we need to update
* the base colour. */
if((mask & AMBIENT_MASK) || (mask & EMISSION_MASK) || (mask & SCENE_AMBIENT_MASK)) {
GLfloat* scene_ambient = _glLightModelSceneAmbient();
material->baseColour[0] = MATH_fmac(scene_ambient[0], material->ambient[0], material->emissive[0]);
material->baseColour[1] = MATH_fmac(scene_ambient[1], material->ambient[1], material->emissive[1]);
material->baseColour[2] = MATH_fmac(scene_ambient[2], material->ambient[2], material->emissive[2]);
material->baseColour[3] = MATH_fmac(scene_ambient[3], material->ambient[3], material->emissive[3]);
MATERIAL.baseColour[0] = MATH_fmac(SCENE_AMBIENT[0], MATERIAL.ambient[0], MATERIAL.emissive[0]);
MATERIAL.baseColour[1] = MATH_fmac(SCENE_AMBIENT[1], MATERIAL.ambient[1], MATERIAL.emissive[1]);
MATERIAL.baseColour[2] = MATH_fmac(SCENE_AMBIENT[2], MATERIAL.ambient[2], MATERIAL.emissive[2]);
MATERIAL.baseColour[3] = MATH_fmac(SCENE_AMBIENT[3], MATERIAL.ambient[3], MATERIAL.emissive[3]);
}
}
void _glInitLights() {
Material* material = _glActiveMaterial();
static GLfloat ONE [] = {1.0f, 1.0f, 1.0f, 1.0f};
static GLfloat ZERO [] = {0.0f, 0.0f, 0.0f, 1.0f};
static GLfloat PARTIAL [] = {0.2f, 0.2f, 0.2f, 1.0f};
static GLfloat MOSTLY [] = {0.8f, 0.8f, 0.8f, 1.0f};
memcpy(material->ambient, PARTIAL, sizeof(GLfloat) * 4);
memcpy(material->diffuse, MOSTLY, sizeof(GLfloat) * 4);
memcpy(material->specular, ZERO, sizeof(GLfloat) * 4);
memcpy(material->emissive, ZERO, sizeof(GLfloat) * 4);
material->exponent = 0.0f;
GLubyte i;
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LightSource* light = _glLightAt(i);
memcpy(light->ambient, ZERO, sizeof(GLfloat) * 4);
memcpy(light->diffuse, ONE, sizeof(GLfloat) * 4);
memcpy(light->specular, ONE, sizeof(GLfloat) * 4);
if(i > 0) {
memcpy(light->diffuse, ZERO, sizeof(GLfloat) * 4);
memcpy(light->specular, ZERO, sizeof(GLfloat) * 4);
}
light->position[0] = light->position[1] = light->position[3] = 0.0f;
light->position[2] = 1.0f;
light->isDirectional = GL_TRUE;
light->isEnabled = GL_FALSE;
light->spot_direction[0] = light->spot_direction[1] = 0.0f;
light->spot_direction[2] = -1.0f;
light->spot_exponent = 0.0f;
light->spot_cutoff = 180.0f;
light->constant_attenuation = 1.0f;
light->linear_attenuation = 0.0f;
light->quadratic_attenuation = 0.0f;
}
_glPrecalcLightingValues(~0);
_glRecalcEnabledLights();
}
void APIENTRY glLightModelf(GLenum pname, const GLfloat param) {
glLightModelfv(pname, &param);
}
@ -124,13 +144,11 @@ void APIENTRY glLightModeli(GLenum pname, const GLint param) {
void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) {
switch(pname) {
case GL_LIGHT_MODEL_AMBIENT: {
if(memcmp(_glGetLightModelSceneAmbient(), params, sizeof(float) * 4) != 0) {
_glSetLightModelSceneAmbient(params);
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
}
memcpy(SCENE_AMBIENT, params, sizeof(GLfloat) * 4);
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
} break;
case GL_LIGHT_MODEL_LOCAL_VIEWER:
_glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE);
VIEWER_IN_EYE_COORDINATES = (*params) ? GL_TRUE : GL_FALSE;
break;
case GL_LIGHT_MODEL_TWO_SIDE:
/* Not implemented */
@ -142,10 +160,10 @@ void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) {
void APIENTRY glLightModeliv(GLenum pname, const GLint* params) {
switch(pname) {
case GL_LIGHT_MODEL_COLOR_CONTROL:
_glSetLightModelColorControl(*params);
COLOR_CONTROL = *params;
break;
case GL_LIGHT_MODEL_LOCAL_VIEWER:
_glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE);
VIEWER_IN_EYE_COORDINATES = (*params) ? GL_TRUE : GL_FALSE;
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
@ -156,7 +174,6 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
GLubyte idx = light & 0xF;
if(idx >= MAX_GLDC_LIGHTS) {
_glKosThrowError(GL_INVALID_VALUE, __func__);
return;
}
@ -164,46 +181,33 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
(pname == GL_DIFFUSE) ? DIFFUSE_MASK :
(pname == GL_SPECULAR) ? SPECULAR_MASK : 0;
LightSource* l = _glLightAt(idx);
GLboolean rebuild = GL_FALSE;
switch(pname) {
case GL_AMBIENT:
rebuild = memcmp(l->ambient, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->ambient, params, sizeof(GLfloat) * 4);
}
memcpy(LIGHTS[idx].ambient, params, sizeof(GLfloat) * 4);
break;
case GL_DIFFUSE:
rebuild = memcmp(l->diffuse, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->diffuse, params, sizeof(GLfloat) * 4);
}
memcpy(LIGHTS[idx].diffuse, params, sizeof(GLfloat) * 4);
break;
case GL_SPECULAR:
rebuild = memcmp(l->specular, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->specular, params, sizeof(GLfloat) * 4);
}
memcpy(LIGHTS[idx].specular, params, sizeof(GLfloat) * 4);
break;
case GL_POSITION: {
memcpy(l->position, params, sizeof(GLfloat) * 4);
_glMatrixLoadModelView();
memcpy(LIGHTS[idx].position, params, sizeof(GLfloat) * 4);
l->isDirectional = params[3] == 0.0f;
LIGHTS[idx].isDirectional = params[3] == 0.0f;
if(l->isDirectional) {
if(LIGHTS[idx].isDirectional) {
//FIXME: Do we need to rotate directional lights?
} else {
_glMatrixLoadModelView();
TransformVec3(l->position);
TransformVec3(LIGHTS[idx].position);
}
}
break;
case GL_SPOT_DIRECTION: {
l->spot_direction[0] = params[0];
l->spot_direction[1] = params[1];
l->spot_direction[2] = params[2];
LIGHTS[idx].spot_direction[0] = params[0];
LIGHTS[idx].spot_direction[1] = params[1];
LIGHTS[idx].spot_direction[2] = params[2];
} break;
case GL_CONSTANT_ATTENUATION:
case GL_LINEAR_ATTENUATION:
@ -217,36 +221,31 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
return;
}
if(rebuild) {
_glPrecalcLightingValues(mask);
}
_glPrecalcLightingValues(mask);
}
void APIENTRY glLightf(GLenum light, GLenum pname, GLfloat param) {
GLubyte idx = light & 0xF;
if(idx >= MAX_GLDC_LIGHTS) {
_glKosThrowError(GL_INVALID_VALUE, __func__);
return;
}
LightSource* l = _glLightAt(idx);
switch(pname) {
case GL_CONSTANT_ATTENUATION:
l->constant_attenuation = param;
LIGHTS[idx].constant_attenuation = param;
break;
case GL_LINEAR_ATTENUATION:
l->linear_attenuation = param;
LIGHTS[idx].linear_attenuation = param;
break;
case GL_QUADRATIC_ATTENUATION:
l->quadratic_attenuation = param;
LIGHTS[idx].quadratic_attenuation = param;
break;
case GL_SPOT_EXPONENT:
l->spot_exponent = param;
LIGHTS[idx].spot_exponent = param;
break;
case GL_SPOT_CUTOFF:
l->spot_cutoff = param;
LIGHTS[idx].spot_cutoff = param;
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
@ -259,7 +258,7 @@ void APIENTRY glMaterialf(GLenum face, GLenum pname, const GLfloat param) {
return;
}
_glActiveMaterial()->exponent = _MIN(param, 128); /* 128 is the max according to the GL spec */
MATERIAL.exponent = _MIN(param, 128); /* 128 is the max according to the GL spec */
}
void APIENTRY glMateriali(GLenum face, GLenum pname, const GLint param) {
@ -272,49 +271,25 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
return;
}
Material* material = _glActiveMaterial();
GLboolean rebuild = GL_FALSE;
switch(pname) {
case GL_SHININESS:
glMaterialf(face, pname, *params);
rebuild = GL_TRUE;
break;
case GL_AMBIENT: {
if(memcmp(material->ambient, params, sizeof(float) * 4) != 0) {
vec4cpy(material->ambient, params);
rebuild = GL_TRUE;
}
} break;
case GL_AMBIENT:
vec4cpy(MATERIAL.ambient, params);
break;
case GL_DIFFUSE:
if(memcmp(material->diffuse, params, sizeof(float) * 4) != 0) {
vec4cpy(material->diffuse, params);
rebuild = GL_TRUE;
}
vec4cpy(MATERIAL.diffuse, params);
break;
case GL_SPECULAR:
if(memcmp(material->specular, params, sizeof(float) * 4) != 0) {
vec4cpy(material->specular, params);
rebuild = GL_TRUE;
}
vec4cpy(MATERIAL.specular, params);
break;
case GL_EMISSION:
if(memcmp(material->emissive, params, sizeof(float) * 4) != 0) {
vec4cpy(material->emissive, params);
rebuild = GL_TRUE;
}
vec4cpy(MATERIAL.emissive, params);
break;
case GL_AMBIENT_AND_DIFFUSE: {
rebuild = (
memcmp(material->ambient, params, sizeof(float) * 4) != 0 ||
memcmp(material->diffuse, params, sizeof(float) * 4) != 0
);
if(rebuild) {
vec4cpy(material->ambient, params);
vec4cpy(material->diffuse, params);
}
vec4cpy(MATERIAL.ambient, params);
vec4cpy(MATERIAL.diffuse, params);
} break;
case GL_COLOR_INDEXES:
default: {
@ -323,15 +298,13 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
}
}
if(rebuild) {
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
(pname == GL_SPECULAR) ? SPECULAR_MASK:
(pname == GL_EMISSION) ? EMISSION_MASK:
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
(pname == GL_SPECULAR) ? SPECULAR_MASK:
(pname == GL_EMISSION) ? EMISSION_MASK:
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
_glPrecalcLightingValues(updateMask);
}
_glPrecalcLightingValues(updateMask);
}
void APIENTRY glColorMaterial(GLenum face, GLenum mode) {
@ -346,13 +319,12 @@ void APIENTRY glColorMaterial(GLenum face, GLenum mode) {
return;
}
GLenum mask = (mode == GL_AMBIENT) ? AMBIENT_MASK:
COLOR_MATERIAL_MASK = (mode == GL_AMBIENT) ? AMBIENT_MASK:
(mode == GL_DIFFUSE) ? DIFFUSE_MASK:
(mode == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK:
(mode == GL_EMISSION) ? EMISSION_MASK : SPECULAR_MASK;
_glSetColorMaterialMask(mask);
_glSetColorMaterialMode(mode);
COLOR_MATERIAL_MODE = mode;
}
GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) {
@ -365,68 +337,44 @@ GL_FORCE_INLINE void bgra_to_float(const uint8_t* input, GLfloat* output) {
}
void _glUpdateColourMaterialA(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(material->ambient, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
vec4cpy(MATERIAL.ambient, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
}
void _glUpdateColourMaterialD(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(material->diffuse, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
vec4cpy(MATERIAL.diffuse, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
}
void _glUpdateColourMaterialE(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(material->emissive, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
vec4cpy(MATERIAL.emissive, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
}
void _glUpdateColourMaterialAD(const GLubyte* argb) {
Material* material = _glActiveMaterial();
float colour[4];
bgra_to_float(argb, colour);
vec4cpy(material->ambient, colour);
vec4cpy(material->diffuse, colour);
GLenum mask = _glColorMaterialMode();
_glPrecalcLightingValues(mask);
vec4cpy(MATERIAL.ambient, colour);
vec4cpy(MATERIAL.diffuse, colour);
_glPrecalcLightingValues(COLOR_MATERIAL_MASK);
}
GL_FORCE_INLINE GLboolean isDiffuseColorMaterial() {
GLenum mode = _glColorMaterialMode();
return (
mode == GL_DIFFUSE ||
mode == GL_AMBIENT_AND_DIFFUSE
);
return (COLOR_MATERIAL_MODE == GL_DIFFUSE || COLOR_MATERIAL_MODE == GL_AMBIENT_AND_DIFFUSE);
}
GL_FORCE_INLINE GLboolean isAmbientColorMaterial() {
GLenum mode = _glColorMaterialMode();
return (
mode == GL_AMBIENT ||
mode == GL_AMBIENT_AND_DIFFUSE
);
return (COLOR_MATERIAL_MODE == GL_AMBIENT || COLOR_MATERIAL_MODE == GL_AMBIENT_AND_DIFFUSE);
}
GL_FORCE_INLINE GLboolean isSpecularColorMaterial() {
GLenum mode = _glColorMaterialMode();
return (mode == GL_SPECULAR);
return (COLOR_MATERIAL_MODE == GL_SPECULAR);
}
/*
@ -445,7 +393,7 @@ GL_FORCE_INLINE float faster_pow2(const float p) {
}
GL_FORCE_INLINE float faster_log2(const float x) {
gl_assert(x >= 0.0f);
assert(x >= 0.0f);
const union { float f; uint32_t i; } vx = { x };
const float y = (float) (vx.i) * 1.1920928955078125e-7f;
@ -461,15 +409,12 @@ GL_FORCE_INLINE void _glLightVertexDirectional(
float* final, uint8_t lid,
float LdotN, float NdotH) {
Material* material = _glActiveMaterial();
LightSource* light = _glLightAt(lid);
float FI = (material->exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, material->exponent) : 1.0f;
float FI = (MATERIAL.exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f;
#define _PROCESS_COMPONENT(X) \
final[X] += (LdotN * light->diffuseMaterial[X] + light->ambientMaterial[X]) \
+ (FI * light->specularMaterial[X]); \
final[X] += (LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
+ (FI * LIGHTS[lid].specularMaterial[X]); \
_PROCESS_COMPONENT(0);
_PROCESS_COMPONENT(1);
@ -482,15 +427,12 @@ GL_FORCE_INLINE void _glLightVertexPoint(
float* final, uint8_t lid,
float LdotN, float NdotH, float att) {
Material* material = _glActiveMaterial();
LightSource* light = _glLightAt(lid);
float FI = (material->exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, material->exponent) : 1.0f;
float FI = (MATERIAL.exponent) ?
faster_pow((LdotN != 0.0f) * NdotH, MATERIAL.exponent) : 1.0f;
#define _PROCESS_COMPONENT(X) \
final[X] += ((LdotN * light->diffuseMaterial[X] + light->ambientMaterial[X]) \
+ (FI * light->specularMaterial[X])) * att; \
final[X] += ((LdotN * LIGHTS[lid].diffuseMaterial[X] + LIGHTS[lid].ambientMaterial[X]) \
+ (FI * LIGHTS[lid].specularMaterial[X])) * att; \
_PROCESS_COMPONENT(0);
_PROCESS_COMPONENT(1);
@ -503,8 +445,6 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
GLubyte i;
GLuint j;
Material* material = _glActiveMaterial();
Vertex* vertex = vertices;
EyeSpaceData* data = es;
@ -512,8 +452,7 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
void (*updateColourMaterial)(const GLubyte*) = NULL;
if(_glIsColorMaterialEnabled()) {
GLenum mode = _glColorMaterialMode();
switch(mode) {
switch(COLOR_MATERIAL_MODE) {
case GL_AMBIENT:
updateColourMaterial = _glUpdateColourMaterialA;
break;
@ -536,10 +475,10 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
}
/* Copy the base colour across */
vec4cpy(data->finalColour, material->baseColour);
vec4cpy(data->finalColour, MATERIAL.baseColour);
}
if(!_glEnabledLightCount()) {
if(!ENABLED_LIGHT_COUNT) {
return;
}
@ -557,17 +496,15 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
const float Nz = data->n[2];
for(i = 0; i < MAX_GLDC_LIGHTS; ++i) {
LightSource* light = _glLightAt(i);
if(!light->isEnabled) {
if(!LIGHTS[i].isEnabled) {
continue;
}
float Lx = light->position[0] - vertex->xyz[0];
float Ly = light->position[1] - vertex->xyz[1];
float Lz = light->position[2] - vertex->xyz[2];
float Lx = LIGHTS[i].position[0] - vertex->xyz[0];
float Ly = LIGHTS[i].position[1] - vertex->xyz[1];
float Lz = LIGHTS[i].position[2] - vertex->xyz[2];
if(light->isDirectional) {
if(LIGHTS[i].isDirectional) {
float Hx = (Lx + 0);
float Hy = (Ly + 0);
float Hz = (Lz + 1);
@ -596,9 +533,9 @@ void _glPerformLighting(Vertex* vertices, EyeSpaceData* es, const uint32_t count
VEC3_LENGTH(Lx, Ly, Lz, D);
float att = (
light->constant_attenuation + (
light->linear_attenuation * D
) + (light->quadratic_attenuation * D * D)
LIGHTS[i].constant_attenuation + (
LIGHTS[i].linear_attenuation * D
) + (LIGHTS[i].quadratic_attenuation * D * D)
);
/* Anything over the attenuation threshold will

View File

@ -13,8 +13,8 @@
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2;
static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture
static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX;
static Stack MATRIX_STACKS[3]; // modelview, projection, texture
static Matrix4x4 NORMAL_MATRIX __attribute__((aligned(32)));
Viewport VIEWPORT = {
0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f
@ -23,7 +23,7 @@ Viewport VIEWPORT = {
static GLenum MATRIX_MODE = GL_MODELVIEW;
static GLubyte MATRIX_IDX = 0;
static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = {
static const Matrix4x4 IDENTITY = {
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
@ -106,11 +106,7 @@ void APIENTRY glMatrixMode(GLenum mode) {
}
void APIENTRY glPushMatrix() {
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top);
(void) ret;
assert(ret);
stack_push(MATRIX_STACKS + MATRIX_IDX, stack_top(MATRIX_STACKS + MATRIX_IDX));
}
void APIENTRY glPopMatrix() {
@ -131,16 +127,10 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) {
0.0f, 0.0f, 1.0f, 0.0f,
x, y, z, 1.0f
};
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
UploadMatrix4x4(top);
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
MultiplyMatrix4x4(&trn);
top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
DownloadMatrix4x4(top);
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
if(MATRIX_MODE == GL_MODELVIEW) {
recalculateNormalMatrix();
@ -210,9 +200,28 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) {
/* Load an arbitrary matrix */
void APIENTRY glLoadMatrixf(const GLfloat *m) {
static Matrix4x4 __attribute__((aligned(32))) TEMP;
static Matrix4x4 TEMP;
TEMP[M0] = m[0];
TEMP[M1] = m[1];
TEMP[M2] = m[2];
TEMP[M3] = m[3];
TEMP[M4] = m[4];
TEMP[M5] = m[5];
TEMP[M6] = m[6];
TEMP[M7] = m[7];
TEMP[M8] = m[8];
TEMP[M9] = m[9];
TEMP[M10] = m[10];
TEMP[M11] = m[11];
TEMP[M12] = m[12];
TEMP[M13] = m[13];
TEMP[M14] = m[14];
TEMP[M15] = m[15];
memcpy(TEMP, m, sizeof(float) * 16);
stack_replace(MATRIX_STACKS + MATRIX_IDX, TEMP);
if(MATRIX_MODE == GL_MODELVIEW) {
@ -280,10 +289,18 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Multiply the current matrix by an arbitrary matrix */
void glMultMatrixf(const GLfloat *m) {
Matrix4x4 TEMP __attribute__((aligned(32)));
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
const Matrix4x4 *pMatrix;
if (((GLint)m)&0xf){ /* Unaligned matrix */
pMatrix = &TEMP;
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
}
else{
pMatrix = (const Matrix4x4*) m;
}
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
MultiplyMatrix4x4(&TEMP);
MultiplyMatrix4x4(pMatrix);
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
if(MATRIX_MODE == GL_MODELVIEW) {
@ -409,7 +426,7 @@ GL_FORCE_INLINE void vec3f_normalize_sh4(float *v){
void gluLookAt(GLfloat eyex, GLfloat eyey, GLfloat eyez, GLfloat centerx,
GLfloat centery, GLfloat centerz, GLfloat upx, GLfloat upy,
GLfloat upz) {
GLfloat m [16] __attribute__((aligned(32)));
GLfloat m [16];
GLfloat f [3];
GLfloat u [3];
GLfloat s [3];

View File

@ -3,9 +3,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include "gl_assert.h"
#include "types.h"
#include <assert.h>
#define MEMSET(dst, v, size) memset((dst), (v), (size))
@ -261,7 +259,7 @@ typedef float Matrix4x4[16];
void SceneBegin();
void SceneListBegin(GPUList list);
void SceneListSubmit(Vertex* v2, int n);
void SceneListSubmit(void* src, int n);
void SceneListFinish();
void SceneFinish();

View File

@ -4,14 +4,16 @@
#define CLIP_DEBUG 0
#define TA_SQ_ADDR (unsigned int *)(void *) \
(0xe0000000 | (((unsigned long)0x10000000) & 0x03ffffe0))
#define QACRTA ((((unsigned int)0x10000000)>>26)<<2)&0x1c
#define PVR_VERTEX_BUF_SIZE 2560 * 256
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define SQ_BASE_ADDRESS (void*) 0xe0000000
GL_FORCE_INLINE bool glIsVertex(const float flags) {
return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
}
@ -20,6 +22,7 @@ GL_FORCE_INLINE bool glIsLastVertex(const float flags) {
return flags == GPU_CMD_VERTEX_EOL;
}
void InitGPU(_Bool autosort, _Bool fsaa) {
pvr_init_params_t params = {
/* Enable opaque and translucent polygons with size 32 and 32 */
@ -31,427 +34,412 @@ void InitGPU(_Bool autosort, _Bool fsaa) {
};
pvr_init(&params);
/* If we're PAL and we're NOT VGA, then use 50hz by default. This is the safest
thing to do. If someone wants to force 60hz then they can call vid_set_mode later and hopefully
that'll work... */
int cable = vid_check_cable();
int region = flashrom_get_region();
if(region == FLASHROM_REGION_EUROPE && cable != CT_VGA) {
printf("PAL region without VGA - enabling 50hz");
vid_set_mode(DM_640x480_PAL_IL, PM_RGB565);
}
}
void SceneBegin() {
pvr_wait_ready();
pvr_scene_begin();
QACR0 = QACRTA;
QACR1 = QACRTA;
}
void SceneListBegin(GPUList list) {
pvr_list_begin(list);
}
GL_FORCE_INLINE float _glFastInvert(float x) {
return (1.f / __builtin_sqrtf(x * x));
}
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
TRACE();
const float f = _glFastInvert(vertex->w);
const float f = MATH_Fast_Invert(vertex->w);
/* Convert to NDC and apply viewport */
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240;
vertex->xyz[0] = __builtin_fmaf(
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
);
vertex->xyz[1] = h - __builtin_fmaf(
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
);
/* Orthographic projections need to use invZ otherwise we lose
the depth information. As w == 1, and clip-space range is -w to +w
we add 1.0 to the Z to bring it into range. We add a little extra to
avoid a divide by zero.
*/
if(vertex->w == 1.0f) {
vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]);
if(unlikely(vertex->w == 1.0f)) {
vertex->xyz[2] = MATH_Fast_Invert(1.0001f + vertex->xyz[2]);
} else {
vertex->xyz[2] = f;
}
}
static uint32_t *d; // SQ target
volatile uint32_t *sq = SQ_BASE_ADDRESS;
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
#ifndef NDEBUG
assert(!isnan(v->xyz[2]));
assert(!isnan(v->w));
#endif
static inline void _glFlushBuffer() {
TRACE();
#if CLIP_DEBUG
printf("Submitting: %x (%x)\n", v, v->flags);
#endif
/* Wait for both store queues to complete */
sq = (uint32_t*) 0xe0000000;
sq[0] = sq[8] = 0;
uint32_t *s = (uint32_t*) v;
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
d[0] = *(s++);
d[1] = *(s++);
d[2] = *(s++);
d[3] = *(s++);
d[4] = *(s++);
d[5] = *(s++);
d[6] = *(s++);
d[7] = *(s++);
__asm__("pref @%0" : : "r"(d));
d += 8;
}
static inline void _glPushHeaderOrVertex(Vertex* v) {
TRACE();
static struct {
Vertex* v;
int visible;
} triangle[3];
uint32_t* s = (uint32_t*) v;
sq[0] = *(s++);
sq[1] = *(s++);
sq[2] = *(s++);
sq[3] = *(s++);
sq[4] = *(s++);
sq[5] = *(s++);
sq[6] = *(s++);
sq[7] = *(s++);
__asm__("pref @%0" : : "r"(sq));
sq += 8;
static int tri_count = 0;
static int strip_count = 0;
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
const int MASK1 = 0x00FF00FF;
const int MASK2 = 0xFF00FF00;
const int f2 = 256 * t;
const int f1 = 256 - f2;
const uint32_t a = *(uint32_t*) v1;
const uint32_t b = *(uint32_t*) v2;
*((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) |
(((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2);
}
static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) {
const static float o = 0.003921569f; // 1 / 255
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
/* Clipping time! */
const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2];
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
const float invt = 1.0f - t;
vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f;
vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
float t = MATH_Fast_Divide(d0, (d0 - d1)) + epsilon;
vout->w = invt * v1->w + t * v2->w;
t = (t > 1.0f) ? 1.0f : t;
t = (t < 0.0f) ? 0.0f : t;
const float m = 255 * t;
const float n = 255 - m;
vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);
vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
}
#define SPAN_SORT_CFG 0x005F8030
static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888;
static volatile uint32_t *QACR = (uint32_t*) 0xFF000038;
GL_FORCE_INLINE void ClearTriangle() {
tri_count = 0;
}
void SceneListSubmit(Vertex* v2, int n) {
TRACE();
/* You need at least a header, and 3 vertices to render anything */
if(n < 4) {
GL_FORCE_INLINE void ShiftTriangle() {
if(!tri_count) {
return;
}
tri_count--;
triangle[0] = triangle[1];
triangle[1] = triangle[2];
#ifndef NDEBUG
triangle[2].v = NULL;
triangle[2].visible = false;
#endif
}
GL_FORCE_INLINE void ShiftRotateTriangle() {
if(!tri_count) {
return;
}
if(triangle[0].v < triangle[1].v) {
triangle[0] = triangle[2];
} else {
triangle[1] = triangle[2];
}
tri_count--;
}
void SceneListSubmit(void* src, int n) {
/* Do everything, everywhere, all at once */
/* Prep store queues */
d = (uint32_t*) TA_SQ_ADDR;
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
const float h = GetVideoMode()->height;
PVR_SET(SPAN_SORT_CFG, 0x0);
if(!ZNEAR_CLIPPING_ENABLED) {
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(vertex);
}
//Set PVR DMA registers
*PVR_LMMODE0 = 0;
*PVR_LMMODE1 = 0;
/* Wait for both store queues to complete */
d = (uint32_t *)0xe0000000;
d[0] = d[8] = 0;
//Set QACR registers
QACR[1] = QACR[0] = 0x11;
return;
}
tri_count = 0;
strip_count = 0;
#if CLIP_DEBUG
Vertex* vertex = (Vertex*) src;
for(int i = 0; i < n; ++i) {
fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
}
fprintf(stderr, "----\n");
printf("----\n");
#endif
uint8_t visible_mask = 0;
uint8_t counter = 0;
sq = SQ_BASE_ADDRESS;
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
for(int i = 0; i < n; ++i, ++v2) {
PREFETCH(v2 + 1);
switch(v2->flags) {
case GPU_CMD_VERTEX_EOL:
if(counter < 2) {
bool is_last_in_strip = glIsLastVertex(vertex->flags);
/* Wait until we fill the triangle */
if(tri_count < 3) {
if(likely(glIsVertex(vertex->flags))) {
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
tri_count++;
strip_count++;
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(vertex);
}
if(tri_count < 3) {
continue;
}
counter = 0;
break;
case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue;
}
break;
default:
_glPushHeaderOrVertex(v2);
counter = 0;
continue;
};
Vertex* const v0 = v2 - 2;
Vertex* const v1 = v2 - 1;
visible_mask = (
(v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 |
(counter == 0) << 3
);
switch(visible_mask) {
case 15: /* All visible, but final vertex in strip */
{
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h);
_glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2, h);
_glPushHeaderOrVertex(v2);
}
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
#if CLIP_DEBUG
printf("SC: %d\n", strip_count);
#endif
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
/* If we got here, then triangle contains 3 vertices */
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
if(visible_mask == 7) {
#if CLIP_DEBUG
printf("Visible\n");
#endif
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(vertex - 2);
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(vertex);
tri_count = 0;
strip_count = 0;
}
break;
case 1:
/* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
ShiftRotateTriangle();
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
} else if(visible_mask) {
/* Clipping time!
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
#if CLIP_DEBUG
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
Vertex tmp;
if(strip_count > 3) {
#if CLIP_DEBUG
printf("Flush\n");
#endif
tmp = *(vertex - 2);
/* If we had triangles ahead of this one, submit and finalize */
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *(vertex - 1);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
}
break;
case 10:
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
switch(visible_mask) {
case 1: {
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v1, v2, b);
b->flags = v2->flags;
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 2: {
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 3: {
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 4: {
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 5: {
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 6: {
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
default:
break;
}
break;
case 11:
case 3: /* First and second vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a);
a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
}
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
}
break;
case 13:
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
} else {
/* Invisible? Move to the next in the strip */
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case 5: /* First and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
}
break;
case 8:
default:
break;
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
}
strip_count = 2;
ShiftRotateTriangle();
}
}
_glFlushBuffer();
/* Wait for both store queues to complete */
d = (uint32_t *)0xe0000000;
d[0] = d[8] = 0;
}
void SceneListFinish() {

View File

@ -24,50 +24,21 @@
#define GL_FORCE_INLINE static GL_INLINE_DEBUG
#endif
#define PREFETCH(addr) __builtin_prefetch((addr))
/* Copy `len` bytes from `src` to `dest` one byte per iteration using an
 * SH4 dt/bf.s delayed-branch loop, then return `dest` (memcpy-style).
 * A zero length is handled up front so the dt-countdown never underflows.
 * NOTE(review): all pointer arithmetic is done in uint32_t, so this is
 * only correct on 32-bit targets (Dreamcast) — confirm this is never
 * compiled for a 64-bit host build. */
GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) {
    if(!len) {
        return dest;
    }

    const uint8_t *s = (uint8_t *)src;
    uint8_t *d = (uint8_t *)dest;

    /* The loop stores through @(offset, in) after `in` has already been
     * post-incremented, hence the (s + 1) below. */
    uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
    // Underflow would be like adding a negative offset

    // Can use 'd' as a scratch reg now
    asm volatile (
        "clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
        ".align 2\n"
        "0:\n\t"
        "dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
        "mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
        "bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2)
        " mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
        : [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
        : [offset] "z" (diff) // inputs
        : "t", "memory" // clobbers
    );

    return dest;
}
#define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr)))
/* We use sq_cpy if the src and size is properly aligned. We control that the
* destination is properly aligned so we assert that. */
#define FASTCPY(dst, src, bytes) \
do { \
if(bytes % 32 == 0 && ((uintptr_t) src % 4) == 0) { \
gl_assert(((uintptr_t) dst) % 32 == 0); \
assert(((uintptr_t) dst) % 32 == 0); \
sq_cpy(dst, src, bytes); \
} else { \
memcpy_fast(dst, src, bytes); \
memcpy(dst, src, bytes); \
} \
} while(0)
#define MEMCPY4(dst, src, bytes) memcpy_fast(dst, src, bytes)
#define MEMCPY4(dst, src, bytes) memcpy4(dst, src, bytes)
#define MEMSET4(dst, v, size) memset4((dst), (v), (size))

View File

@ -10,9 +10,8 @@
#include "software/parameter_equation.h"
#define CLIP_DEBUG 0
#define ZNEAR_CLIPPING_ENABLED 1
static size_t AVAILABLE_VRAM = 8 * 1024 * 1024;
static size_t AVAILABLE_VRAM = 16 * 1024 * 1024;
static Matrix4x4 MATRIX;
static SDL_Window* WINDOW = NULL;
@ -30,13 +29,83 @@ static VideoMode vid_mode = {
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
AlignedVector vbuffer;
/* Rasterize one triangle with SDL. xyz[0]/xyz[1] of each Vertex are used
 * directly as pixel coordinates; per-vertex colour (bgra) is interpolated
 * across the triangle via edge/parameter equations and each covered pixel
 * is drawn with SDL_RenderDrawPoint. Honours the global CULL_MODE; when a
 * back-facing triangle must still be drawn, v0/v1 are swapped and the
 * edge equations rebuilt so the parameter interpolation stays valid. */
static void DrawTriangle(Vertex* v0, Vertex* v1, Vertex* v2) {
    // Compute triangle bounding box.
    int minX = MIN(MIN(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
    int maxX = MAX(MAX(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
    int minY = MIN(MIN(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
    int maxY = MAX(MAX(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);

    // Clip to scissor rect.
    /* NOTE(review): clamps to width/height rather than width-1/height-1,
     * so the sampling loops below can touch one pixel past the last
     * row/column — confirm intended. */
    minX = MAX(minX, 0);
    maxX = MIN(maxX, vid_mode.width);
    minY = MAX(minY, 0);
    maxY = MIN(maxY, vid_mode.height);

    // Compute edge equations.
    EdgeEquation e0, e1, e2;
    EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]);
    EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]);
    EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]);

    /* Signed area: the sign encodes the winding direction. */
    float area = 0.5 * (e0.c + e1.c + e2.c);

    /* This is very ugly. I don't understand the math properly
     * so I just swap the vertex order if something is back-facing
     * and we want to render it. Patches welcome! */
#define REVERSE_WINDING() \
    Vertex* tv = v0; \
    v0 = v1; \
    v1 = tv; \
    EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); \
    EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); \
    EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); \
    area = 0.5f * (e0.c + e1.c + e2.c) \

    // Check if triangle is backfacing.
    if(CULL_MODE == GPU_CULLING_CCW) {
        if(area < 0) {
            return;
        }
    } else if(CULL_MODE == GPU_CULLING_CW) {
        /* NOTE(review): in this branch positive-area triangles are
         * dropped and negative-area ones are rewound and drawn; verify
         * this matches the intended CW-culling convention — the comments
         * below read the opposite way. */
        if(area < 0) {
            // We only draw front-facing polygons, so swap
            // the back to front and draw
            REVERSE_WINDING();
        } else {
            // Front facing, so bail
            return;
        }
    } else if(area < 0) {
        /* We're not culling, but this is backfacing, so swap vertices and edges */
        REVERSE_WINDING();
    }

    /* Colour interpolants: bgra[2]=red, bgra[1]=green, bgra[0]=blue. */
    ParameterEquation r, g, b;
    ParameterEquationInit(&r, v0->bgra[2], v1->bgra[2], v2->bgra[2], &e0, &e1, &e2, area);
    ParameterEquationInit(&g, v0->bgra[1], v1->bgra[1], v2->bgra[1], &e0, &e1, &e2, area);
    ParameterEquationInit(&b, v0->bgra[0], v1->bgra[0], v2->bgra[0], &e0, &e1, &e2, area);

    // Add 0.5 to sample at pixel centers.
    for (float x = minX + 0.5f, xm = maxX + 0.5f; x <= xm; x += 1.0f)
    for (float y = minY + 0.5f, ym = maxY + 0.5f; y <= ym; y += 1.0f)
    {
        /* A pixel is inside the triangle iff it passes all three edge
         * tests. */
        if (EdgeEquationTestPoint(&e0, x, y) && EdgeEquationTestPoint(&e1, x, y) && EdgeEquationTestPoint(&e2, x, y)) {
            int rint = ParameterEquationEvaluate(&r, x, y);
            int gint = ParameterEquationEvaluate(&g, x, y);
            int bint = ParameterEquationEvaluate(&b, x, y);
            SDL_SetRenderDrawColor(RENDERER, rint, gint, bint, 255);
            SDL_RenderDrawPoint(RENDERER, x, y);
        }
    }
}
void InitGPU(_Bool autosort, _Bool fsaa) {
// 32-bit SDL has trouble with the wayland driver for some reason
setenv("SDL_VIDEODRIVER", "x11", 1);
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS);
WINDOW = SDL_CreateWindow(
@ -50,8 +119,6 @@ void InitGPU(_Bool autosort, _Bool fsaa) {
RENDERER = SDL_CreateRenderer(
WINDOW, -1, SDL_RENDERER_ACCELERATED
);
aligned_vector_init(&vbuffer, sizeof(SDL_Vertex));
}
void SceneBegin() {
@ -94,11 +161,11 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
}
}
GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) {
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
#ifndef NDEBUG
if(glIsVertex(v->flags)) {
gl_assert(!isnan(v->xyz[2]));
gl_assert(!isnan(v->w));
assert(!isnan(v->xyz[2]));
assert(!isnan(v->w));
}
#endif
@ -109,329 +176,335 @@ GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) {
BUFFER[vertex_counter++] = *v;
}
/* No-op placeholder in the software backend (other backends flush a
 * hardware queue here). */
static inline void _glFlushBuffer() {}
/* Sliding three-vertex window over the triangle strip currently being
 * assembled. Each slot holds the source vertex plus a flag recording
 * whether it lies in front of the near plane (filled in by
 * SceneListSubmit). */
static struct {
    Vertex* v;
    int visible;
} triangle[3];

/* Number of populated slots in triangle[] (0..3). */
static int tri_count = 0;
/* Vertices consumed so far from the current strip; SceneListSubmit uses
 * it (strip_count > 3) to know whether earlier strip vertices were
 * already submitted and need finalizing before clipped output. */
static int strip_count = 0;
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
const static float o = 0.003921569f; // 1 / 255
const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2];
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
const float invt = 1.0f - t;
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
const int MASK1 = 0x00FF00FF;
const int MASK2 = 0xFF00FF00;
vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
const int f2 = 256 * t;
const int f1 = 256 - f2;
vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
const uint32_t a = *(uint32_t*) v1;
const uint32_t b = *(uint32_t*) v2;
vout->w = invt * v1->w + t * v2->w;
const float m = 255 * t;
const float n = 255 - m;
vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
*((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) |
(((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2);
}
void SceneListSubmit(Vertex* v2, int n) {
/* You need at least a header, and 3 vertices to render anything */
if(n < 4) {
/* Intersect the edge v1 -> v2 with the near plane (w + z == 0) and write
 * the interpolated vertex to vout. Position, w and UVs are lerped with
 * fused multiply-adds; colour is blended by interpolateColour(). The
 * interpolation factor is biased by a tiny epsilon (sign taken from the
 * edge direction) so the result never sits exactly on the plane, and is
 * clamped to [0, 1]. vout->flags is left for the caller to assign. */
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
    /* Signed distances of both endpoints from the near plane. */
    const float dist1 = v1->w + v1->xyz[2];
    const float dist2 = v2->w + v2->xyz[2];

    float t = (dist1 / (dist1 - dist2)) + ((dist1 < dist2) ? -0.00001f : 0.00001f);
    if(t > 1.0f) t = 1.0f;
    if(t < 0.0f) t = 0.0f;

    for(int j = 0; j < 3; ++j) {
        vout->xyz[j] = __builtin_fmaf(v2->xyz[j] - v1->xyz[j], t, v1->xyz[j]);
    }

    vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);

    for(int j = 0; j < 2; ++j) {
        vout->uv[j] = __builtin_fmaf(v2->uv[j] - v1->uv[j], t, v1->uv[j]);
    }

    interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
}
/* Discard any partially-assembled triangle in the strip window. */
GL_FORCE_INLINE void ClearTriangle() {
    tri_count = 0;
}
/* Drop the oldest vertex of the triangle window: shift the remaining two
 * entries down one slot so the next incoming vertex lands in triangle[2].
 * No-op when the window is empty. */
GL_FORCE_INLINE void ShiftTriangle() {
    if(!tri_count) {
        return;
    }

    /* NOTE(review): 'h' is computed but never used in this function —
     * looks like a stray line; confirm against project history. */
    const float h = GetVideoMode()->height;

    tri_count--;
    triangle[0] = triangle[1];
    triangle[1] = triangle[2];

    /* NOTE(review): these two locals are also unused here — verify they
     * belong in this function. */
    uint8_t visible_mask = 0;
    uint8_t counter = 0;

#ifndef NDEBUG
    /* Poison the vacated slot in debug builds so stale reads are obvious. */
    triangle[2].v = NULL;
    triangle[2].visible = false;
#endif
}
for(int i = 0; i < n; ++i, ++v2) {
PREFETCH(v2 + 1);
switch(v2->flags) {
case GPU_CMD_VERTEX_EOL:
if(counter < 2) {
continue;
}
counter = 0;
break;
case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue;
}
break;
default:
_glPushHeaderOrVertex(v2);
counter = 0;
continue;
};
Vertex* const v0 = v2 - 2;
Vertex* const v1 = v2 - 1;
visible_mask = (
(v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 |
(counter == 0) << 3
);
switch(visible_mask) {
case 15: /* All visible, but final vertex in strip */
{
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h);
_glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2, h);
_glPushHeaderOrVertex(v2);
}
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 1:
/* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case 10:
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = v2->flags;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 11:
case 3: /* First and second vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a);
a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
}
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
}
break;
case 13:
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case 5: /* First and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
}
break;
case 8:
default:
break;
}
/* Retire one vertex from the triangle window: whichever of triangle[0] /
 * triangle[1] points at the earlier source vertex (compared by address)
 * is replaced by triangle[2], then the population count drops by one.
 * No-op when the window is empty. */
GL_FORCE_INLINE void ShiftRotateTriangle() {
    if(!tri_count) {
        return;
    }

    /* NOTE(review): this flush call (a no-op in this backend) looks out
     * of place in the middle of a window-shift helper — confirm it
     * belongs here. */
    _glFlushBuffer();

    if(triangle[0].v < triangle[1].v) {
        triangle[0] = triangle[2];
    } else {
        triangle[1] = triangle[2];
    }

    tri_count--;
}
/* Feed a display list (GPU headers + triangle-strip vertices) into the
 * software backend's vertex buffer, clipping each strip triangle against
 * the near plane when ZNEAR_CLIPPING_ENABLED is set.
 *
 * src: array of n Vertex records. Records whose flags are not vertex
 *      commands are forwarded unchanged as headers (and reset the strip).
 * n:   number of records at src.
 *
 * Mutates the file-scope strip state: triangle[], tri_count, strip_count.
 * A vertex counts as "visible" when z >= -w (in front of the near plane). */
void SceneListSubmit(void* src, int n) {
    /* Perform perspective divide on each vertex */
    Vertex* vertex = (Vertex*) src;

    /* Viewport height, required by _glPerspectiveDivideVertex. */
    const float h = GetVideoMode()->height;

    /* If Z-clipping is disabled, just fire everything over to the buffer */
    if(!ZNEAR_CLIPPING_ENABLED) {
        for(int i = 0; i < n; ++i, ++vertex) {
            PREFETCH(vertex + 1);
            if(glIsVertex(vertex->flags)) {
                _glPerspectiveDivideVertex(vertex, h);
            }
            _glSubmitHeaderOrVertex(vertex);
        }

        return;
    }

    tri_count = 0;
    strip_count = 0;

#if CLIP_DEBUG
    printf("----\n");
#endif

    for(int i = 0; i < n; ++i, ++vertex) {
        PREFETCH(vertex + 1);
        bool is_last_in_strip = glIsLastVertex(vertex->flags);

        /* Wait until we fill the triangle */
        if(tri_count < 3) {
            if(glIsVertex(vertex->flags)) {
                triangle[tri_count].v = vertex;
                /* "visible" == in front of the near plane. */
                triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
                tri_count++;
                strip_count++;
            } else {
                /* We hit a header */
                tri_count = 0;
                strip_count = 0;
                _glSubmitHeaderOrVertex(vertex);
            }

            if(tri_count < 3) {
                continue;
            }
        }

#if CLIP_DEBUG
        printf("SC: %d\n", strip_count);
#endif

        /* If we got here, then triangle contains 3 vertices.
         * Bit N of visible_mask is set when triangle[N] is in front of
         * the near plane. */
        int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);

        if(visible_mask == 7) {
#if CLIP_DEBUG
            printf("Visible\n");
#endif
            /* All the vertices are visible! We divide and submit v0, then shift */
            _glPerspectiveDivideVertex(vertex - 2, h);
            _glSubmitHeaderOrVertex(vertex - 2);

            if(is_last_in_strip) {
                /* End of strip: the remaining two vertices get flushed
                 * too, and the window resets for the next strip. */
                _glPerspectiveDivideVertex(vertex - 1, h);
                _glSubmitHeaderOrVertex(vertex - 1);
                _glPerspectiveDivideVertex(vertex, h);
                _glSubmitHeaderOrVertex(vertex);
                tri_count = 0;
                strip_count = 0;
            }

            ShiftRotateTriangle();
        } else if(visible_mask) {
            /* Clipping time!

            There are 6 distinct possibilities when clipping a triangle. 3 of them result
            in another triangle, 3 of them result in a quadrilateral.

            Assuming you iterate the edges of the triangle in order, and create a new *visible*
            vertex when you cross the plane, and discard vertices behind the plane, then the only
            difference between the two cases is that the final two vertices that need submitting have
            to be reversed.

            Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
            be used in a subsequent triangle in the strip and would end up being double divided.
            */
#if CLIP_DEBUG
            printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
            printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
            Vertex tmp;
            if(strip_count > 3) {
#if CLIP_DEBUG
                printf("Flush\n");
#endif
                tmp = *(vertex - 2);
                /* If we had triangles ahead of this one, submit and finalize */
                _glPerspectiveDivideVertex(&tmp, h);
                _glSubmitHeaderOrVertex(&tmp);

                tmp = *(vertex - 1);
                tmp.flags = GPU_CMD_VERTEX_EOL;
                _glPerspectiveDivideVertex(&tmp, h);
                _glSubmitHeaderOrVertex(&tmp);
            }

            /* In the labels below, "N" is the original vertex triangle[N]
             * and "Na" is a new vertex produced by clipping an edge that
             * touches vertex N (see the _glClipEdge calls in each case).
             * Triangle cases end with GPU_CMD_VERTEX_EOL on the third
             * output vertex; quad cases on the fourth. */
            switch(visible_mask) {
                case 1: {
                    /* 0, 0a, 2a */
                    /* Only triangle[0] visible -> clipped triangle. */
                    tmp = *triangle[0].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX_EOL;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);
                } break;
                case 2: {
                    /* 0a, 1, 1a */
                    /* Only triangle[1] visible -> clipped triangle. */
                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    tmp = *triangle[1].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX_EOL;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);
                } break;
                case 3: {
                    /* 0, 1, 2a, 1a */
                    /* triangle[0] and triangle[1] visible -> quad. */
                    tmp = *triangle[0].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    tmp = *triangle[1].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX_EOL;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);
                } break;
                case 4: {
                    /* 1a, 2, 2a */
                    /* Only triangle[2] visible -> clipped triangle. */
                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    tmp = *triangle[2].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX_EOL;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);
                } break;
                case 5: {
                    /* 0, 0a, 2, 1a */
                    /* triangle[0] and triangle[2] visible -> quad. */
                    tmp = *triangle[0].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    tmp = *triangle[2].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[1].v, triangle[2].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX_EOL;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);
                } break;
                case 6: {
                    /* 0a, 1, 2a, 2 */
                    /* triangle[1] and triangle[2] visible -> quad. */
                    _glClipEdge(triangle[0].v, triangle[1].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    tmp = *triangle[1].v;
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    _glClipEdge(triangle[2].v, triangle[0].v, &tmp);
                    tmp.flags = GPU_CMD_VERTEX;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);

                    tmp = *triangle[2].v;
                    tmp.flags = GPU_CMD_VERTEX_EOL;
                    _glPerspectiveDivideVertex(&tmp, h);
                    _glSubmitHeaderOrVertex(&tmp);
                } break;
                default:
                    break;
            }

            /* If this was the last in the strip, we don't need to
            submit anything else, we just wipe the tri_count */
            if(is_last_in_strip) {
                tri_count = 0;
                strip_count = 0;
            } else {
                ShiftRotateTriangle();
                strip_count = 2;
            }
        } else {
            /* Invisible? Move to the next in the strip */
            /* NOTE(review): strip_count is unconditionally set to 2 below,
             * overwriting the reset to 0 performed when is_last_in_strip —
             * the clipped branch above orders these the other way.
             * Confirm this is intentional. */
            if(is_last_in_strip) {
                tri_count = 0;
                strip_count = 0;
            }

            strip_count = 2;
            ShiftRotateTriangle();
        }
    }
}
void SceneListFinish() {
@ -463,41 +536,18 @@ void SceneListFinish() {
Vertex* v0 = (Vertex*) (flags - step - step);
Vertex* v1 = (Vertex*) (flags - step);
Vertex* v2 = (Vertex*) (flags);
SDL_Vertex sv0 = {
{v0->xyz[0], v0->xyz[1]},
{v0->bgra[2], v0->bgra[1], v0->bgra[0], v0->bgra[3]},
{v0->uv[0], v0->uv[1]}
};
SDL_Vertex sv1 = {
{v1->xyz[0], v1->xyz[1]},
{v1->bgra[2], v1->bgra[1], v1->bgra[0], v1->bgra[3]},
{v1->uv[0], v1->uv[1]}
};
SDL_Vertex sv2 = {
{v2->xyz[0], v2->xyz[1]},
{v2->bgra[2], v2->bgra[1], v2->bgra[0], v2->bgra[3]},
{v2->uv[0], v2->uv[1]}
};
aligned_vector_push_back(&vbuffer, &sv0, 1);
aligned_vector_push_back(&vbuffer, &sv1, 1);
aligned_vector_push_back(&vbuffer, &sv2, 1);
(vidx % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2);
}
if((*flags) == GPU_CMD_VERTEX_EOL) {
vidx = 0;
}
}
SDL_SetRenderDrawColor(RENDERER, 255, 255, 255, 255);
SDL_RenderGeometry(RENDERER, NULL, aligned_vector_front(&vbuffer), aligned_vector_size(&vbuffer), NULL, 0);
}
void SceneFinish() {
SDL_RenderPresent(RENDERER);
return;
/* Only sensible place to hook the quit signal */
SDL_Event e;
while (SDL_PollEvent(&e)) {

View File

@ -48,8 +48,7 @@ void TransformVec3NoMod(const float* v, float* ret);
/* Transform a 3-element normal using the stored matrix (w == 0).
 * In this backend the call is a deliberate no-op: neither the input nor
 * the output array is touched. */
static inline void TransformNormalNoMod(const float* xIn, float* xOut) {
    (void) xOut;
    (void) xIn;
}
void TransformVertices(Vertex* vertices, const int count);

View File

@ -4,7 +4,6 @@
#include <stdint.h>
#include <stdio.h>
#include "gl_assert.h"
#include "platform.h"
#include "types.h"
@ -164,10 +163,7 @@ typedef struct {
GLboolean isCompressed;
GLboolean isPaletted;
//50
GLenum internalFormat;
//54
GLubyte padding[10]; // Pad to 64-bytes
} __attribute__((aligned(32))) TextureObject;
} TextureObject;
typedef struct {
GLfloat emissive[4];
@ -236,41 +232,11 @@ GL_FORCE_INLINE float clamp(float d, float min, float max) {
return (d < min) ? min : (d > max) ? max : d;
}
/* Copy a single Vertex. On Dreamcast this is done with four paired
 * (64-bit) FP moves: fschg switches the FPU into double-move mode around
 * the fmov.d load/store sequence, and fschg restores it afterwards.
 * Elsewhere it falls back to a plain struct assignment.
 * NOTE(review): the asm path copies exactly 32 bytes and relies on
 * Vertex being 32 bytes with 8-byte-aligned src/dest for fmov.d —
 * confirm against the Vertex definition. */
GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) {
#ifdef __DREAMCAST__
    /* _Complex float occupies a double-precision register pair, giving
     * the asm a 64-bit scratch ("d" constraint). */
    _Complex float double_scratch;

    asm volatile (
        "fschg\n\t"
        "clrs\n\t"
        ".align 2\n\t"
        "fmov.d @%[in]+, %[scratch]\n\t"
        "fmov.d %[scratch], @%[out]\n\t"
        "fmov.d @%[in]+, %[scratch]\n\t"
        "add #8, %[out]\n\t"
        "fmov.d %[scratch], @%[out]\n\t"
        "fmov.d @%[in]+, %[scratch]\n\t"
        "add #8, %[out]\n\t"
        "fmov.d %[scratch], @%[out]\n\t"
        "fmov.d @%[in], %[scratch]\n\t"
        "add #8, %[out]\n\t"
        "fmov.d %[scratch], @%[out]\n\t"
        "fschg\n"
        : [in] "+&r" ((uint32_t) src), [scratch] "=&d" (double_scratch), [out] "+&r" ((uint32_t) dest)
        :
        : "t", "memory" // clobbers
    );
#else
    *dest = *src;
#endif
}
#define swapVertex(a, b) \
do { \
Vertex __attribute__((aligned(32))) c; \
memcpy_vertex(&c, a); \
memcpy_vertex(a, b); \
memcpy_vertex(b, &c); \
Vertex c = *a; \
*a = *b; \
*b = c; \
} while(0)
/* ClipVertex doesn't have room for these, so we need to parse them
@ -284,7 +250,7 @@ typedef struct {
* when a realloc could invalidate pointers. This structure holds all the information
* we need on the target vertex array to allow passing around to the various stages (e.g. generate/clip etc.)
*/
typedef struct __attribute__((aligned(32))) {
typedef struct {
PolyList* output;
uint32_t header_offset; // The offset of the header in the output list
uint32_t start_offset; // The offset into the output list
@ -323,7 +289,6 @@ void _glInitLights();
void _glInitImmediateMode(GLuint initial_size);
void _glInitMatrices();
void _glInitFramebuffers();
void _glInitSubmissionTarget();
void _glMatrixLoadNormal();
void _glMatrixLoadModelView();
@ -339,6 +304,7 @@ Matrix4x4* _glGetModelViewMatrix();
void _glWipeTextureOnFramebuffers(GLuint texture);
PolyContext* _glGetPVRContext();
GLubyte _glInitTextures();
void _glUpdatePVRTextureContext(PolyContext* context, GLshort textureUnit);
@ -378,9 +344,6 @@ extern GLubyte ACTIVE_TEXTURE;
extern GLboolean TEXTURES_ENABLED[];
GLubyte _glGetActiveTexture();
GLint _glGetTextureInternalFormat();
GLboolean _glGetTextureTwiddle();
void _glSetTextureTwiddle(GLboolean v);
GLuint _glGetActiveClientTexture();
TexturePalette* _glGetSharedPalette(GLshort bank);
@ -389,27 +352,26 @@ void _glSetInternalPaletteFormat(GLenum val);
GLboolean _glIsSharedTexturePaletteEnabled();
void _glApplyColorTable(TexturePalette *palette);
GLboolean _glIsBlendingEnabled();
GLboolean _glIsAlphaTestEnabled();
GLboolean _glIsCullingEnabled();
GLboolean _glIsDepthTestEnabled();
GLboolean _glIsDepthWriteEnabled();
GLboolean _glIsScissorTestEnabled();
GLboolean _glIsFogEnabled();
GLenum _glGetDepthFunc();
GLenum _glGetCullFace();
GLenum _glGetFrontFace();
GLenum _glGetBlendSourceFactor();
GLenum _glGetBlendDestFactor();
extern GLboolean BLEND_ENABLED;
extern GLboolean ALPHA_TEST_ENABLED;
extern GLboolean AUTOSORT_ENABLED;
GL_FORCE_INLINE GLboolean _glIsBlendingEnabled() {
return BLEND_ENABLED;
}
GL_FORCE_INLINE GLboolean _glIsAlphaTestEnabled() {
return ALPHA_TEST_ENABLED;
}
extern PolyList OP_LIST;
extern PolyList PT_LIST;
extern PolyList TR_LIST;
GL_FORCE_INLINE PolyList* _glActivePolyList() {
if(_glIsBlendingEnabled()) {
if(BLEND_ENABLED) {
return &TR_LIST;
} else if(_glIsAlphaTestEnabled()) {
} else if(ALPHA_TEST_ENABLED) {
return &PT_LIST;
} else {
return &OP_LIST;
@ -419,9 +381,13 @@ GL_FORCE_INLINE PolyList* _glActivePolyList() {
GLboolean _glIsMipmapComplete(const TextureObject* obj);
GLubyte* _glGetMipmapLocation(const TextureObject* obj, GLuint level);
GLuint _glGetMipmapLevelCount(const TextureObject* obj);
extern GLboolean ZNEAR_CLIPPING_ENABLED;
extern GLboolean LIGHTING_ENABLED;
GLboolean _glIsLightingEnabled();
void _glEnableLight(GLubyte light, GLboolean value);
void _glEnableLight(GLubyte light, unsigned char value);
GLboolean _glIsColorMaterialEnabled();
GLboolean _glIsNormalizeEnabled();
@ -545,35 +511,10 @@ GLuint _glUsedTextureMemory();
GLuint _glFreeContiguousTextureMemory();
void _glApplyScissor(bool force);
void _glSetColorMaterialMask(GLenum mask);
void _glSetColorMaterialMode(GLenum mode);
GLenum _glColorMaterialMode();
Material* _glActiveMaterial();
void _glSetLightModelViewerInEyeCoordinates(GLboolean v);
void _glSetLightModelSceneAmbient(const GLfloat* v);
void _glSetLightModelColorControl(GLint v);
GLuint _glEnabledLightCount();
void _glRecalcEnabledLights();
GLfloat* _glLightModelSceneAmbient();
GLfloat* _glGetLightModelSceneAmbient();
LightSource* _glLightAt(GLuint i);
GLboolean _glNearZClippingEnabled();
GLboolean _glGPUStateIsDirty();
void _glGPUStateMarkClean();
void _glGPUStateMarkDirty();
#define MAX_GLDC_TEXTURE_UNITS 2
#define MAX_GLDC_LIGHTS 8
#define AMBIENT_MASK 1
#define DIFFUSE_MASK 2
#define EMISSION_MASK 4
#define SPECULAR_MASK 8
#define SCENE_AMBIENT_MASK 16
/* This is from KOS pvr_buffers.c */
#define PVR_MIN_Z 0.0001f

View File

@ -4,228 +4,136 @@
#include "private.h"
static PolyContext GL_CONTEXT;
static struct {
GLboolean is_dirty;
PolyContext *_glGetPVRContext() {
return &GL_CONTEXT;
}
/* We can't just use the GL_CONTEXT for this state as the two
* GL states are combined, so we store them separately and then
* calculate the appropriate PVR state from them. */
GLenum depth_func;
GLboolean depth_test_enabled;
GLenum cull_face;
GLenum front_face;
GLboolean culling_enabled;
GLboolean color_material_enabled;
GLboolean znear_clipping_enabled;
GLboolean lighting_enabled;
GLboolean shared_palette_enabled;
GLboolean alpha_test_enabled;
GLboolean polygon_offset_enabled;
GLboolean normalize_enabled;
GLboolean scissor_test_enabled;
GLboolean fog_enabled;
GLboolean depth_mask_enabled;
static GLenum CULL_FACE = GL_BACK;
static GLenum FRONT_FACE = GL_CCW;
static GLboolean CULLING_ENABLED = GL_FALSE;
static GLboolean COLOR_MATERIAL_ENABLED = GL_FALSE;
struct {
GLint x;
GLint y;
GLsizei width;
GLsizei height;
GLboolean applied;
} scissor_rect;
GLboolean ZNEAR_CLIPPING_ENABLED = GL_TRUE;
GLenum blend_sfactor;
GLenum blend_dfactor;
GLboolean blend_enabled;
GLfloat offset_factor;
GLfloat offset_units;
GLboolean LIGHTING_ENABLED = GL_FALSE;
GLfloat scene_ambient[4];
GLboolean viewer_in_eye_coords;
GLenum color_control;
GLenum color_material_mode;
GLenum color_material_mask;
/* Is the shared texture palette enabled? */
static GLboolean SHARED_PALETTE_ENABLED = GL_FALSE;
LightSource lights[MAX_GLDC_LIGHTS];
GLuint enabled_light_count;
Material material;
GLboolean ALPHA_TEST_ENABLED = GL_FALSE;
GLenum shade_model;
} GPUState = {
.is_dirty = GL_TRUE,
.depth_func = GL_LESS,
.depth_test_enabled = GL_FALSE,
.cull_face = GL_BACK,
.front_face = GL_CCW,
.culling_enabled = GL_FALSE,
.color_material_enabled = GL_FALSE,
.znear_clipping_enabled = GL_TRUE,
.lighting_enabled = GL_FALSE,
.shared_palette_enabled = GL_FALSE,
.alpha_test_enabled = GL_FALSE,
.polygon_offset_enabled = GL_FALSE,
.normalize_enabled = GL_FALSE,
.scissor_test_enabled = GL_FALSE,
.fog_enabled = GL_FALSE,
.depth_mask_enabled = GL_FALSE,
.scissor_rect = {0, 0, 640, 480, false},
.blend_sfactor = GL_ONE,
.blend_dfactor = GL_ZERO,
.blend_enabled = GL_FALSE,
.offset_factor = 0.0f,
.offset_units = 0.0f,
.scene_ambient = {0.2f, 0.2f, 0.2f, 1.0f},
.viewer_in_eye_coords = GL_TRUE,
.color_control = GL_SINGLE_COLOR,
.color_material_mode = GL_AMBIENT_AND_DIFFUSE,
.color_material_mask = AMBIENT_MASK | DIFFUSE_MASK,
.lights = {0},
.enabled_light_count = 0,
.material = {0},
.shade_model = GL_SMOOTH
static GLboolean POLYGON_OFFSET_ENABLED = GL_FALSE;
static GLboolean NORMALIZE_ENABLED = GL_FALSE;
static struct {
GLint x;
GLint y;
GLsizei width;
GLsizei height;
GLboolean applied;
} SCISSOR_RECT = {
0, 0, 640, 480, false
};
void _glGPUStateMarkClean() {
GPUState.is_dirty = GL_FALSE;
}
void _glGPUStateMarkDirty() {
GPUState.is_dirty = GL_TRUE;
}
GLboolean _glGPUStateIsDirty() {
return GPUState.is_dirty;
}
Material* _glActiveMaterial() {
return &GPUState.material;
}
LightSource* _glLightAt(GLuint i) {
assert(i < MAX_GLDC_LIGHTS);
return &GPUState.lights[i];
}
void _glEnableLight(GLubyte light, GLboolean value) {
GPUState.lights[light].isEnabled = value;
}
GLboolean _glIsDepthTestEnabled() {
return GPUState.depth_test_enabled;
}
GLenum _glGetDepthFunc() {
return GPUState.depth_func;
}
GLboolean _glIsDepthWriteEnabled() {
return GPUState.depth_mask_enabled;
}
GLenum _glGetShadeModel() {
return GPUState.shade_model;
}
GLuint _glEnabledLightCount() {
return GPUState.enabled_light_count;
}
GLfloat* _glLightModelSceneAmbient() {
return GPUState.scene_ambient;
}
GLboolean _glIsBlendingEnabled() {
return GPUState.blend_enabled;
}
GLboolean _glIsAlphaTestEnabled() {
return GPUState.alpha_test_enabled;
}
GLboolean _glIsCullingEnabled() {
return GPUState.culling_enabled;
}
GLenum _glGetCullFace() {
return GPUState.cull_face;
}
GLenum _glGetFrontFace() {
return GPUState.front_face;
}
GLboolean _glIsFogEnabled() {
return GPUState.fog_enabled;
}
GLboolean _glIsScissorTestEnabled() {
return GPUState.scissor_test_enabled;
}
void _glRecalcEnabledLights() {
GPUState.enabled_light_count = 0;
for(GLubyte i = 0; i < MAX_GLDC_LIGHTS; ++i) {
if(_glLightAt(i)->isEnabled) {
GPUState.enabled_light_count++;
}
}
}
void _glSetLightModelViewerInEyeCoordinates(GLboolean v) {
GPUState.viewer_in_eye_coords = v;
}
void _glSetLightModelSceneAmbient(const GLfloat* v) {
vec4cpy(GPUState.scene_ambient, v);
}
GLfloat* _glGetLightModelSceneAmbient() {
return GPUState.scene_ambient;
}
void _glSetLightModelColorControl(GLint v) {
GPUState.color_control = v;
}
GLenum _glColorMaterialMask() {
return GPUState.color_material_mask;
}
void _glSetColorMaterialMask(GLenum mask) {
GPUState.color_material_mask = mask;
}
void _glSetColorMaterialMode(GLenum mode) {
GPUState.color_material_mode = mode;
}
GLenum _glColorMaterialMode() {
return GPUState.color_material_mode;
}
GLboolean _glIsSharedTexturePaletteEnabled() {
return GPUState.shared_palette_enabled;
}
GLboolean _glNearZClippingEnabled() {
return GPUState.znear_clipping_enabled;
return SHARED_PALETTE_ENABLED;
}
void _glApplyScissor(bool force);
static int _calc_pvr_face_culling() {
if(!CULLING_ENABLED) {
return GPU_CULLING_NONE;
} else {
if(CULL_FACE == GL_BACK) {
return (FRONT_FACE == GL_CW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
} else {
return (FRONT_FACE == GL_CCW) ? GPU_CULLING_CCW : GPU_CULLING_CW;
}
}
}
static GLenum DEPTH_FUNC = GL_LESS;
static GLboolean DEPTH_TEST_ENABLED = GL_FALSE;
static int _calc_pvr_depth_test() {
if(!DEPTH_TEST_ENABLED) {
return GPU_DEPTHCMP_ALWAYS;
}
switch(DEPTH_FUNC) {
case GL_NEVER:
return GPU_DEPTHCMP_NEVER;
case GL_LESS:
return GPU_DEPTHCMP_GREATER;
case GL_EQUAL:
return GPU_DEPTHCMP_EQUAL;
case GL_LEQUAL:
return GPU_DEPTHCMP_GEQUAL;
case GL_GREATER:
return GPU_DEPTHCMP_LESS;
case GL_NOTEQUAL:
return GPU_DEPTHCMP_NOTEQUAL;
case GL_GEQUAL:
return GPU_DEPTHCMP_LEQUAL;
break;
case GL_ALWAYS:
default:
return GPU_DEPTHCMP_ALWAYS;
}
}
static GLenum BLEND_SFACTOR = GL_ONE;
static GLenum BLEND_DFACTOR = GL_ZERO;
GLboolean BLEND_ENABLED = GL_FALSE;
static GLfloat OFFSET_FACTOR = 0.0f;
static GLfloat OFFSET_UNITS = 0.0f;
GLboolean _glIsNormalizeEnabled() {
return GPUState.normalize_enabled;
return NORMALIZE_ENABLED;
}
GLenum _glGetBlendSourceFactor() {
return GPUState.blend_sfactor;
static int _calcPVRBlendFactor(GLenum factor) {
switch(factor) {
case GL_ZERO:
return GPU_BLEND_ZERO;
case GL_SRC_ALPHA:
return GPU_BLEND_SRCALPHA;
case GL_DST_COLOR:
return GPU_BLEND_DESTCOLOR;
case GL_DST_ALPHA:
return GPU_BLEND_DESTALPHA;
case GL_ONE_MINUS_DST_COLOR:
return GPU_BLEND_INVDESTCOLOR;
case GL_ONE_MINUS_SRC_ALPHA:
return GPU_BLEND_INVSRCALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return GPU_BLEND_INVDESTALPHA;
case GL_ONE:
return GPU_BLEND_ONE;
default:
fprintf(stderr, "Invalid blend mode: %u\n", (unsigned int) factor);
return GPU_BLEND_ONE;
}
}
GLenum _glGetBlendDestFactor() {
return GPUState.blend_dfactor;
}
static void _updatePVRBlend(PolyContext* context) {
if(BLEND_ENABLED || ALPHA_TEST_ENABLED) {
context->gen.alpha = GPU_ALPHA_ENABLE;
} else {
context->gen.alpha = GPU_ALPHA_DISABLE;
}
context->blend.src = _calcPVRBlendFactor(BLEND_SFACTOR);
context->blend.dst = _calcPVRBlendFactor(BLEND_DFACTOR);
}
GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func) {
GLubyte found = 0;
@ -255,12 +163,11 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
context->txr2.enable = GPU_TEXTURE_DISABLE;
context->txr2.alpha = GPU_TXRALPHA_DISABLE;
if(!TEXTURES_ENABLED[textureUnit] || !tx1 || !tx1->data) {
context->txr.base = NULL;
if(!TEXTURES_ENABLED[textureUnit] || !tx1) {
return;
}
context->txr.alpha = (GPUState.blend_enabled || GPUState.alpha_test_enabled) ? GPU_TXRALPHA_ENABLE : GPU_TXRALPHA_DISABLE;
context->txr.alpha = (BLEND_ENABLED || ALPHA_TEST_ENABLED) ? GPU_TXRALPHA_ENABLE : GPU_TXRALPHA_DISABLE;
GLuint filter = GPU_FILTER_NEAREST;
GLboolean enableMipmaps = GL_FALSE;
@ -355,22 +262,29 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
}
GLboolean _glIsLightingEnabled() {
return GPUState.lighting_enabled;
return LIGHTING_ENABLED;
}
GLboolean _glIsColorMaterialEnabled() {
return GPUState.color_material_enabled;
return COLOR_MATERIAL_ENABLED;
}
static GLfloat CLEAR_COLOUR[3];
void _glInitContext() {
memset(&GL_CONTEXT, 0, sizeof(PolyContext));
GL_CONTEXT.list_type = GPU_LIST_OP_POLY;
GL_CONTEXT.fmt.color = GPU_CLRFMT_ARGBPACKED;
GL_CONTEXT.fmt.uv = GPU_UVFMT_32BIT;
GL_CONTEXT.gen.color_clamp = GPU_CLRCLAMP_DISABLE;
const VideoMode* mode = GetVideoMode();
GPUState.scissor_rect.x = 0;
GPUState.scissor_rect.y = 0;
GPUState.scissor_rect.width = mode->width;
GPUState.scissor_rect.height = mode->height;
SCISSOR_RECT.x = 0;
SCISSOR_RECT.y = 0;
SCISSOR_RECT.width = mode->width;
SCISSOR_RECT.height = mode->height;
glClearDepth(1.0f);
glDepthFunc(GL_LESS);
@ -398,174 +312,40 @@ void _glInitContext() {
GLAPI void APIENTRY glEnable(GLenum cap) {
switch(cap) {
case GL_TEXTURE_2D:
if(TEXTURES_ENABLED[_glGetActiveTexture()] != GL_TRUE) {
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_TRUE;
break;
case GL_CULL_FACE: {
if(GPUState.culling_enabled != GL_TRUE) {
GPUState.culling_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
CULLING_ENABLED = GL_TRUE;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
} break;
case GL_DEPTH_TEST: {
if(GPUState.depth_test_enabled != GL_TRUE) {
GPUState.depth_test_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
DEPTH_TEST_ENABLED = GL_TRUE;
GL_CONTEXT.depth.comparison = _calc_pvr_depth_test();
} break;
case GL_BLEND: {
if(GPUState.blend_enabled != GL_TRUE) {
GPUState.blend_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
BLEND_ENABLED = GL_TRUE;
_updatePVRBlend(&GL_CONTEXT);
} break;
case GL_SCISSOR_TEST: {
if(GPUState.scissor_test_enabled != GL_TRUE) {
GPUState.scissor_test_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_INSIDE;
_glApplyScissor(false);
} break;
case GL_LIGHTING: {
if(GPUState.lighting_enabled != GL_TRUE) {
GPUState.lighting_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
LIGHTING_ENABLED = GL_TRUE;
} break;
case GL_FOG:
if(GPUState.fog_enabled != GL_TRUE) {
GPUState.fog_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
GL_CONTEXT.gen.fog_type = GPU_FOG_TABLE;
break;
case GL_COLOR_MATERIAL:
if(GPUState.color_material_enabled != GL_TRUE) {
GPUState.color_material_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
COLOR_MATERIAL_ENABLED = GL_TRUE;
break;
case GL_SHARED_TEXTURE_PALETTE_EXT: {
if(GPUState.shared_palette_enabled != GL_TRUE) {
GPUState.shared_palette_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
SHARED_PALETTE_ENABLED = GL_TRUE;
}
break;
case GL_ALPHA_TEST: {
if(GPUState.alpha_test_enabled != GL_TRUE) {
GPUState.alpha_test_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_LIGHT0:
case GL_LIGHT1:
case GL_LIGHT2:
case GL_LIGHT3:
case GL_LIGHT4:
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7: {
LightSource* ptr = _glLightAt(cap & 0xF);
if(ptr->isEnabled != GL_TRUE) {
ptr->isEnabled = GL_TRUE;
_glRecalcEnabledLights();
}
}
break;
case GL_NEARZ_CLIPPING_KOS:
if(GPUState.znear_clipping_enabled != GL_TRUE) {
GPUState.znear_clipping_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
if(GPUState.polygon_offset_enabled != GL_TRUE) {
GPUState.polygon_offset_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_NORMALIZE:
if(GPUState.normalize_enabled != GL_TRUE) {
GPUState.normalize_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_TEXTURE_TWIDDLE_KOS:
_glSetTextureTwiddle(GL_TRUE);
break;
default:
_glKosThrowError(GL_INVALID_VALUE, __func__);
break;
}
}
GLAPI void APIENTRY glDisable(GLenum cap) {
switch(cap) {
case GL_TEXTURE_2D:
if(TEXTURES_ENABLED[_glGetActiveTexture()] != GL_FALSE) {
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_CULL_FACE: {
if(GPUState.culling_enabled != GL_FALSE) {
GPUState.culling_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_DEPTH_TEST: {
if(GPUState.depth_test_enabled != GL_FALSE) {
GPUState.depth_test_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_BLEND: {
if(GPUState.blend_enabled != GL_FALSE) {
GPUState.blend_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_SCISSOR_TEST: {
if(GPUState.scissor_test_enabled != GL_FALSE) {
GPUState.scissor_test_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_LIGHTING: {
if(GPUState.lighting_enabled != GL_FALSE) {
GPUState.lighting_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
} break;
case GL_FOG:
if(GPUState.fog_enabled != GL_FALSE) {
GPUState.fog_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_COLOR_MATERIAL:
if(GPUState.color_material_enabled != GL_FALSE) {
GPUState.color_material_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_SHARED_TEXTURE_PALETTE_EXT: {
if(GPUState.shared_palette_enabled != GL_FALSE) {
GPUState.shared_palette_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
}
break;
case GL_ALPHA_TEST: {
if(GPUState.alpha_test_enabled != GL_FALSE) {
GPUState.alpha_test_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
ALPHA_TEST_ENABLED = GL_TRUE;
_updatePVRBlend(&GL_CONTEXT);
} break;
case GL_LIGHT0:
case GL_LIGHT1:
@ -575,36 +355,82 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
if(GPUState.lights[cap & 0xF].isEnabled) {
_glEnableLight(cap & 0xF, GL_FALSE);
GPUState.is_dirty = GL_TRUE;
}
_glEnableLight(cap & 0xF, GL_TRUE);
break;
case GL_NEARZ_CLIPPING_KOS:
if(GPUState.znear_clipping_enabled != GL_FALSE) {
GPUState.znear_clipping_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
ZNEAR_CLIPPING_ENABLED = GL_TRUE;
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
if(GPUState.polygon_offset_enabled != GL_FALSE) {
GPUState.polygon_offset_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
POLYGON_OFFSET_ENABLED = GL_TRUE;
break;
case GL_NORMALIZE:
if(GPUState.normalize_enabled != GL_FALSE) {
GPUState.normalize_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_TEXTURE_TWIDDLE_KOS:
_glSetTextureTwiddle(GL_FALSE);
NORMALIZE_ENABLED = GL_TRUE;
break;
default:
break;
}
}
GLAPI void APIENTRY glDisable(GLenum cap) {
switch(cap) {
case GL_TEXTURE_2D: {
TEXTURES_ENABLED[_glGetActiveTexture()] = GL_FALSE;
} break;
case GL_CULL_FACE: {
CULLING_ENABLED = GL_FALSE;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
} break;
case GL_DEPTH_TEST: {
DEPTH_TEST_ENABLED = GL_FALSE;
GL_CONTEXT.depth.comparison = _calc_pvr_depth_test();
} break;
case GL_BLEND:
BLEND_ENABLED = GL_FALSE;
_updatePVRBlend(&GL_CONTEXT);
break;
case GL_SCISSOR_TEST: {
GL_CONTEXT.gen.clip_mode = GPU_USERCLIP_DISABLE;
} break;
case GL_LIGHTING: {
LIGHTING_ENABLED = GL_FALSE;
} break;
case GL_FOG:
GL_CONTEXT.gen.fog_type = GPU_FOG_DISABLE;
break;
case GL_COLOR_MATERIAL:
COLOR_MATERIAL_ENABLED = GL_FALSE;
break;
case GL_SHARED_TEXTURE_PALETTE_EXT: {
SHARED_PALETTE_ENABLED = GL_FALSE;
}
break;
case GL_ALPHA_TEST: {
ALPHA_TEST_ENABLED = GL_FALSE;
} break;
case GL_LIGHT0:
case GL_LIGHT1:
case GL_LIGHT2:
case GL_LIGHT3:
case GL_LIGHT4:
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
_glEnableLight(cap & 0xF, GL_FALSE);
break;
case GL_NEARZ_CLIPPING_KOS:
ZNEAR_CLIPPING_ENABLED = GL_FALSE;
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
POLYGON_OFFSET_ENABLED = GL_FALSE;
break;
case GL_NORMALIZE:
NORMALIZE_ENABLED = GL_FALSE;
break;
default:
_glKosThrowError(GL_INVALID_VALUE, __func__);
break;
}
}
@ -651,17 +477,12 @@ GLAPI void APIENTRY glReadBuffer(GLenum mode) {
}
GLAPI void APIENTRY glDepthMask(GLboolean flag) {
if(GPUState.depth_mask_enabled != flag) {
GPUState.depth_mask_enabled = flag;
GPUState.is_dirty = GL_TRUE;
}
GL_CONTEXT.depth.write = (flag == GL_TRUE) ? GPU_DEPTHWRITE_ENABLE : GPU_DEPTHWRITE_DISABLE;
}
GLAPI void APIENTRY glDepthFunc(GLenum func) {
if(GPUState.depth_func != func) {
GPUState.depth_func = func;
GPUState.is_dirty = GL_TRUE;
}
DEPTH_FUNC = func;
GL_CONTEXT.depth.comparison = _calc_pvr_depth_test();
}
/* Hints */
@ -681,34 +502,29 @@ GLAPI void APIENTRY glPolygonMode(GLenum face, GLenum mode) {
/* Culling */
GLAPI void APIENTRY glFrontFace(GLenum mode) {
if(GPUState.front_face != mode) {
GPUState.front_face = mode;
GPUState.is_dirty = GL_TRUE;
}
FRONT_FACE = mode;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
}
GLAPI void APIENTRY glCullFace(GLenum mode) {
if(GPUState.cull_face != mode) {
GPUState.cull_face = mode;
GPUState.is_dirty = GL_TRUE;
}
CULL_FACE = mode;
GL_CONTEXT.gen.culling = _calc_pvr_face_culling();
}
GLenum _glGetShadeModel() {
return (GL_CONTEXT.gen.shading == GPU_SHADE_FLAT) ? GL_FLAT : GL_SMOOTH;
}
/* Shading - Flat or Goraud */
GLAPI void APIENTRY glShadeModel(GLenum mode) {
if(GPUState.shade_model != mode) {
GPUState.shade_model = mode;
GPUState.is_dirty = GL_TRUE;
}
GL_CONTEXT.gen.shading = (mode == GL_SMOOTH) ? GPU_SHADE_GOURAUD : GPU_SHADE_FLAT;
}
/* Blending */
GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor) {
if(GPUState.blend_dfactor != dfactor || GPUState.blend_sfactor != sfactor) {
GPUState.blend_sfactor = sfactor;
GPUState.blend_dfactor = dfactor;
GPUState.is_dirty = GL_TRUE;
}
BLEND_SFACTOR = sfactor;
BLEND_DFACTOR = dfactor;
_updatePVRBlend(&GL_CONTEXT);
}
@ -731,9 +547,8 @@ void glLineWidth(GLfloat width) {
}
void glPolygonOffset(GLfloat factor, GLfloat units) {
GPUState.offset_factor = factor;
GPUState.offset_units = units;
GPUState.is_dirty = GL_TRUE;
OFFSET_FACTOR = factor;
OFFSET_UNITS = units;
}
void glGetTexParameterfv(GLenum target, GLenum pname, GLfloat *params) {
@ -762,20 +577,18 @@ void glPixelStorei(GLenum pname, GLint param) {
void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
if(GPUState.scissor_rect.x == x &&
GPUState.scissor_rect.y == y &&
GPUState.scissor_rect.width == width &&
GPUState.scissor_rect.height == height) {
if(SCISSOR_RECT.x == x &&
SCISSOR_RECT.y == y &&
SCISSOR_RECT.width == width &&
SCISSOR_RECT.height == height) {
return;
}
GPUState.scissor_rect.x = x;
GPUState.scissor_rect.y = y;
GPUState.scissor_rect.width = width;
GPUState.scissor_rect.height = height;
GPUState.scissor_rect.applied = false;
GPUState.is_dirty = GL_TRUE; // FIXME: do we need this?
SCISSOR_RECT.x = x;
SCISSOR_RECT.y = y;
SCISSOR_RECT.width = width;
SCISSOR_RECT.height = height;
SCISSOR_RECT.applied = false;
_glApplyScissor(false);
}
@ -805,12 +618,12 @@ void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
*/
void _glApplyScissor(bool force) {
/* Don't do anyting if clipping is disabled */
if(!GPUState.scissor_test_enabled) {
if(GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_DISABLE) {
return;
}
/* Don't apply if we already applied - nothing changed */
if(GPUState.scissor_rect.applied && !force) {
if(SCISSOR_RECT.applied && !force) {
return;
}
@ -820,31 +633,27 @@ void _glApplyScissor(bool force) {
const VideoMode* vid_mode = GetVideoMode();
GLsizei scissor_width = MAX(MIN(GPUState.scissor_rect.width, vid_mode->width), 0);
GLsizei scissor_height = MAX(MIN(GPUState.scissor_rect.height, vid_mode->height), 0);
GLsizei scissor_width = MAX(MIN(SCISSOR_RECT.width, vid_mode->width), 0);
GLsizei scissor_height = MAX(MIN(SCISSOR_RECT.height, vid_mode->height), 0);
/* force the origin to the lower left-hand corner of the screen */
miny = (vid_mode->height - scissor_height) - GPUState.scissor_rect.y;
maxx = (scissor_width + GPUState.scissor_rect.x);
miny = (vid_mode->height - scissor_height) - SCISSOR_RECT.y;
maxx = (scissor_width + SCISSOR_RECT.x);
maxy = (scissor_height + miny);
/* load command structure while mapping screen coords to TA tiles */
c.flags = GPU_CMD_USERCLIP;
c.d1 = c.d2 = c.d3 = 0;
uint16_t vw = vid_mode->width >> 5;
uint16_t vh = vid_mode->height >> 5;
c.sx = CLAMP(GPUState.scissor_rect.x >> 5, 0, vw);
c.sy = CLAMP(miny >> 5, 0, vh);
c.ex = CLAMP((maxx >> 5) - 1, 0, vw);
c.ey = CLAMP((maxy >> 5) - 1, 0, vh);
c.sx = CLAMP(SCISSOR_RECT.x / 32, 0, vid_mode->width / 32);
c.sy = CLAMP(miny / 32, 0, vid_mode->height / 32);
c.ex = CLAMP((maxx / 32) - 1, 0, vid_mode->width / 32);
c.ey = CLAMP((maxy / 32) - 1, 0, vid_mode->height / 32);
aligned_vector_push_back(&_glOpaquePolyList()->vector, &c, 1);
aligned_vector_push_back(&_glPunchThruPolyList()->vector, &c, 1);
aligned_vector_push_back(&_glTransparentPolyList()->vector, &c, 1);
GPUState.scissor_rect.applied = true;
SCISSOR_RECT.applied = true;
}
void glStencilFunc(GLenum func, GLint ref, GLuint mask) {
@ -862,19 +671,19 @@ void glStencilOp(GLenum sfail, GLenum dpfail, GLenum dppass) {
GLboolean APIENTRY glIsEnabled(GLenum cap) {
switch(cap) {
case GL_DEPTH_TEST:
return GPUState.depth_test_enabled;
return DEPTH_TEST_ENABLED;
case GL_SCISSOR_TEST:
return GPUState.scissor_test_enabled;
return GL_CONTEXT.gen.clip_mode == GPU_USERCLIP_INSIDE;
case GL_CULL_FACE:
return GPUState.culling_enabled;
return CULLING_ENABLED;
case GL_LIGHTING:
return GPUState.lighting_enabled;
return LIGHTING_ENABLED;
case GL_BLEND:
return GPUState.blend_enabled;
return BLEND_ENABLED;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
return GPUState.polygon_offset_enabled;
return POLYGON_OFFSET_ENABLED;
}
return GL_FALSE;
@ -929,10 +738,10 @@ void APIENTRY glGetFloatv(GLenum pname, GLfloat* params) {
MEMCPY4(params, _glGetModelViewMatrix(), sizeof(float) * 16);
break;
case GL_POLYGON_OFFSET_FACTOR:
*params = GPUState.offset_factor;
*params = OFFSET_FACTOR;
break;
case GL_POLYGON_OFFSET_UNITS:
*params = GPUState.offset_units;
*params = OFFSET_UNITS;
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
@ -949,13 +758,13 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) {
*params = (_glGetBoundTexture()) ? _glGetBoundTexture()->index : 0;
break;
case GL_DEPTH_FUNC:
*params = GPUState.depth_func;
*params = DEPTH_FUNC;
break;
case GL_BLEND_SRC:
*params = GPUState.blend_sfactor;
*params = BLEND_SFACTOR;
break;
case GL_BLEND_DST:
*params = GPUState.blend_dfactor;
*params = BLEND_DFACTOR;
break;
case GL_MAX_TEXTURE_SIZE:
*params = MAX_TEXTURE_SIZE;
@ -985,10 +794,6 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) {
case GL_FREE_CONTIGUOUS_TEXTURE_MEMORY_KOS:
*params = _glFreeContiguousTextureMemory();
break;
case GL_TEXTURE_INTERNAL_FORMAT_KOS:
*params = _glGetTextureInternalFormat();
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
break;

File diff suppressed because it is too large Load Diff

View File

@ -13,4 +13,4 @@ typedef struct {
* but we're not using that for now, so having W here makes the code
* simpler */
float w;
} __attribute__ ((aligned (32))) Vertex;
} Vertex;

21
GL/yalloc/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) [year] [fullname]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

158
GL/yalloc/README.md Normal file
View File

@ -0,0 +1,158 @@
# Summary
yalloc is a memory efficient allocator which is intended for embedded
applications that only have a low amount of RAM and want to maximize its
utilization. Properties of the allocator:
- pools can be up to 128k
- user data is 32bit aligned
- 4 bytes overhead per allocation
- supports defragmentation
- uses a free list for first fit allocation strategy (most recently freed
blocks are used first)
- extensively tested (see section below)
- MIT license
# Defragmentation
This feature was the initial motivation for this implementation. Especially
when dealing with highly memory constrained environments fragmenting memory
pools can be annoying. For this reason this implementation supports
defragmentation which moves all allocated blocks into a contiguous range at the
beginning of the pool, leaving a maximized free range at the end.
As there is no garbage collector or other runtime system involved that updates
the references, the application must do so. This is done in three steps:
1. yalloc_defrag_start() is called. This calculates the new
post-defragmentation-addresses for all allocations, but otherwise leaves
the allocations untouched.
2. yalloc_defrag_address() is called by the application for every pointer that
points to an allocation. It returns the post-defragmentation-address for
the allocation. The application must update all its relevant pointers this
way. Care must be taken not to dereference the moved pointers yet. If the
application works with hierarchical data then this can easily be done by
updating the pointers bottom up (first the leaves, then their parents).
3. yalloc_defrag_commit() is called to finally perform the defragmentation.
All allocated blocks are moved to their post-defragmentation-address and
the application can continue using the pool the normal way.
It is up to the application when (and if) it performs defragmentation. One
strategy would be to delay it until an allocation failure. Another approach
would be to perform the defragmentation regularly when there is nothing else to
do.
# Configurable Defines
INTERNAL_VALIDATE
If this is not defined on the compiler commandline it will be defined as 0 if
NDEBUG is defined and otherwise as 1. If you want to disable internal
validation when NDEBUG is not defined then define INTERNAL_VALIDATE as 0 on the
compiler commandline.
If it is nonzero the heap will be validated via a bunch of assert() calls at
the end of every function that modifies the heap. This has roughly O(N*M)
overhead where N is the number of allocated blocks and M the number of free
blocks in a heap. For applications with enough live allocations this will get
significant.
YALLOC_VALGRIND
If this is defined in yalloc.c and NVALGRIND is not defined then
valgrind/memcheck.h is included and the allocator functions tell valgrind
about the pool, the allocations and makes the block headers inaccessible outside
of yalloc-functions. This allows valgrind to detect a lot of the accidents that
can happen when dealing with dynamic memory. This also adds some overhead for every
yalloc-call because most of them will "unprotect" the internal structure on
entry and "protect" it again (marking it as inaccessible for valgrind) before
returning.
# Tests
The tests rely on internal validation of the pool (see INTERNAL_VALIDATE) to
check that no assumptions about the internal structure of the pool are
violated. They additionally check for correctness of observations that can be
made by using the public functions of the allocator (like checking if user data
stays unmodified). There are a few different scripts that run tests:
- run_coverage.sh runs a bunch of testfunctions that are carefully crafted to
cover all code paths. Coverage data is generated by clang and a summary is
shown at the end of the test.
- run_valgrind.sh tests if the valgrind integration is working as expected,
runs the functions from the coverage test and some randomly generated
testcases under valgrind.
- run_libfuzzer.sh uses libfuzzer from clang to generate interesting testcases
and runs them in multiple jobs in parallel for 10 seconds. It also generates
coverage data at the end (it always got 100% coverage in my testruns).
All tests exit with 0 and print "All fine!" at the end if there were no
errors. Coverage deficits are not counted as error, so you have to look at the
summary (they should show 100% coverage!).
# Implementation Details
The Headers and the user data are 32bit aligned. Headers have two 16bit fields
where the high 15 bits represent offsets (relative to the pools address) to the
previous/next block. The macros HDR_PTR() and HDR_OFFSET() are used to
translate an offset to an address and back. The 32bit alignment is exploited to
allow pools of up to 128k with that 15 significant bits.
A pool is always occupied by non-overlapping blocks that link to their
previous/next block in address order via the prev/next field of Header.
Free blocks are always joined: No two free blocks will ever be neighbors.
Free blocks have an additional header of the same structure. This additional
header is used to build a list of free blocks (independent of their address
order).
yalloc_free() will insert the freed block to the front of the free list.
yalloc_alloc() searches that list front to back and takes the first block that
is big enough to satisfy the allocation.
There is always a Header at the front and at the end of the pool. The Header at
the end is degenerate: It is marked as "used" but has no next block (which is
usually used to determine the size of a block).
The prev-field of the very first block in the pool has special meaning: It
points to the first free block in the pool. Or, if the pool is currently
defragmenting (after yalloc_defrag_start() and before yalloc_defrag_commit()),
points to the last header of the pool. This state can be recognized by checking
if it points to an empty block (normal pool state) or a used block
(defragmentation in progress). This logic can be seen in
yalloc_defrag_in_progress().
The lowest bit of next/prev have special meaning:
- low bit of prev is set for free blocks
- low bit of next is set for blocks with 32bit padding after the user data.
This is needed when a block is allocated from a free block that leaves only
4 free bytes after the user data... which is not enough to insert a
free-header (which needs 8 bytes). The padding will be reclaimed when
that block is freed or when the pool is defragmented. The predicate
isPadded() can be used to test if a block is padded. Free blocks are never
padded.
The predicate isNil() can be used to test if an offset points nowhere (it tests
if all 15 high bits of an offset are 1). The constant NIL has all but the
lowest bit set. It is used to set offsets to point to nowhere, and in some
places it is used to mask out the actual address bits of an offset. This should
be kept in mind when modifying the code and updating prev/next: Think carefully
if you have to preserve the low bit when updating an offset!
Defragmentation is done in two phases: First the user calls
yalloc_defrag_start(). This will put the pool in a special state where no
alloc/free-calls are allowed. In this state the prev-fields of the used blocks
have a special meaning: They store the offset that the block will have after
defragmentation finished. This information is used by yalloc_defrag_address()
which can be called by the application to query the new addresses for its
allocations. After the application has updated all its pointers it must call
yalloc_defrag_commit() which moves all used blocks in contiguous space at the
beginning of the pool, leaving one maximized free block at the end.

802
GL/yalloc/yalloc.c Normal file
View File

@ -0,0 +1,802 @@
#include "yalloc.h"
#include "yalloc_internals.h"

#include <assert.h>
#include <string.h>

/* Rounds num up to the next multiple of align (align must be a power of two). */
#define ALIGN(num, align) (((num) + ((align) - 1)) & ~((align) - 1))

/* Valgrind integration is opt-in via YALLOC_VALGRIND and can be force-disabled
   with NVALGRIND (mirroring valgrind's own convention). */
#if defined(YALLOC_VALGRIND) && !defined(NVALGRIND)
# define USE_VALGRIND 1
#else
# define USE_VALGRIND 0
#endif

#if USE_VALGRIND
# include <valgrind/memcheck.h>
#else
/* Without valgrind all client requests compile to nothing. */
# define VALGRIND_MAKE_MEM_UNDEFINED(p, s) ((void)0)
# define VALGRIND_MAKE_MEM_DEFINED(p, s) ((void)0)
# define VALGRIND_MAKE_MEM_NOACCESS(p, s) ((void)0)
# define VALGRIND_CREATE_MEMPOOL(pool, rz, z) ((void)0)
# define VALGRIND_MEMPOOL_ALLOC(pool, p, s) ((void)0)
# define VALGRIND_MEMPOOL_FREE(pool, p) ((void)0)
# define VALGRIND_MEMPOOL_CHANGE(pool, a, b, s) ((void)0)
#endif

/* Helpers that mark one header (or the double header of a free block) as
   freshly written / accessible / inaccessible for valgrind. */
#define MARK_NEW_FREE_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header) * 2)
#define MARK_NEW_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header))
#define PROTECT_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header))
#define PROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header) * 2)
#define UNPROTECT_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header))
#define UNPROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header) * 2)
#if USE_VALGRIND
/* Walks all blocks in address order and marks every header as readable for
   the duration of a yalloc call. Free blocks carry a second header (the
   free-list links), which is unprotected as well. */
static void _unprotect_pool(void * pool)
{
    Header * cur = (Header*)pool;
    for (;;)
    {
        UNPROTECT_HDR(cur);
        if (isFree(cur))
            UNPROTECT_HDR(cur + 1); /* free blocks have a second header with free-list links */
        if (isNil(cur->next))
            break;
        cur = HDR_PTR(cur->next);
    }
}

/* Re-protects the pool before returning to the caller: free blocks become
   inaccessible in full (headers and unused payload), used blocks only have
   their header hidden so valgrind flags out-of-bounds writes. */
static void _protect_pool(void * pool)
{
    Header * cur = (Header*)pool;
    while (cur)
    {
        Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);
        if (isFree(cur))
            VALGRIND_MAKE_MEM_NOACCESS(cur, (char*)next - (char*)cur);
        else
            PROTECT_HDR(cur);
        cur = next;
    }
}

#define assert_is_pool(pool) assert(VALGRIND_MEMPOOL_EXISTS(pool));
#else
/* Without valgrind the protect/unprotect pair is a no-op. */
static void _unprotect_pool(void * pool){(void)pool;}
static void _protect_pool(void * pool){(void)pool;}
#define assert_is_pool(pool) ((void)0)
#endif
/* Internal variant that assumes the pool is already unprotected. */
static int _yalloc_defrag_in_progress(void * pool)
{
    /* While defragmenting, the pool's first "prev" field points at the
       terminating block (a used block). Otherwise it is either nil (empty
       free list) or points at a free block. */
    Header * first = (Header*)pool;
    if (isNil(first->prev))
        return 0; /* empty free list: normal state */
    /* Defragmentation is signalled by the referenced block NOT being free. */
    return (HDR_PTR(first->prev)->prev & 1) == 0;
}
/* Public wrapper: unprotects the pool for the query and protects it again. */
int yalloc_defrag_in_progress(void * pool)
{
    _unprotect_pool(pool);
    int const result = _yalloc_defrag_in_progress(pool);
    _protect_pool(pool);
    return result;
}
#if YALLOC_INTERNAL_VALIDATE
/* Counts how often blk occurs in the pool's free list (validation helper;
   a consistent pool yields 0 for used blocks and 1 for free blocks). */
static size_t _count_free_list_occurences(Header * pool, Header * blk)
{
    int n = 0;
    if (!isNil(pool->prev))
    { /* pool->prev points at the first free block when not defragmenting */
        Header * cur = HDR_PTR(pool->prev);
        for (;;)
        {
            if (cur == blk)
                ++n;
            /* free-list links live in the second header of a free block */
            if (isNil(cur[1].next))
                break;
            cur = HDR_PTR(cur[1].next);
        }
    }
    return n;
}
/* Counts how often blk occurs in the address-ordered block list of the pool
   (validation helper; exactly 1 for any live header). */
static size_t _count_addr_list_occurences(Header * pool, Header * blk)
{
    size_t hits = 0;
    Header * it = pool;
    for (;;)
    {
        hits += (it == blk);
        if (isNil(it->next))
            return hits;
        it = HDR_PTR(it->next);
    }
}
/* Asserts that p is a live allocation of this pool: its header must occur
   exactly once in the address-ordered block list and must not be free. */
static void _validate_user_ptr(void * pool, void * p)
{
    Header * hdr = (Header*)p - 1; /* user data directly follows the header */
    size_t n = _count_addr_list_occurences((Header*)pool, hdr);
    assert(n == 1 && !isFree(hdr));
}
/**
Validates if all the invariants of a pool are intact.
This is very expensive when there are enough blocks in the heap (quadratic complexity!).
*/
static void _yalloc_validate(void * pool_)
{
    Header * pool = (Header*)pool_;
    Header * cur = pool;
    assert(!isNil(pool->next)); // there must always be at least two blocks: a free/used one and the final block at the end
    if (_yalloc_defrag_in_progress(pool))
    {
        /* In the defragmenting state each used block's "prev" stores its
           post-defragmentation offset; check those offsets are contiguous. */
        Header * prevUsed = NULL;
        while (!isNil(cur->next))
        {
            if (!isFree(cur))
            { // it is a used block
                Header * newAddr = cur == pool ? pool : HDR_PTR(cur->prev);
                assert(newAddr <= cur);   /* blocks only move down */
                assert(newAddr >= pool);  /* and never before the pool start */
                if (prevUsed)
                {
                    Header * prevNewAddr = prevUsed == pool ? pool : HDR_PTR(prevUsed->prev);
                    size_t prevBruttoSize = (char*)HDR_PTR(prevUsed->next) - (char*)prevUsed;
                    if (isPadded(prevUsed))
                        prevBruttoSize -= 4; // remove padding — NOTE(review): every other padding computation subtracts sizeof(Header); this literal 4 looks stale, confirm
                    assert((char*)newAddr == (char*)prevNewAddr + prevBruttoSize);
                }
                else
                {
                    assert(newAddr == pool); /* first used block lands at the pool start */
                }
                prevUsed = cur;
            }
            cur = HDR_PTR(cur->next);
        }
        assert(cur == HDR_PTR(pool->prev)); // the free-list should point to the last block
        assert(!isFree(cur)); // the last block must not be free
    }
    else
    {
        Header * prev = NULL;
        // iterate blocks in address order
        for (;;)
        {
            if (prev)
            { /* back-link must point at the previous block in address order */
                Header * x = HDR_PTR(cur->prev);
                assert(x == prev);
            }
            int n = _count_free_list_occurences(pool, cur);
            if (isFree(cur))
            { // it is a free block
                assert(n == 1); /* must occur exactly once in the free list */
                assert(!isPadded(cur)); // free blocks must have a zero padding-bit
                if (prev)
                {
                    assert(!isFree(prev)); // free blocks must not be direct neighbours
                }
            }
            else
            {
                assert(n == 0); /* used blocks must not be in the free list */
            }
            if (isNil(cur->next))
                break;
            Header * next = HDR_PTR(cur->next);
            /* every block is at least two headers long (header + minimal payload/free links) */
            assert((char*)next >= (char*)cur + sizeof(Header) * 2);
            prev = cur;
            cur = next;
        }
        assert(isNil(cur->next));
        if (!isNil(pool->prev))
        {
            // iterate free-list
            Header * f = HDR_PTR(pool->prev);
            assert(isNil(f[1].prev)); /* first free block has no predecessor */
            for (;;)
            {
                assert(isFree(f)); // must be free
                int n = _count_addr_list_occurences(pool, f);
                assert(n == 1); /* every free-list node must be a live block */
                if (isNil(f[1].next))
                    break;
                f = HDR_PTR(f[1].next);
            }
        }
    }
}
#else
/* Internal validation disabled: the checks compile to nothing. */
static void _yalloc_validate(void * pool){(void)pool;}
static void _validate_user_ptr(void * pool, void * p){(void)pool; (void)p;}
#endif
/**
Initializes a pool inside the given buffer.
The usable size is rounded down to a multiple of sizeof(Header); the pool is
set up as one maximal free block followed by the degenerate terminating
header at the very end.
@param pool Start of the buffer (see yalloc.h for alignment requirements).
@param size Size of the buffer in bytes.
@return 0 on success, -1 if the size is too large or too small.
*/
int yalloc_init(void * pool, size_t size)
{
    if (size > MAX_POOL_SIZE)
        return -1;

    // TODO: Error when pool is not properly aligned
    // TODO: Error when size is not a multiple of the alignment?

    /* Round down to a multiple of the header size. Replaces the original
       byte-by-byte decrement loop (O(size)) with the equivalent O(1) form. */
    size -= size % sizeof(Header);

    /* Need room for one free block (two headers) plus the terminating header. */
    if(size < sizeof(Header) * 3)
        return -1;

    VALGRIND_CREATE_MEMPOOL(pool, 0, 0);

    Header * first = (Header*)pool;
    Header * last = (Header*)((char*)pool + size) - 1;

    MARK_NEW_FREE_HDR(first);
    MARK_NEW_HDR(first);

    /* One big free block: the pool's first "prev" points at the first free
       block (itself) and carries the free-bit; the free-list links are nil. */
    first->prev = HDR_OFFSET(first) | 1;
    first->next = HDR_OFFSET(last);
    first[1].prev = NIL;
    first[1].next = NIL;

    /* Degenerate terminating block: marked used, no next. */
    last->prev = HDR_OFFSET(first);
    last->next = NIL;

    _unprotect_pool(pool);
    _yalloc_validate(pool);
    _protect_pool(pool);
    return 0;
}
/* Tears down valgrind bookkeeping for the pool and marks the whole buffer as
   undefined again. Without valgrind the pool needs no cleanup at all, so
   this is a no-op. */
void yalloc_deinit(void * pool)
{
#if USE_VALGRIND
    VALGRIND_DESTROY_MEMPOOL(pool);

    /* Walk to the terminating header to learn the pool's total extent. */
    Header * last = (Header*)pool;
    UNPROTECT_HDR(last);
    while (!isNil(last->next))
    {
        Header * next = HDR_PTR(last->next);
        UNPROTECT_HDR(next);
        last = next;
    }

    VALGRIND_MAKE_MEM_UNDEFINED(pool, (char*)(last + 1) - (char*)pool);
#else
    (void)pool;
#endif
}
/* Allocates size bytes from the pool (first-fit over the free list).
   Returns a pointer just past the block header, or NULL if no free block is
   big enough. Sizes are rounded up to 32-byte alignment. */
void * yalloc_alloc(void * pool, size_t size)
{
    assert_is_pool(pool);
    _unprotect_pool(pool);
    assert(!_yalloc_defrag_in_progress(pool));
    _yalloc_validate(pool);

    if (!size)
    { /* zero-size allocations always fail */
        _protect_pool(pool);
        return NULL;
    }

    Header * root = (Header*)pool;
    if (isNil(root->prev))
    {
        _protect_pool(pool);
        return NULL; /* empty free list: no chance to allocate anything */
    }

    /* round up to alignment */
    size = ALIGN(size, 32);
    size_t bruttoSize = size + sizeof(Header); /* payload plus its header */

    /* First-fit search over the free list. "prev" tracks the previous
       free-list node so the chosen block can be unlinked. */
    Header * prev = NULL;
    Header * cur = HDR_PTR(root->prev);
    for (;;)
    {
        size_t curSize = (char*)HDR_PTR(cur->next) - (char*)cur; /* size of the block, including its header */

        if (curSize >= bruttoSize) // it is big enough
        {
            // take action for unused space in the free block
            if (curSize >= bruttoSize + sizeof(Header) * 2)
            { // the leftover space is big enough to make it a free block
                // Build a free block from the unused space and insert it into the list of free blocks after the current free block
                Header * tail = (Header*)((char*)cur + bruttoSize);
                MARK_NEW_FREE_HDR(tail);

                // update address-order-list
                tail->next = cur->next;
                tail->prev = HDR_OFFSET(cur) | 1;
                HDR_PTR(cur->next)->prev = HDR_OFFSET(tail); // NOTE: We know the next block is used because free blocks are never neighbours. So we don't have to care about the lower bit which would be set for the prev of a free block.
                cur->next = HDR_OFFSET(tail);

                // update list of free blocks
                tail[1].next = cur[1].next;
                // NOTE: tail[1].prev is updated in the common path below (assignment to "HDR_PTR(cur[1].next)[1].prev")
                if (!isNil(cur[1].next))
                    HDR_PTR(cur[1].next)[1].prev = HDR_OFFSET(tail);
                cur[1].next = HDR_OFFSET(tail);
            }
            else if (curSize > bruttoSize)
            { // there will be unused space, but not enough to insert a free header
                internal_assert(curSize - bruttoSize == sizeof(Header)); // unused space must be enough to build a free-block or it should be exactly the size of a Header
                cur->next |= 1; // set marker for "has unused trailing space"
            }
            else
            {
                internal_assert(curSize == bruttoSize); /* exact fit */
            }

            cur->prev &= NIL; // clear marker for "is a free block"

            // remove from linked list of free blocks
            if (prev)
                prev[1].next = cur[1].next;
            else
            { /* cur was the first free block: update the pool's first-free
                 pointer, preserving the pool block's own free-bit */
                uint32_t freeBit = isFree(root);
                root->prev = (cur[1].next & NIL) | freeBit;
            }

            if (!isNil(cur[1].next))
                HDR_PTR(cur[1].next)[1].prev = prev ? HDR_OFFSET(prev) : NIL;

            _yalloc_validate(pool);
            VALGRIND_MEMPOOL_ALLOC(pool, cur + 1, size);
            _protect_pool(pool);
            return cur + 1; // return address after the header
        }

        if (isNil(cur[1].next))
            break; /* reached the end of the free list without a fit */

        prev = cur;
        cur = HDR_PTR(cur[1].next);
    }

    _yalloc_validate(pool);
    _protect_pool(pool);
    return NULL;
}
// Removes a block from the free-list and moves the pools first-free-bock pointer to its successor if it pointed to that block.
static void unlink_from_free_list(Header * pool, Header * blk)
{
    // update the pools pointer to the first block in the free list if necessary
    if (isNil(blk[1].prev))
    { // the block is the first in the free-list
        // make the pools first-free-pointer point to the next in the free list
        uint32_t freeBit = isFree(pool); /* preserve the pool block's own free-bit */
        pool->prev = (blk[1].next & NIL) | freeBit;
    }
    else
        HDR_PTR(blk[1].prev)[1].next = blk[1].next; /* bypass blk in the forward direction */

    if (!isNil(blk[1].next))
        HDR_PTR(blk[1].next)[1].prev = blk[1].prev; /* bypass blk in the backward direction */
}
size_t yalloc_block_size(void * pool, void * p)
{
Header * a = (Header*)p - 1;
UNPROTECT_HDR(a);
Header * b = HDR_PTR(a->next);
size_t payloadSize = (char*)b - (char*)p;
if (isPadded(a))
payloadSize -= sizeof(Header);
PROTECT_HDR(a);
return payloadSize;
}
/* Returns an allocation to the pool. Coalesces with free neighbors, reclaims
   trailing padding of the preceding block, and pushes the resulting free
   block onto the front of the free list. */
void yalloc_free(void * pool_, void * p)
{
    assert_is_pool(pool_);
    assert(!yalloc_defrag_in_progress(pool_));
    if (!p)
        return; /* free(NULL) is a no-op, like libc free() */

    _unprotect_pool(pool_);

    Header * pool = (Header*)pool_;
    Header * cur = (Header*)p - 1;

    // get pointers to previous/next block in address order
    Header * prev = cur == pool || isNil(cur->prev) ? NULL : HDR_PTR(cur->prev);
    Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);

    int prevFree = prev && isFree(prev);
    int nextFree = next && isFree(next);

#if USE_VALGRIND
    {
        unsigned errs = VALGRIND_COUNT_ERRORS;
        VALGRIND_MEMPOOL_FREE(pool, p);
        if (VALGRIND_COUNT_ERRORS > errs)
        { // early exit if the free was invalid (so we get a valgrind error and don't mess up the pool, which is helpful for testing if invalid frees are detected by valgrind)
            _protect_pool(pool_);
            return;
        }
    }
#endif

    _validate_user_ptr(pool_, p);

    if (prevFree && nextFree)
    { // the freed block has two free neighbors
        unlink_from_free_list(pool, prev);
        unlink_from_free_list(pool, next);

        // join prev, cur and next
        prev->next = next->next;
        HDR_PTR(next->next)->prev = cur->prev;

        // prev is now the block we want to push onto the free-list
        cur = prev;
    }
    else if (prevFree)
    {
        unlink_from_free_list(pool, prev);

        // join prev and cur
        prev->next = cur->next;
        HDR_PTR(cur->next)->prev = cur->prev;

        // prev is now the block we want to push onto the free-list
        cur = prev;
    }
    else if (nextFree)
    {
        unlink_from_free_list(pool, next);

        // join cur and next
        cur->next = next->next;
        HDR_PTR(next->next)->prev = next->prev & NIL; /* mask keeps only the offset bits */
    }

    // if there is a previous block and that block has padding then we want to grow the new free block into that padding
    if (cur != pool && !isNil(cur->prev))
    { // there is a previous block
        Header * left = HDR_PTR(cur->prev);
        if (isPadded(left))
        { // the previous block has padding, so extend the current block to consume move the padding to the current free block
            Header * grown = cur - 1; /* padding is exactly one Header in size */
            MARK_NEW_HDR(grown);
            grown->next = cur->next;
            grown->prev = cur->prev;
            left->next = HDR_OFFSET(grown);
            if (!isNil(cur->next))
                HDR_PTR(cur->next)->prev = HDR_OFFSET(grown);

            cur = grown;
        }
    }

    cur->prev |= 1; // it becomes a free block
    cur->next &= NIL; // reset padding-bit
    UNPROTECT_HDR(cur + 1);
    cur[1].prev = NIL; // it will be the first free block in the free list, so it has no prevFree

    if (!isNil(pool->prev))
    { // the free-list was already non-empty
        HDR_PTR(pool->prev)[1].prev = HDR_OFFSET(cur); // make the first entry in the free list point back to the new free block (it will become the first one)
        cur[1].next = pool->prev; // the next free block is the first of the old free-list
    }
    else
        cur[1].next = NIL; // free-list was empty, so there is no successor

    VALGRIND_MAKE_MEM_NOACCESS(cur + 2, (char*)HDR_PTR(cur->next) - (char*)(cur + 2));

    // now the freed block is the first in the free-list
    // update the offset to the first element of the free list
    uint32_t freeBit = isFree(pool); // remember the free-bit of the offset
    pool->prev = HDR_OFFSET(cur) | freeBit; // update the offset and restore the free-bit

    _yalloc_validate(pool);
    _protect_pool(pool);
}
/* Sums all free space in the pool (free blocks plus reclaimable padding of
   used blocks) and returns the largest allocation that could succeed after a
   full defragmentation: total free bytes minus one header. */
size_t yalloc_count_free(void * pool_)
{
    assert_is_pool(pool_);
    _unprotect_pool(pool_);
    assert(!_yalloc_defrag_in_progress(pool_));
    Header * pool = (Header*)pool_;
    size_t bruttoFree = 0;
    Header * cur = pool;

    _yalloc_validate(pool);

    for (;;)
    {
        if (isFree(cur))
        { // it is a free block
            bruttoFree += (char*)HDR_PTR(cur->next) - (char*)cur;
        }
        else
        { // it is a used block
            if (isPadded(cur))
            { // the used block is padded; padding is reclaimed on defragmentation
                bruttoFree += sizeof(Header);
            }
        }

        if (isNil(cur->next))
            break;

        cur = HDR_PTR(cur->next);
    }

    _protect_pool(pool);

    if (bruttoFree < sizeof(Header))
    {
        internal_assert(!bruttoFree); // free space should always be a multiple of sizeof(Header)
        return 0;
    }

    return bruttoFree - sizeof(Header); /* one header is always consumed by an allocation */
}
size_t yalloc_count_continuous(void * pool_)
{
assert_is_pool(pool_);
_unprotect_pool(pool_);
assert(!_yalloc_defrag_in_progress(pool_));
Header * pool = (Header*)pool_;
size_t largestFree = 0;
Header * cur = pool;
_yalloc_validate(pool);
for (;;)
{
if (isFree(cur))
{ // it is a free block
size_t temp = (uintptr_t)HDR_PTR(cur->next) - (uintptr_t)cur;
if(temp > largestFree)
largestFree = temp;
}
if (isNil(cur->next))
break;
cur = HDR_PTR(cur->next);
}
_protect_pool(pool);
if (largestFree < sizeof(Header))
{
internal_assert(!largestFree); // free space should always be a multiple of sizeof(Header)
return 0;
}
return largestFree - sizeof(Header);
}
/* Returns the lowest-addressed live allocation, or NULL if the pool holds
   none. The terminating header is excluded by the loop condition. */
void * yalloc_first_used(void * pool)
{
    assert_is_pool(pool);
    _unprotect_pool(pool);

    for (Header * blk = (Header*)pool; !isNil(blk->next); blk = HDR_PTR(blk->next))
    {
        if (!isFree(blk))
        {
            _protect_pool(pool);
            return blk + 1; /* user data follows the header */
        }
    }

    _protect_pool(pool);
    return NULL;
}
/* Returns the next live allocation after p in address order, or NULL when p
   was the last one. */
void * yalloc_next_used(void * pool, void * p)
{
    assert_is_pool(pool);
    _unprotect_pool(pool);
    _validate_user_ptr(pool, p);

    Header * hdr = (Header*)p - 1;
    /* the terminating block is never user-visible, so it cannot be the input */
    assert(!isNil(hdr->next));

    for (Header * blk = HDR_PTR(hdr->next); !isNil(blk->next); blk = HDR_PTR(blk->next))
    {
        if (!isFree(blk))
        {
            _protect_pool(pool);
            return blk + 1;
        }
    }

    _protect_pool(pool);
    return NULL;
}
/* Puts the pool into the "defragmenting" state: each used block's "prev"
   field is repurposed to hold the (shifted) offset the block will occupy
   after yalloc_defrag_commit(), and the pool's first "prev" is pointed at
   the terminating (used) block to mark the state. */
void yalloc_defrag_start(void * pool_)
{
    assert_is_pool(pool_);
    _unprotect_pool(pool_);
    assert(!_yalloc_defrag_in_progress(pool_));
    Header * pool = (Header*)pool_;

    // iterate over all blocks in address order and store the post-defragment address of used blocks in their "prev" field
    size_t end = 0; // offset for the next used block
    Header * blk = (Header*)pool;
    for (; !isNil(blk->next); blk = HDR_PTR(blk->next))
    {
        if (!isFree(blk))
        { // it is a used block
            blk->prev = end >> 1; /* offsets are stored shifted right by one (see HDR_OFFSET) */
            internal_assert((char*)HDR_PTR(blk->prev) == (char*)pool + end);

            size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;

            if (isPadded(blk))
            { // the block is padded; padding is dropped when the block moves
                bruttoSize -= sizeof(Header);
            }

            end += bruttoSize;
            internal_assert(end % sizeof(Header) == 0);
        }
    }

    // blk is now the last block (the dummy "used" block at the end of the pool)
    internal_assert(isNil(blk->next));
    internal_assert(!isFree(blk));

    // mark the pool as "defragementation in progress"
    uint32_t freeBit = isFree(pool); /* preserve the pool block's own free-bit */
    pool->prev = (HDR_OFFSET(blk) & NIL) | freeBit;

    _yalloc_validate(pool);
    internal_assert(yalloc_defrag_in_progress(pool));
    _protect_pool(pool);
}
/**
Returns the address an allocation will have after yalloc_defrag_commit().
Must only be called while the pool is in the "defragmenting" state (the
"prev" field of each used block then stores its future offset).
@param pool_ The pool the allocation comes from.
@param p A live allocation of that pool (may be NULL, which yields NULL).
@return The post-defragmentation address of p.
*/
void * yalloc_defrag_address(void * pool_, void * p)
{
    assert_is_pool(pool_);
    assert(yalloc_defrag_in_progress(pool_));
    if (!p)
        return NULL;

    Header * pool = (Header*)pool_;

    _unprotect_pool(pool);
    _validate_user_ptr(pool_, p);

    if (pool + 1 == p)
    { // "prev" of the first block points to the last used block to mark the pool as "defragmentation in progress",
      // so the first allocation keeps its address and cannot use its prev-field.
        /* BUG FIX: the original returned here without re-protecting the pool,
           leaving it unprotected under valgrind builds. */
        _protect_pool(pool);
        return pool + 1;
    }

    Header * blk = (Header*)p - 1;
    void * defragP = HDR_PTR(blk->prev) + 1; /* future header position + 1 = future user data */

    _protect_pool(pool);
    return defragP;
}
/* Finishes defragmentation: moves every used block to the offset recorded by
   yalloc_defrag_start(), rebuilds the address links, and leaves at most one
   free block (at the end of the pool). Returns the pool to the normal state. */
void yalloc_defrag_commit(void * pool_)
{
    assert_is_pool(pool_);
    _unprotect_pool(pool_);
    assert(_yalloc_defrag_in_progress(pool_));
    Header * pool = (Header*)pool_;

    // iterate over all blocks in address order and move them
    size_t end = 0; // offset for the next used block
    Header * blk = pool;
    Header * lastUsed = NULL;
    while (!isNil(blk->next))
    {
        if (!isFree(blk))
        { // it is a used block
            size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;

            if (isPadded(blk))
            { // the block is padded; the padding is dropped by the move
                bruttoSize -= sizeof(Header);
            }

            Header * next = HDR_PTR(blk->next); /* read before the block is overwritten by memmove */
            blk->prev = lastUsed ? HDR_OFFSET(lastUsed) : NIL;
            blk->next = (end + bruttoSize) >> 1; /* future next-offset, stored shifted (see HDR_OFFSET) */

            lastUsed = (Header*)((char*)pool + end);
            VALGRIND_MAKE_MEM_UNDEFINED(lastUsed, (char*)blk - (char*)lastUsed);
            memmove(lastUsed, blk, bruttoSize); /* regions may overlap: memmove, not memcpy */
            VALGRIND_MEMPOOL_CHANGE(pool, blk + 1, lastUsed + 1, bruttoSize - sizeof(Header));

            end += bruttoSize;
            blk = next;
        }
        else
            blk = HDR_PTR(blk->next); /* free blocks are simply skipped (their space is reclaimed) */
    }

    // blk is now the last block (the dummy "used" block at the end of the pool)
    internal_assert(isNil(blk->next));
    internal_assert(!isFree(blk));

    if (lastUsed)
    {
        Header * gap = HDR_PTR(lastUsed->next);
        if (gap == blk)
        { // there is no gap
            pool->prev = NIL; // the free list is empty
            blk->prev = HDR_OFFSET(lastUsed);
        }
        else if (blk - gap > 1)
        { // the gap is big enough for a free Header
            // set a free list that contains the gap as only element
            gap->prev = HDR_OFFSET(lastUsed) | 1;
            gap->next = HDR_OFFSET(blk);
            gap[1].prev = NIL;
            gap[1].next = NIL;
            pool->prev = blk->prev = HDR_OFFSET(gap);
        }
        else
        { // there is a gap, but it is too small to be used as free-list-node, so just make it padding of the last used block
            lastUsed->next = HDR_OFFSET(blk) | 1;
            pool->prev = NIL;
            blk->prev = HDR_OFFSET(lastUsed);
        }
    }
    else
    { // the pool is empty
        pool->prev = 1; /* nil-free-list marker with the free-bit set */
    }

    internal_assert(!_yalloc_defrag_in_progress(pool));
    _yalloc_validate(pool);
    _protect_pool(pool);
}

176
GL/yalloc/yalloc.h Normal file
View File

@ -0,0 +1,176 @@
/**
@file
API of the yalloc allocator.
*/

#ifndef YALLOC_H
#define YALLOC_H

#include <stddef.h>

/**
Maximum supported pool size. yalloc_init() will fail for larger pools.
*/
#define MAX_POOL_SIZE ((2 << 24) - 4)

/**
Creates a pool inside a given buffer.

Pools must be deinitialized with yalloc_deinit() when they are no longer needed.

@param pool The starting address of the pool. It must have at least 32bit
alignment (the internal block headers use 32bit integers). Allocations are
placed at 32bit boundaries starting from this address, so if the user data
should be 32bit aligned then this address has to be 32bit aligned. Typically
an address of static memory, or an array on the stack is used if the pool is
only used temporarily.
@param size Size of the pool.
@return 0 on success, nonzero if the size is not supported.
*/
int yalloc_init(void * pool, size_t size);

/**
Deinitializes the buffer that is used by the pool and makes it available for other use.

The content of the buffer is undefined after this.

@param pool The starting address of an initialized pool.
*/
void yalloc_deinit(void * pool);

/**
Allocates a block of memory from a pool.

This function mimics malloc().

The pool must not be in the "defragmenting" state when this function is called.

@param pool The starting address of an initialized pool.
@param size Number of bytes to allocate.
@return Allocated buffer or \c NULL if there was no free range that could serve
the allocation. See @ref yalloc_defrag_start() for a way to remove
fragmentation which may cause allocations to fail even when there is enough
space in total.
*/
void * yalloc_alloc(void * pool, size_t size);

/**
Returns an allocation to a pool.

This function mimics free().

The pool must not be in the "defragmenting" state when this function is called.

@param pool The starting address of the initialized pool the allocation comes from.
@param p An address that was returned from yalloc_alloc() of the same pool.
*/
void yalloc_free(void * pool, void * p);

/**
Returns the maximum size of a successful allocation (assuming a completely unfragmented heap).

After defragmentation the first allocation with the returned size is guaranteed to succeed.

@param pool The starting address of an initialized pool.
@return Number of bytes that can be allocated (assuming the pool is defragmented).
*/
size_t yalloc_count_free(void * pool);

/**
Returns the maximum continuous free area.

@param pool The starting address of an initialized pool.
@return Number of free bytes that exist continuously.
*/
size_t yalloc_count_continuous(void * pool_);

/**
Queries the usable size of an allocated block.

@param pool The starting address of the initialized pool the allocation comes from.
@param p An address that was returned from yalloc_alloc() of the same pool.
@return Size of the memory block. This is the size passed to @ref
yalloc_alloc() rounded up to the allocator's internal alignment (the
implementation aligns allocation sizes to 32 bytes).
*/
size_t yalloc_block_size(void * pool, void * p);

/**
Finds the first (in address order) allocation of a pool.

@param pool The starting address of an initialized pool.
@return Address of the allocation the lowest address inside the pool (this is
what @ref yalloc_alloc() returned), or \c NULL if there is no used block.
*/
void * yalloc_first_used(void * pool);

/**
Given a pointer to an allocation finds the next (in address order) used block of a pool.

@param pool The starting address of the initialized pool the allocation comes from.
@param p Pointer to an allocation in that pool, typically comes from a previous
call to @ref yalloc_first_used()
*/
void * yalloc_next_used(void * pool, void * p);

/**
Starts defragmentation for a pool.

Allocations will stay where they are. But the pool is put in the "defragmenting"
state (see @ref yalloc_defrag_in_progress()).

The pool must not be in the "defragmenting" state when this function is called.
The pool is put into the "defragmenting" state by this function.

@param pool The starting address of an initialized pool.
*/
void yalloc_defrag_start(void * pool);

/**
Returns the address that an allocation will have after @ref yalloc_defrag_commit() is called.

The pool must be in the "defragmenting" state when this function is called.

@param pool The starting address of the initialized pool the allocation comes from.
@param p Pointer to an allocation in that pool.
@return The address the allocation will have after @ref yalloc_defrag_commit() is called.
*/
void * yalloc_defrag_address(void * pool, void * p);

/**
Finishes the defragmentation.

The content of all allocations in the pool will be moved to the address that
was reported by @ref yalloc_defrag_address(). The pool will then have only one
free block. This means that an <tt>yalloc_alloc(pool, yalloc_count_free(pool))</tt>
will succeed.

The pool must be in the "defragmenting" state when this function is called. The
pool is put back to normal state by this function.

@param pool The starting address of an initialized pool.
*/
void yalloc_defrag_commit(void * pool);

/**
Tells if the pool is in the "defragmenting" state (after a @ref yalloc_defrag_start() and before a @ref yalloc_defrag_commit()).

@param pool The starting address of an initialized pool.
@return Nonzero if the pool is currently in the "defragmenting" state.
*/
int yalloc_defrag_in_progress(void * pool);

/**
Helper function that dumps the state of the pool to stdout.

This function is only available if built with <tt>yalloc_dump.c</tt>. This
function only exists for debugging purposes and can be ignored by normal users
that are not interested in the internal structure of the implementation.

@param pool The starting address of an initialized pool.
@param name A string that is used as "Title" for the output.
*/
void yalloc_dump(void * pool, char * name);

#endif // YALLOC_H

39
GL/yalloc/yalloc_dump.c Normal file
View File

@ -0,0 +1,39 @@
#include "yalloc_internals.h"
#include <stdio.h>
/* Prints one prev/next offset field, either as "nil" or as the byte distance
   from the pool start.
   BUG FIX: the parameter was uint16_t, but Header::prev/next are uint32_t
   (NIL is 0xFFFFFFFE). Passing them here truncated the value, so isNil()
   could never be true and HDR_PTR() computed a wrong address for offsets
   beyond 16 bits. Widened to uint32_t to match the header fields. */
static void printOffset(void * pool, char * name, uint32_t offset)
{
    if (isNil(offset))
        printf("  %s: nil\n", name);
    else
        printf("  %s: %td\n", name, (char*)HDR_PTR(offset) - (char*)pool);
}
/* Debug helper: prints every block of the pool (address order) with its
   links, free/used state, padding flag and payload size. */
void yalloc_dump(void * pool, char * name)
{
    printf("---- %s ----\n", name);
    Header * cur = (Header*)pool;
    for (;;)
    {
        printf(isFree(cur) ? "%td: free @%p\n" : "%td: used @%p\n", (char*)cur - (char*)pool, cur);
        /* the first block's prev doubles as the first-free pointer */
        printOffset(pool, cur == pool ? "first free" : "prev", cur->prev);
        printOffset(pool, "next", cur->next);
        if (isFree(cur))
        { /* free blocks carry a second header with the free-list links */
            printOffset(pool, "prevFree", cur[1].prev);
            printOffset(pool, "nextFree", cur[1].next);
        }
        else
            printf("  payload includes padding: %i\n", isPadded(cur));

        if (isNil(cur->next))
            break; /* reached the terminating header */

        printf("  %td bytes payload\n", (char*)HDR_PTR(cur->next) - (char*)cur - sizeof(Header));

        cur = HDR_PTR(cur->next);
    }

    fflush(stdout);
}

View File

@ -0,0 +1,63 @@
#ifndef YALLOC_INTERNALS_H
#define YALLOC_INTERNALS_H

#include <stdint.h>

typedef struct
{
    uint32_t prev; // low bit set if free
    uint32_t next; // for used blocks: low bit set if unused header at the end
    /* We need user data to be 32-byte aligned, so the header needs
     * to be 32 bytes in size (as user data follows the header) */
    uint8_t padding[32 - (sizeof(uint32_t) * 2)];
} Header;

/* Offsets are stored shifted right by one; the low bit of prev/next is a
   flag (free / padded). HDR_PTR() masks the flag and shifts back.
   NOTE(review): this comment previously described 16bit offsets; the fields
   are 32bit now — confirm the intended addressable range (cf. MAX_POOL_SIZE). */
#define NIL 0xFFFFFFFEu

// return Header-address for a prev/next
#define HDR_PTR(offset) ((Header*)((char*)pool + (((offset) & NIL)<<1)))

// return a prev/next for a Header-address
#define HDR_OFFSET(blockPtr) ((uint32_t)(((char*)blockPtr - (char*)pool) >> 1))

/* Default: internal validation on unless NDEBUG is defined (can be forced
   either way on the compiler command line). */
#ifndef YALLOC_INTERNAL_VALIDATE
# ifdef NDEBUG
#  define YALLOC_INTERNAL_VALIDATE 0
# else
#  define YALLOC_INTERNAL_VALIDATE 1
#endif
#endif

/*
internal_assert() is used in some places to check internal expectations.
Activate this if you modify the code to detect problems as early as possible.
In other cases this should be deactivated.
*/
#if 0
#define internal_assert assert
#else
#define internal_assert(condition)((void) 0)
#endif

// detects offsets that point nowhere
static inline int isNil(uint32_t offset)
{
    return (offset | 1) == 0xFFFFFFFF;
}

// low bit of prev marks a free block
static inline int isFree(Header * hdr)
{
    return hdr->prev & 1;
}

// low bit of next marks trailing padding after the user data
static inline int isPadded(Header * hdr)
{
    return hdr->next & 1;
}

#endif // YALLOC_INTERNALS_H

View File

@ -32,7 +32,7 @@ GLdc uses CMake for its build system, it currently ships with two "backends":
- kospvr - This is the hardware-accelerated Dreamcast backend
- software - This is a stub software rasterizer used for testing and debugging
To compile a Dreamcast debug build, you'll want to do something like the following:
To compile for Dreamcast, you'll want to do something like the following:
```
mkdir dcbuild
@ -41,11 +41,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" .
make
```
For a release build, replace the cmake line with with the following:
```
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
```
You will need KallistiOS compiled and configured (e.g. the KOS_BASE environment
variable must be set)

View File

@ -12,45 +12,36 @@
#include "aligned_vector.h"
extern inline void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count);
extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
void aligned_vector_init(AlignedVector* vector, uint32_t element_size) {
/* Now initialize the header*/
AlignedVectorHeader* const hdr = &vector->hdr;
hdr->size = 0;
hdr->capacity = ALIGNED_VECTOR_CHUNK_SIZE;
hdr->element_size = element_size;
void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
vector->size = vector->capacity = 0;
vector->element_size = element_size;
vector->data = NULL;
/* Reserve some initial capacity. This will do the allocation but not set up the header */
void* ptr = aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
assert(ptr);
(void) ptr;
/* Reserve some initial capacity */
aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
}
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
AlignedVectorHeader* const hdr = &vector->hdr;
if(hdr->size == 0) {
uint32_t element_size = hdr->element_size;
if(vector->size == 0) {
free(vector->data);
/* Reallocate the header */
vector->data = NULL;
hdr->size = hdr->capacity = 0;
hdr->element_size = element_size;
vector->capacity = 0;
} else {
uint32_t new_byte_size = (hdr->size * hdr->element_size);
uint8_t* original_data = vector->data;
unsigned int new_byte_size = vector->size * vector->element_size;
unsigned char* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
if(original_data) {
FASTCPY(vector->data, original_data, new_byte_size);
free(original_data);
}
hdr->capacity = hdr->size;
vector->capacity = vector->size;
}
}

View File

@ -3,8 +3,6 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
@ -13,13 +11,28 @@ extern "C" {
#if defined(__APPLE__) || defined(__WIN32__)
/* Linux + Kos define this, OSX does not, so just use malloc there */
static inline void* memalign(size_t alignment, size_t size) {
(void) alignment;
return malloc(size);
}
#else
#include <malloc.h>
#endif
#ifdef __DREAMCAST__
#include <kos/string.h>
#define AV_MEMCPY4 memcpy4
#else
#define AV_MEMCPY4 memcpy
#endif
typedef struct {
unsigned int size;
unsigned int capacity;
unsigned char* data;
unsigned int element_size;
} AlignedVector;
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
#ifdef __cplusplus
#define AV_FORCE_INLINE static inline
#else
@ -28,193 +41,94 @@ static inline void* memalign(size_t alignment, size_t size) {
#define AV_FORCE_INLINE static AV_INLINE_DEBUG
#endif
#ifdef __DREAMCAST__
#include <kos/string.h>
AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
{
if(!len)
{
return dest;
}
const uint8_t *s = (uint8_t *)src;
uint8_t *d = (uint8_t *)dest;
uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
// Underflow would be like adding a negative offset
// Can use 'd' as a scratch reg now
asm volatile (
"clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
".align 2\n"
"0:\n\t"
"dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
"mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
"bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2)
" mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
: [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
: [offset] "z" (diff) // inputs
: "t", "memory" // clobbers
);
return dest;
}
#else
#define AV_MEMCPY4 memcpy
#endif
typedef struct {
uint32_t size;
uint32_t capacity;
uint32_t element_size;
} __attribute__((aligned(32))) AlignedVectorHeader;
typedef struct {
AlignedVectorHeader hdr;
uint8_t* data;
} AlignedVector;
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
#define ROUND_TO_CHUNK_SIZE(v) \
((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
void aligned_vector_init(AlignedVector* vector, uint32_t element_size);
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) {
const AlignedVectorHeader* hdr = &vector->hdr;
assert(index < hdr->size);
return vector->data + (index * hdr->element_size);
}
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) {
AlignedVectorHeader* hdr = &vector->hdr;
if(element_count < hdr->capacity) {
return aligned_vector_at(vector, element_count);
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
if(element_count <= vector->capacity) {
return NULL;
}
uint32_t original_byte_size = (hdr->size * hdr->element_size);
unsigned int original_byte_size = vector->size * vector->element_size;
/* We overallocate so that we don't make small allocations during push backs */
element_count = ROUND_TO_CHUNK_SIZE(element_count);
uint32_t new_byte_size = (element_count * hdr->element_size);
uint8_t* original_data = vector->data;
unsigned int new_byte_size = element_count * vector->element_size;
unsigned char* original_data = vector->data;
vector->data = (uint8_t*) memalign(0x20, new_byte_size);
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
assert(vector->data);
AV_MEMCPY4(vector->data, original_data, original_byte_size);
free(original_data);
if(original_data) {
AV_MEMCPY4(vector->data, original_data, original_byte_size);
free(original_data);
}
vector->capacity = element_count;
hdr->capacity = element_count;
return vector->data + original_byte_size;
}
AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) {
return (AlignedVectorHeader*) &vector->hdr;
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
assert(index < vector->size);
return &vector->data[index * vector->element_size];
}
AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) {
const AlignedVectorHeader* hdr = &vector->hdr;
return hdr->size;
}
AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) {
const AlignedVectorHeader* hdr = &vector->hdr;
return hdr->capacity;
}
AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) {
return vector->data;
}
#define av_assert(x) \
do {\
if(!(x)) {\
fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
exit(1);\
}\
} while(0); \
/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) {
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
void* ret = NULL;
AlignedVectorHeader* hdr = &vector->hdr;
uint32_t previous_count = hdr->size;
if(hdr->capacity <= element_count) {
unsigned int previousCount = vector->size;
if(vector->capacity < element_count) {
/* If we didn't have capacity, increase capacity (slow) */
aligned_vector_reserve(vector, element_count);
hdr->size = element_count;
ret = aligned_vector_at(vector, previous_count);
av_assert(hdr->size == element_count);
av_assert(hdr->size <= hdr->capacity);
} else if(previous_count < element_count) {
vector->size = element_count;
ret = aligned_vector_reserve(vector, element_count);
} else if(previousCount < element_count) {
/* So we grew, but had the capacity, just get a pointer to
* where we were */
hdr->size = element_count;
av_assert(hdr->size < hdr->capacity);
ret = aligned_vector_at(vector, previous_count);
} else if(hdr->size != element_count) {
hdr->size = element_count;
av_assert(hdr->size < hdr->capacity);
vector->size = element_count;
ret = aligned_vector_at(vector, previousCount);
} else {
vector->size = element_count;
}
return ret;
}
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) {
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
/* Resize enough room */
AlignedVectorHeader* hdr = &vector->hdr;
assert(count);
assert(hdr->element_size);
assert(vector->element_size);
#ifndef NDEBUG
uint32_t element_size = hdr->element_size;
uint32_t initial_size = hdr->size;
#endif
unsigned int initial_size = vector->size;
aligned_vector_resize(vector, vector->size + count);
uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count);
assert(dest);
assert(vector->size == initial_size + count);
unsigned char* dest = vector->data + (vector->element_size * initial_size);
/* Copy the objects in */
AV_MEMCPY4(dest, objs, hdr->element_size * count);
AV_MEMCPY4(dest, objs, vector->element_size * count);
assert(hdr->element_size == element_size);
assert(hdr->size == initial_size + count);
return dest;
}
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) {
AlignedVectorHeader* hdr = &vector->hdr;
void* ret = aligned_vector_resize(vector, hdr->size + additional_count);
assert(ret); // Should always return something
return ret;
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
return aligned_vector_resize(vector, vector->size + additional_count);
}
AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
AlignedVectorHeader* hdr = &vector->hdr;
hdr->size = 0;
vector->size = 0;
}
void aligned_vector_shrink_to_fit(AlignedVector* vector);
void aligned_vector_cleanup(AlignedVector* vector);
AV_FORCE_INLINE void* aligned_vector_back(AlignedVector* vector){
AlignedVectorHeader* hdr = &vector->hdr;
return aligned_vector_at(vector, hdr->size ? hdr->size - 1 : 0);
static inline void* aligned_vector_back(AlignedVector* vector){
return aligned_vector_at(vector, vector->size - 1);
}
#ifdef __cplusplus

View File

@ -68,6 +68,7 @@ void* named_array_reserve(NamedArray* array, unsigned int id) {
void named_array_release(NamedArray* array, unsigned int new_id) {
unsigned int i = new_id / 8;
unsigned int j = new_id % 8;
array->used_markers[i] &= (unsigned char) ~(1 << j);
}

View File

@ -19,10 +19,6 @@ __BEGIN_DECLS
#include <math.h>
#if __STDCPP_FLOAT16_T__
#include <stdfloat>
#endif
/* Primitive Types taken from GL for compatability */
/* Not all types are implemented in Open GL DC V.1.0 */
#define GL_POINTS 0x0000
@ -309,13 +305,12 @@ __BEGIN_DECLS
#define GL_UNSIGNED_INT 0x1405
#define GL_FLOAT 0x1406
#define GL_DOUBLE 0x140A
#define GL_HALF_FLOAT 0x140B
#define GL_2_BYTES 0x1407
#define GL_3_BYTES 0x1408
#define GL_4_BYTES 0x1409
/* ErrorCode */
#define GL_NO_ERROR ((GLenum) 0)
#define GL_NO_ERROR 0
#define GL_INVALID_ENUM 0x0500
#define GL_INVALID_VALUE 0x0501
#define GL_INVALID_OPERATION 0x0502
@ -364,7 +359,7 @@ __BEGIN_DECLS
#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364
#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365
#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366
#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367
#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368
#define GL_COLOR_INDEX 0x1900
@ -376,32 +371,6 @@ __BEGIN_DECLS
#define GL_RGBA 0x1908
#define GL_LUMINANCE 0x1909
#define GL_LUMINANCE_ALPHA 0x190A
#define GL_R3_G3_B2 0x2A10
#define GL_ALPHA4 0x803B
#define GL_ALPHA8 0x803C
#define GL_ALPHA12 0x803D
#define GL_ALPHA16 0x803E
#define GL_LUMINANCE4 0x803F
#define GL_LUMINANCE8 0x8040
#define GL_LUMINANCE12 0x8041
#define GL_LUMINANCE16 0x8042
#define GL_LUMINANCE4_ALPHA4 0x8043
#define GL_LUMINANCE6_ALPHA2 0x8044
#define GL_LUMINANCE8_ALPHA8 0x8045
#define GL_LUMINANCE12_ALPHA4 0x8046
#define GL_LUMINANCE12_ALPHA12 0x8047
#define GL_LUMINANCE16_ALPHA16 0x8048
#define GL_INTENSITY4 0x804A
#define GL_INTENSITY8 0x804B
#define GL_INTENSITY12 0x804C
#define GL_INTENSITY16 0x804D
#define GL_BGR 0x80E0
#define GL_BGRA 0x80E1
#define GL_INTENSITY 0x8049
#define GL_RGB4 0x804F
@ -418,14 +387,6 @@ __BEGIN_DECLS
#define GL_RGBA12 0x805A
#define GL_RGBA16 0x805B
#define GL_R8 0x8229
#define GL_RG8 0x822B
#define GL_RG 0x8227
#define GL_R16 0x822A
#define GL_RG16 0x822C
#define GL_COMPRESSED_RED 0x8225
#define GL_COMPRESSED_RG 0x8226
/* Polygons */
#define GL_POINT 0x1B00
#define GL_LINE 0x1B01
@ -466,12 +427,6 @@ __BEGIN_DECLS
#define GL_FALSE 0
#define GL_TRUE 1
#if __STDCPP_FLOAT16_T__
#define GLhalf std::float16_t
#else
#define GLhalf unsigned short
#endif
/* Stubs for portability */
#define GL_LINE_SMOOTH 0x0B20
#define GL_ALPHA_TEST 0x0BC0
@ -710,7 +665,6 @@ GLAPI void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Fog Functions - client must enable GL_FOG for this to take effect */
GLAPI void APIENTRY glFogi(GLenum pname, GLint param);
GLAPI void APIENTRY glFogf(GLenum pname, GLfloat param);
GLAPI void APIENTRY glFogiv(GLenum pname, const GLint* params);
GLAPI void APIENTRY glFogfv(GLenum pname, const GLfloat *params);
/* Lighting Functions - client must enable GL_LIGHTING for this to take effect */

View File

@ -130,7 +130,7 @@ GLAPI void APIENTRY glGenFramebuffersEXT(GLsizei n, GLuint* framebuffers);
GLAPI void APIENTRY glDeleteFramebuffersEXT(GLsizei n, const GLuint* framebuffers);
GLAPI void APIENTRY glBindFramebufferEXT(GLenum target, GLuint framebuffer);
GLAPI void APIENTRY glFramebufferTexture2DEXT(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
GLAPI void APIENTRY glGenerateMipmap(GLenum target);
GLAPI void APIENTRY glGenerateMipmapEXT(GLenum target);
GLAPI GLenum APIENTRY glCheckFramebufferStatusEXT(GLenum target);
GLAPI GLboolean APIENTRY glIsFramebufferEXT(GLuint framebuffer);
@ -203,7 +203,7 @@ GLAPI void APIENTRY glCompressedTexImage2DARB(GLenum target,
#define glClientActiveTexture glClientActiveTextureARB
#define glMultiTexCoord2f glMultiTexCoord2fARB
#define glGenerateMipmapEXT glGenerateMipmap
#define glGenerateMipmap glGenerateMipmapEXT
#define glCompressedTexImage2D glCompressedTexImage2DARB
#ifndef GL_VERSION_1_4

View File

@ -35,6 +35,8 @@ extern const char* GLDC_VERSION;
#define GL_NEARZ_CLIPPING_KOS 0xEEFA
#define GL_UNSIGNED_BYTE_TWID_KOS 0xEEFB
/* Initialize the GL pipeline. GL will initialize the PVR. */
GLAPI void APIENTRY glKosInit();
@ -55,13 +57,6 @@ typedef struct {
GLuint initial_pt_capacity;
GLuint initial_immediate_capacity;
/* Default: True
*
* Whether glTexImage should automatically twiddle textures
* if the internal format is a generic format (e.g. GL_RGB).
* this is the same as calling glEnable(GL_TEXTURE_TWIDDLE_KOS)
* on boot */
GLboolean texture_twiddle;
} GLdcConfig;
@ -92,7 +87,7 @@ GLAPI void APIENTRY glKosInitConfig(GLdcConfig* config);
*/
GLAPI void APIENTRY glKosInitEx(GLdcConfig* config);
GLAPI void APIENTRY glKosSwapBuffers();
GLAPI void APIENTRY glKosShutdown();
/*
* CUSTOM EXTENSION multiple_shared_palette_KOS
@ -191,28 +186,12 @@ GLAPI void APIENTRY glKosShutdown();
/* Memory allocation extension (GL_KOS_texture_memory_management) */
GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void);
/* glGet extensions */
#define GL_FREE_TEXTURE_MEMORY_KOS 0xEF3D
#define GL_USED_TEXTURE_MEMORY_KOS 0xEF3E
#define GL_FREE_CONTIGUOUS_TEXTURE_MEMORY_KOS 0xEF3F
//for palette internal format (glfcConfig)
#define GL_RGB565_KOS 0xEF40
#define GL_ARGB4444_KOS 0xEF41
#define GL_ARGB1555_KOS 0xEF42
#define GL_RGB565_TWID_KOS 0xEF43
#define GL_ARGB4444_TWID_KOS 0xEF44
#define GL_ARGB1555_TWID_KOS 0xEF45
#define GL_COLOR_INDEX8_TWID_KOS 0xEF46
#define GL_COLOR_INDEX4_TWID_KOS 0xEF47
#define GL_RGB_TWID_KOS 0xEF48
#define GL_RGBA_TWID_KOS 0xEF49
/* glGet extensions */
#define GL_TEXTURE_INTERNAL_FORMAT_KOS 0xEF50
/* If enabled, will twiddle texture uploads where possible */
#define GL_TEXTURE_TWIDDLE_KOS 0xEF51
__END_DECLS

View File

@ -1,446 +0,0 @@
#include <cstdio>
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#ifdef __DREAMCAST__
#include <kos.h>
float avgfps = -1;
#endif
#include "GL/gl.h"
#include "GL/glkos.h"
#include "GL/glu.h"
#include "GL/glext.h"
#define PI 3.14159265358979323846264338327950288f
#define RAD_TO_DEG 57.295779513082320876798154814105f
#define MAX_CUBES 350
float timeElapsed = 0.0f;
const float dt = 1.0f / 60.0f;
float angle = 0;
const float invAngle360 = 1.0f / 360.0f;
const float cameraDistance = 3.0f;
bool isDrawingArrays = false;
bool isBlendingEnabled = true;
bool isRunning = true;
typedef struct
{
GLubyte r;
GLubyte g;
GLubyte b;
GLubyte a;
} Color;
Color colors[] =
{
{255, 0, 0, 128},
{0, 255, 0, 128},
{0, 0, 255, 128},
{255, 255, 0, 128},
{255, 0, 255, 128},
{0, 255, 255, 128}
};
Color faceColors[24];
float cubeVertices[] =
{
// Front face
-1.0f, -1.0f, +1.0f, // vertex 0
+1.0f, -1.0f, +1.0f, // vertex 1
+1.0f, +1.0f, +1.0f, // vertex 2
-1.0f, +1.0f, +1.0f, // vertex 3
// Back face
-1.0f, -1.0f, -1.0f, // vertex 4
+1.0f, -1.0f, -1.0f, // vertex 5
+1.0f, +1.0f, -1.0f, // vertex 6
-1.0f, +1.0f, -1.0f, // vertex 7
// Top face
-1.0f, +1.0f, +1.0f, // vertex 8
+1.0f, +1.0f, +1.0f, // vertex 9
+1.0f, +1.0f, -1.0f, // vertex 10
-1.0f, +1.0f, -1.0f, // vertex 11
// Bottom face
-1.0f, -1.0f, +1.0f, // vertex 12
+1.0f, -1.0f, +1.0f, // vertex 13
+1.0f, -1.0f, -1.0f, // vertex 14
-1.0f, -1.0f, -1.0f, // vertex 15
// Right face
+1.0f, -1.0f, +1.0f, // vertex 16
+1.0f, -1.0f, -1.0f, // vertex 17
+1.0f, +1.0f, -1.0f, // vertex 18
+1.0f, +1.0f, +1.0f, // vertex 19
// Left face
-1.0f, -1.0f, +1.0f, // vertex 20
-1.0f, -1.0f, -1.0f, // vertex 21
-1.0f, +1.0f, -1.0f, // vertex 22
-1.0f, +1.0f, +1.0f // vertex 23
};
// Set up indices array
unsigned int cubeIndices[] =
{
// Front face
0, 1, 2, 3,
// Back face
4, 5, 6, 7,
// Top face
8, 9, 10, 11,
// Bottom face
12, 13, 14, 15,
// Right face
16, 17, 18, 19,
// Left face
20, 21, 22, 23
};
typedef struct
{
float r;
float x, y, z;
float vx, vy, vz;
} Cube;
Cube cubes[MAX_CUBES];
int numCubes = 0;
// Create a 4x4 identity matrix
float cubeTransformationMatrix[16] = { 1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f };
/* Emit one diagnostic line to the platform console: the KOS kernel
 * debug log on Dreamcast, stdout elsewhere. A newline is always appended. */
void debugLog(const char* msg) {
#ifdef __DREAMCAST__
    dbglog(DBG_KDEBUG, "%s\n", msg);
#else
    puts(msg); /* writes msg + '\n': same bytes as printf("%s\n", msg) */
#endif
}
/* Fold the current PVR frame rate into a running average (avgfps).
 * avgfps == -1 is the "no sample yet" sentinel it is initialised with.
 * No-op off Dreamcast. */
void runningStats() {
#ifdef __DREAMCAST__
pvr_stats_t stats;
pvr_get_stats(&stats);
/* Exponential-style smoothing: average the old average with the new sample. */
if (avgfps != -1)
avgfps = (avgfps + stats.frame_rate) * 0.5f;
else
avgfps = stats.frame_rate;
#endif
}
/* Print the running average frame rate accumulated by runningStats().
 * No-op off Dreamcast. */
void avgStats() {
#ifdef __DREAMCAST__
dbglog(DBG_DEBUG, "Average frame rate: ~%f fps\n", avgfps);
#endif
}
/* Dump instantaneous PVR statistics (VBL count, current fps), then the
 * running average. Called every ~10 seconds from updateTimer().
 * No-op off Dreamcast. */
void stats() {
#ifdef __DREAMCAST__
pvr_stats_t stats;
pvr_get_stats(&stats);
dbglog(DBG_DEBUG, "3D Stats: %d VBLs, current frame rate ~%f fps\n", stats.vbl_count, stats.frame_rate);
avgStats();
#endif
}
void addCube(float r, float x, float y, float z, float vx, float vy, float vz)
{
if (numCubes < MAX_CUBES) {
cubes[numCubes].r = r;
cubes[numCubes].x = x;
cubes[numCubes].y = y;
cubes[numCubes].z = z;
cubes[numCubes].vx = vx;
cubes[numCubes].vy = vy;
cubes[numCubes].vz = vz;
numCubes++;
}
}
/* Convenience wrapper: add a stationary cube whose radius is half the
 * given scale factor. */
void addCubeQuick(float x, float y, float z, float scale_factor)
{
    const float radius = 0.5f * scale_factor;
    addCube(radius, x, y, z, 0.0f, 0.0f, 0.0f);
}
/* Advance every active cube by one Euler step of `dt` seconds and bounce
 * it off the walls of the [-3, +3] box on each axis.
 *
 * Fix: the loop index is now a signed int to match `numCubes`; the
 * original used size_t, causing a signed/unsigned comparison (and an
 * effectively unbounded loop if numCubes were ever negative). */
void updateCubes(float dt)
{
    for (int i = 0; i < numCubes; i++)
    {
        Cube* cube = &cubes[i];

        cube->x += cube->vx * dt;
        cube->y += cube->vy * dt;
        cube->z += cube->vz * dt;

        /* Reflect velocity on exit; position is not clamped, so a cube may
         * sit slightly outside the box for a frame before moving back in. */
        if (cube->x < -3 || cube->x > +3) { cube->vx *= -1; }
        if (cube->y < -3 || cube->y > +3) { cube->vy *= -1; }
        if (cube->z < -3 || cube->z > +3) { cube->vz *= -1; }
    }
}
/* Draw one 2x2x2 cube centered on the origin from the global vertex and
 * per-face color arrays (24 vertices, 6 quads). The `isDrawingArrays`
 * toggle (flipped by the A button) selects glDrawArrays vs an indexed
 * glDrawElements path. */
void renderUnitCube()
{
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glVertexPointer(3, GL_FLOAT, 0, cubeVertices);
glColorPointer(4, GL_UNSIGNED_BYTE, 0, faceColors);
if (isDrawingArrays) {
glDrawArrays(GL_QUADS, 0, 24);
}
else {
glDrawElements(GL_QUADS, 24, GL_UNSIGNED_INT, cubeIndices);
}
/* Restore client state so other draw paths are unaffected. */
glDisableClientState(GL_COLOR_ARRAY);
glDisableClientState(GL_VERTEX_ARRAY);
}
/* Draw every cube in the pool: translate to its position, spin it by
 * `angle` degrees about the (1,1,1) axis, and scale it so later cubes
 * are larger (scale ramps linearly from 0.05 up to 0.40).
 *
 * Fix: the loop index is a signed int to match `numCubes`; the original
 * size_t index caused a signed/unsigned comparison in both the loop
 * condition and the scale computation. */
void renderCubes(float angle)
{
    for (int i = 0; i < numCubes; i++) {
        const float scale_factor = 0.05f + (i / (float)numCubes) * 0.35f;
        Cube* cube = &cubes[i];

        glPushMatrix(); /* Save previous camera state */
        glMatrixMode(GL_MODELVIEW);
        glTranslatef(cube->x, cube->y, cube->z);
        glRotatef(angle, 1, 1, 1);                          /* Rotate camera / object */
        glScalef(scale_factor, scale_factor, scale_factor); /* Apply scale factor */
        renderUnitCube();
        glPopMatrix(); /* Restore previous camera state */
    }
}
/* Uniform pseudo-random float in [Min, Max], driven by rand().
 * Note: rand() must be seeded by the caller for varying sequences. */
float rnd(float Min, float Max)
{
    const float span = Max - Min;
    const float scaled = span * (float)rand();
    return scaled / (float)RAND_MAX + Min;
}
/* One-time setup: bring up GL/PVR via glKosInit(), configure depth and
 * blend state, set a 640x480 viewport, reset the projection matrix, and
 * fill in the per-face vertex colors for the cube. */
void initialize()
{
debugLog("Initialize video output");
glKosInit();
glClearDepth(1.0);
glDepthFunc(GL_LEQUAL);
glDepthMask(GL_TRUE);
glEnable(GL_DEPTH_TEST);
glShadeModel(GL_SMOOTH);
/* Honour the initial blending toggle (also flipped at runtime by B). */
if (isBlendingEnabled)
{
glEnable(GL_BLEND);
}
else
{
glDisable(GL_BLEND);
}
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glDisable(GL_CULL_FACE);
glViewport(0, 0, 640, 480);
glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
// Set up colors (each face has a different color)
/* 6 faces x 4 vertices = all 24 entries of faceColors. */
for (int i = 0; i < 6; i++)
{
faceColors[i * 4] = colors[i];
faceColors[i * 4 + 1] = colors[i];
faceColors[i * 4 + 2] = colors[i];
faceColors[i * 4 + 3] = colors[i];
}
}
/* Advance the global clock by one fixed step (dt); once more than ten
 * seconds have accumulated, print stats and restart the window. */
void updateTimer()
{
    timeElapsed += dt;

    if (timeElapsed <= 10.0f)
    {
        return;
    }

    stats();
    timeElapsed = 0.0f;
}
/* One fixed-timestep frame: advance timers, wrap and advance the spin
 * angle, clear the buffers, set up the camera, then update and draw the
 * cubes, finally stripping the translation out of the model-view. */
void updateLogic()
{
updateTimer();
/* Drop whole rotations so `angle` stays near [0, 360) before adding
 * this frame's 50 deg/s increment. */
const int fullRot = (int)(angle * invAngle360);
angle -= fullRot * 360.0f;
angle += 50.0f * dt;
/* Camera zoom oscillates sinusoidally with elapsed time. */
const float zoomVal = __builtin_sinf(timeElapsed) * 5.0f;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
// Set up the camera position and orientation
float cameraPos[] = { 0.0f, 0.0f, cameraDistance };
float cameraTarget[] = { 0.0f, 0.0f, 0.0f };
float cameraUp[] = { 0.0f, 1.0f, 0.0f };
// Move the camera
gluLookAt(cameraPos[0], cameraPos[1], cameraPos[2],
cameraTarget[0], cameraTarget[1], cameraTarget[2],
cameraUp[0], cameraUp[1], cameraUp[2]);
glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal);
// Apply cube transformation (identity matrix)
/* NOTE(review): this glLoadIdentity() discards the gluLookAt/translate
 * set up just above, so cubes render in an untransformed model-view.
 * Presumably intentional for this demo — confirm before changing. */
glLoadIdentity();
updateCubes(dt);
renderCubes(angle);
// Reset ModelView matrix to remove camera transformation
float matrix[16];
glGetFloatv(GL_MODELVIEW_MATRIX, matrix);
/* Zero the translation column (elements 12-14), keeping rotation/scale. */
matrix[12] = 0.0f;
matrix[13] = 0.0f;
matrix[14] = 0.0f;
glMatrixMode(GL_MODELVIEW);
glLoadMatrixf(matrix);
}
/* Poll the first Dreamcast controller and edge-detect button presses:
 *   START -> quit the main loop,
 *   A     -> toggle glDrawArrays vs glDrawElements (and the clear color),
 *   B     -> toggle alpha blending.
 * No-op off Dreamcast.
 *
 * Fixes vs original:
 *  - `prevButtons = state->buttons;` was executed even when `state` was
 *    NULL (every other use guarded with `state &&`) — now we bail early.
 *  - prevButtons was a uint8_t, truncating the controller button mask and
 *    breaking edge detection for buttons above bit 7; it is now uint32_t. */
void updateInput()
{
#ifdef __DREAMCAST__
    static uint32_t prevButtons = 0;
    maple_device_t* cont;
    cont_state_t* state;

    cont = maple_enum_type(0, MAPLE_FUNC_CONTROLLER);
    if (!cont)
    {
        return;
    }

    state = (cont_state_t*)maple_dev_status(cont);
    if (!state)
    {
        return;
    }

    if ((state->buttons & CONT_START) && !(prevButtons & CONT_START))
    {
        isRunning = false;
    }

    if ((state->buttons & CONT_A) && !(prevButtons & CONT_A))
    {
        isDrawingArrays = !isDrawingArrays;

        if (isDrawingArrays)
        {
            glClearColor(0.3f, 0.0f, 0.3f, 1.0f);
        }
        else
        {
            glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
        }
    }

    if ((state->buttons & CONT_B) && !(prevButtons & CONT_B))
    {
        isBlendingEnabled = !isBlendingEnabled;

        if (isBlendingEnabled)
        {
            glEnable(GL_BLEND);
        }
        else
        {
            glDisable(GL_BLEND);
        }
    }

    prevButtons = state->buttons;
#endif
}
/* Present the completed frame. No-op off Dreamcast — this build has no
 * windowing backend to present to. */
void swapBuffers()
{
#ifdef __DREAMCAST__
glKosSwapBuffers();
#endif
}
/* Entry point: initialise GL, set a fixed perspective projection,
 * scatter MAX_CUBES randomly placed/drifting cubes, then run the
 * fixed-timestep loop until updateInput() clears isRunning (START). */
int main(int argc, char* argv[])
{
initialize();
// Setup camera frustum
const float aspectRatio = 640.0f / 480.0f;
const float fov = 60;
const float zNear = 0.1f;
const float zFar = 1000.0f;
gluPerspective(fov, aspectRatio, zNear, zFar);
/* NOTE(review): rand() is never seeded, so the generated scene is
 * identical on every run — presumably fine for a benchmark demo. */
for (size_t i = 0; i < MAX_CUBES; i++)
{
const float r = rnd(0.1f, 0.5f);
const float x = rnd(-3.0f, 3.0f);
const float y = rnd(-3.0f, 3.0f);
const float z = rnd(-3.0f, 3.0f);
const float vx = rnd(-2.0f, 2.0f);
const float vy = rnd(-2.0f, 2.0f);
const float vz = rnd(-2.0f, 2.0f);
addCube(r, x, y, z, vx, vy, vz);
}
/* Main loop: logic, input, present, then accumulate fps statistics. */
while (isRunning)
{
updateLogic();
updateInput();
swapBuffers();
runningStats();
}
avgStats();
return 0;
}

View File

@ -145,7 +145,7 @@ int check_start() {
void DrawCube(float x, float z) {
static float pos = 0.0f;
static const float radius = 30.0f;
const static float radius = 30.0f;
pos += 0.001f;

View File

@ -23,11 +23,7 @@ int ImageLoad(char *filename, Image *image) {
}
// seek through the bmp header, up to the width/height:
fseek(file, 10, SEEK_CUR);
uint32_t offset;
fread(&offset, 4, 1, file);
fseek(file, 4, SEEK_CUR);
fseek(file, 18, SEEK_CUR);
// read the width
if ((i = fread(&sizeX, 4, 1, file)) != 1) {
@ -69,7 +65,7 @@ int ImageLoad(char *filename, Image *image) {
}
// seek past the rest of the bitmap header.
fseek(file, offset, SEEK_SET);
fseek(file, 24, SEEK_CUR);
// read the data.
image->data = (char *) malloc(size);

View File

@ -9,7 +9,7 @@
/* A general OpenGL initialization function. Sets all of the initial parameters. */
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
{
glClearColor(0.0f, 0.0f, 1.0f, 0.0f); // This Will Clear The Background Color To Black
glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black
glClearDepth(1.0); // Enables Clearing Of The Depth Buffer
glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do
glEnable(GL_DEPTH_TEST); // Enables Depth Testing
@ -20,7 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
glMatrixMode(GL_MODELVIEW);
glMatrixMode(GL_MODELVIEW);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */

View File

@ -53,10 +53,10 @@ void LoadGLTextures() {
// 2d texture, level of detail 0 (normal), 3 components (red, green, blue), x size from image, y size from image,
// border 0 (normal), rgb color data, unsigned byte data, and finally the data itself.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, image1->sizeX, image1->sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1->data);
glTexImage2D(GL_TEXTURE_2D, 0, 3, image1->sizeX, image1->sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1->data);
free(image1);
}
};
/* A general OpenGL initialization function. Sets all of the initial parameters. */
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
@ -74,7 +74,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
glMatrixMode(GL_MODELVIEW);
glMatrixMode(GL_MODELVIEW);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */

Binary file not shown.

Before

Width:  |  Height:  |  Size: 96 KiB

After

Width:  |  Height:  |  Size: 192 KiB

View File

@ -59,10 +59,10 @@ int ImageLoad(char *filename, Image *image) {
fread(&header, sizeof(header), 1, file);
GLboolean twiddled = (header.type & (1 << 26)) < 1;
GLboolean compressed = (header.type & (1 << 30)) > 0;
GLboolean mipmapped = (header.type & (1 << 31)) > 0;
GLboolean strided = (header.type & (1 << 25)) > 0;
GLboolean twiddled = (header.type & (1 << 25)) < 1;
GLboolean compressed = (header.type & (1 << 29)) > 0;
GLboolean mipmapped = (header.type & (1 << 30)) > 0;
GLboolean strided = (header.type & (1 << 24)) > 0;
GLuint format = (header.type >> 27) & 0b111;
image->data = (char *) malloc (header.size);

View File

@ -10,8 +10,6 @@
#ifdef __DREAMCAST__
#include <kos.h>
#else
#include <SDL.h>
#endif
#include <stdio.h>
@ -19,9 +17,7 @@
#include <GL/glu.h>
#include <GL/glkos.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include "../loadbmp.h"
@ -88,16 +84,7 @@ void SetupWorld()
int numtriangles;
FILE *filein;
char oneline[255];
#ifdef __DREAMCAST__
filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From
#else
filein = fopen("../samples/nehe10/romdisk/world.txt", "rt");
#endif
if(!filein) {
fprintf(stderr, "Failed to load world file\n");
exit(1);
}
readstr(filein,oneline);
sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles);
@ -241,13 +228,6 @@ void DrawGLScene(void) {
}
int ReadController(void) {
bool start = false;
bool up = false;
bool down = false;
bool left = false;
bool right = false;
#ifdef __DREAMCAST__
maple_device_t *cont;
cont_state_t *state;
@ -261,27 +241,10 @@ int ReadController(void) {
return 0;
}
start = (state->buttons & CONT_START);
up = (state->buttons & CONT_DPAD_UP);
down = (state->buttons & CONT_DPAD_DOWN);
left = (state->buttons & CONT_DPAD_LEFT);
right = (state->buttons & CONT_DPAD_RIGHT);
#else
int num_keys = 0;
uint8_t* state = SDL_GetKeyboardState(&num_keys);
start = state[SDL_SCANCODE_RETURN];
up = state[SDL_SCANCODE_UP];
down = state[SDL_SCANCODE_DOWN];
left = state[SDL_SCANCODE_LEFT];
right = state[SDL_SCANCODE_RIGHT];
#endif
if(start) {
if(state->buttons & CONT_START)
return 0;
}
if(up) {
if(state->buttons & CONT_DPAD_UP) {
xpos -= (float)sin(heading*piover180) * 0.05f;
zpos -= (float)cos(heading*piover180) * 0.05f;
if (walkbiasangle >= 359.0f)
@ -295,7 +258,8 @@ int ReadController(void) {
walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
}
if(down) {
if(state->buttons & CONT_DPAD_DOWN) {
xpos += (float)sin(heading*piover180) * 0.05f;
zpos += (float)cos(heading*piover180) * 0.05f;
if (walkbiasangle <= 1.0f)
@ -309,17 +273,18 @@ int ReadController(void) {
walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
}
if(left) {
if(state->buttons & CONT_DPAD_LEFT) {
heading += 1.0f;
yrot = heading;
}
if(right) {
if(state->buttons & CONT_DPAD_RIGHT) {
heading -= 1.0f;
yrot = heading;
}
#endif
/* Switch to the blended polygon list if needed */
if(blend) {

View File

@ -157,4 +157,4 @@ NUMPOLLIES 36
2.0 0.0 -0.5 0.0 0.0
3.0 1.0 -0.5 1.0 1.0
2.0 1.0 -0.5 0.0 1.0
2.0 0.0 -0.5 0.0 0.0
2.0 0.0 -0.5 0.0 0.0

View File

@ -132,7 +132,7 @@ void LoadGLTextures() {
// 2d texture, level of detail 0 (normal), 3 components (red, green, blue), x size from image, y size from image,
// border 0 (normal), rgb color data, unsigned byte data, and finally the data itself.
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1->width, image1->height, 0, GL_COLOR_INDEX8_TWID_KOS, GL_UNSIGNED_BYTE, image1->data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1->width, image1->height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE_TWID_KOS, image1->data);
glGenerateMipmapEXT(GL_TEXTURE_2D);
free(image1);

View File

@ -254,8 +254,6 @@ int BMP_Infos(FILE *pFile, uint32_t *width, uint32_t *height)
*width = (uint32_t)BmpInfoHeader.Width;
*height = (uint32_t)BmpInfoHeader.Height;
fseek(pFile, BmpInfoHeader.Size + 14, SEEK_SET);
return 1;
}
@ -272,7 +270,6 @@ int BMP_GetPalette(FILE *pFile)
bitCount = BmpInfoHeader.ClrImportant * sizeof(RGB_QUAD);
if (fread(BmpRgbQuad, 1, bitCount, pFile) != bitCount){
fprintf(stderr, "Failed to read palette: %d\n", bitCount);
return 0;
}
@ -284,8 +281,6 @@ int BMP_GetPalette(FILE *pFile)
}
return 1;
}
fprintf(stderr, "BitCount: %d\n", BmpInfoHeader.BitCount);
return 0;
}
@ -351,7 +346,7 @@ int LoadPalettedBMP(const char* filename, Image* image)
}
if (!BMP_GetPalette(fp)) {
printf("Only 16c BMP are supported for this sample\n");
printf("Only 16c BMP are supported for this sample");
return 0;
}
@ -434,7 +429,7 @@ void LoadGLTextures() {
#ifndef USE_16C_PALETTE
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image1.data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
#endif
glBindTexture(GL_TEXTURE_2D, textures[1]); // 2d texture (x and y size)
@ -449,7 +444,7 @@ void LoadGLTextures() {
#ifndef USE_16C_PALETTE
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image1.data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
#endif
glBindTexture(GL_TEXTURE_2D, textures[2]);
@ -468,7 +463,7 @@ void LoadGLTextures() {
#ifndef USE_16C_PALETTE
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image2.data);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image2.data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image2.data);
#endif
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 32 KiB

View File

@ -100,7 +100,7 @@ void do_frame() {
glKosSwapBuffers();
}
time_t begin;
time_t start;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 3, ppf * 3 * 60);
@ -113,8 +113,8 @@ void check_switch() {
now = time(NULL);
if(now >= (begin + 5)) {
begin = time(NULL);
if(now >= (start + 5)) {
start = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
@ -165,7 +165,7 @@ int main(int argc, char **argv) {
/* Start off with something obscene */
switch_tests(200000 / 60);
begin = time(NULL);
start = time(NULL);
for(;;) {
if(check_start())

File diff suppressed because it is too large Load Diff

View File

@ -1,64 +0,0 @@
#include <stddef.h>
#include <time.h>
#include <stdio.h>
#ifdef __DREAMCAST__
#include <kos.h>
#include "../profiler.h"
#endif
#include <GL/gl.h>
#include <GL/glkos.h>
#include "image.h"
#define PROFILE 0
/* Benchmark: re-upload the same static RGB image with glTexImage2D in a
 * tight loop for ~5 seconds, then report the call count and the mean
 * seconds-per-call.  `width`, `height` and `header_data` are expected to
 * come from image.h (TODO confirm — they are not declared in this file). */
int main(int argc, char* argv[]) {
(void) argc;
(void) argv;
fprintf(stdout, "Initializing\n");
glKosInit();
glClearColor(0.5f, 0.0f, 0.5f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glKosSwapBuffers();
GLuint texture_id = 0;
glGenTextures(1, &texture_id);
glBindTexture(GL_TEXTURE_2D, texture_id);
time_t start = time(NULL);
time_t end = start;
int counter = 0;
fprintf(stderr, "Starting test run...\n");
#ifdef __DREAMCAST__
#if PROFILE
profiler_init("/pc/gmon.out");
profiler_start();
#endif
#endif
/* time() has 1-second granularity; coarse, but acceptable over a
 * 5-second measurement window. */
while((end - start) < 5) {
glTexImage2D(
GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, header_data
);
++counter;
end = time(NULL);
}
#ifdef __DREAMCAST__
#if PROFILE
profiler_stop();
profiler_clean_up();
#endif
#endif
fprintf(stderr, "Called glTexImage2D %d times (%.4f per call)\n", counter, (float)(end - start) / (float)(counter));
return 0;
}

View File

@ -68,16 +68,14 @@ int check_start() {
}
void setup() {
GLdcConfig cfg;
glKosInitConfig(&cfg);
cfg.initial_immediate_capacity = 14000;
glKosInitEx(&cfg);
glKosInit();
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glOrtho(0, 640, 0, 480, -100, 100);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glDisable(GL_NEARZ_CLIPPING_KOS);
}
void do_frame() {
@ -107,12 +105,10 @@ void do_frame() {
glKosSwapBuffers();
}
time_t begin;
time_t start;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 2, ppf * 2 * 60);
fflush(stdout);
avgfps = -1;
polycnt = ppf;
}
@ -122,9 +118,10 @@ void check_switch() {
now = time(NULL);
if(now >= (begin + 5)) {
begin = time(NULL);
if(now >= (start + 5)) {
start = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
case PHASE_HALVE:
@ -165,27 +162,22 @@ void check_switch() {
case PHASE_FINAL:
break;
}
fflush(stdout);
}
}
#define PROFILE 0
int main(int argc, char **argv) {
#if PROFILE
#ifndef NDEBUG
#ifdef __DREAMCAST__
profiler_init("/pc/gmon.out");
profiler_start();
#endif
#endif
setup();
#if PROFILE
profiler_start();
#endif
/* Start off with something obscene */
switch_tests(200000 / 60);
begin = time(NULL);
start = time(NULL);
uint32_t iterations = 2000;
@ -201,9 +193,11 @@ int main(int argc, char **argv) {
stats();
#if PROFILE
#ifdef __DREAMCAST__
#ifndef NDEBUG
profiler_stop();
profiler_clean_up();
#endif
#endif
return 0;

View File

@ -93,7 +93,7 @@ void do_frame() {
glKosSwapBuffers();
}
time_t begin;
time_t start;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 2, ppf * 2 * 60);
@ -106,8 +106,8 @@ void check_switch() {
now = time(NULL);
if(now >= (begin + 5)) {
begin = time(NULL);
if(now >= (start + 5)) {
start = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
@ -155,7 +155,7 @@ int main(int argc, char **argv) {
/* Start off with something obscene */
switch_tests(220000 / 60);
begin = time(NULL);
start = time(NULL);
for(;;) {
if(check_start())

View File

@ -28,8 +28,6 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glEnable(GL_CULL_FACE);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */
@ -88,13 +86,12 @@ void DrawGLScene()
rotation = (rotation > 360.0f) ? rotation - 360.0f : rotation;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer
glClearColor(0.5f, 0.5f, 0.5f, 0.5f);
glLoadIdentity(); // Reset The View
glDisable(GL_CULL_FACE);
glPushMatrix();
glTranslatef(0.0f, -1.0f, -movement);
glTranslatef(0.0f, -1.0f, movement);
glRotatef(rotation, 0.0f, 1.0f, 0.0f);
glBegin(GL_TRIANGLES);

View File

@ -1,26 +0,0 @@
# Collect the per-feature test headers that the generator scans.
# NOTE: file(GLOB) will not notice new test_*.h files until CMake re-runs.
file(GLOB GL_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/test_*.h)

set(TEST_GENERATOR_BIN ${CMAKE_SOURCE_DIR}/tools/test_generator.py)
set(TEST_MAIN_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/main.cpp)

# Generate the test-runner entry point from the discovered test headers.
# VERBATIM makes argument escaping platform-independent.
add_custom_command(
    OUTPUT ${TEST_MAIN_FILENAME}
    COMMAND ${TEST_GENERATOR_BIN} --output ${TEST_MAIN_FILENAME} ${TEST_FILES} ${GL_TESTS}
    DEPENDS ${TEST_FILES} ${GL_TESTS} ${TEST_GENERATOR_BIN}
    VERBATIM
)

add_executable(gldc_tests ${TEST_FILES} ${TEST_SOURCES} ${TEST_MAIN_FILENAME})

# Scope the include path to the test target instead of the whole directory,
# and link with an explicit visibility keyword.
target_include_directories(gldc_tests PRIVATE ${CMAKE_SOURCE_DIR})
target_link_libraries(gldc_tests PRIVATE GLdc)

# On PC builds GLdc is compiled as 32-bit, so the tests must match.
if(NOT PLATFORM_DREAMCAST)
    set_target_properties(
        gldc_tests
        PROPERTIES
        COMPILE_OPTIONS "-m32"
        LINK_OPTIONS "-m32"
    )
endif()

View File

@ -1,189 +0,0 @@
#include "tools/test.h"
#include <cstdint>
#include <cassert>
#include <malloc.h>
#include <utility>
#include <GL/gl.h>
#include <GL/glkos.h>
#include "GL/alloc/alloc.h"
/* Round n up to the next multiple of `multiple` (n unchanged if it is
 * already a multiple). `multiple` must be non-zero. */
static inline int round_up(int n, int multiple)
{
    assert(multiple);
    const int groups = (n + multiple - 1) / multiple;
    return groups * multiple;
}
#define POOL_SIZE (16 * 2048)
/* Exercises the VRAM-style block allocator (GL/alloc/alloc.h): a pool of
 * 2048-byte blocks with alignment, straddling-avoidance and defrag logic. */
class AllocatorTests : public test::TestCase {
public:
uint8_t* pool = NULL;
// (src, dst) pairs recorded by the defrag callback, in move order.
std::vector<std::pair<void*, void*>> defrag_moves;
void set_up() {
// The allocator expects a 2048-aligned backing buffer.
pool = (uint8_t*) memalign(2048, POOL_SIZE);
assert(((intptr_t) pool) % 2048 == 0);
}
void tear_down() {
alloc_shutdown(pool);
free(pool);
}
/* Defrag callback: records each block move into defrag_moves. */
static void on_defrag(void* src, void* dst, void* user_data) {
AllocatorTests* self = (AllocatorTests*) user_data;
self->defrag_moves.push_back(std::make_pair(src, dst));
}
/* Freeing a middle allocation then defragging should slide the last
 * allocation down into the hole, exactly one move. */
void test_defrag() {
alloc_init(pool, POOL_SIZE);
alloc_malloc(pool, 256);
void* a2 = alloc_malloc(pool, 256);
void* a3 = alloc_malloc(pool, 256);
alloc_free(pool, a2);
alloc_run_defrag(pool, &AllocatorTests::on_defrag, 5, this);
assert_equal(defrag_moves.size(), 1u); // Moved a3 -> a2
assert_equal(defrag_moves[0].first, a3);
assert_equal(defrag_moves[0].second, a2);
assert_equal(alloc_malloc(pool, 256), a3);
}
void test_poor_alloc_aligned() {
/* If we try to allocate and there are no suitable aligned
* slots available, we fallback to any available unaligned slots */
alloc_init(pool, POOL_SIZE);
// Leave only space for an unaligned block
alloc_malloc(pool, (15 * 2048) - 256);
// Should work, we have space (just) but it's not aligned
void* a1 = alloc_malloc(pool, 2048 + 256);
assert_is_not_null(a1);
assert_equal(a1, pool + ((15 * 2048) - 256));
}
void test_poor_alloc_straddling() {
/*
* If we try to allocate a small block, it should not
* cross a 2048 boundary unless there is no other option */
alloc_init(pool, POOL_SIZE);
alloc_malloc(pool, (15 * 2048) - 256);
void* a1 = alloc_malloc(pool, 512);
assert_true((uintptr_t(a1) % 2048) == 0); // Should've aligned to the last 2048 block
/* Allocate the rest of the last block, this leaves a 256 block in the
* penultimate block */
alloc_malloc(pool, 1536);
alloc_free(pool, a1);
/* No choice but to straddle the boundary */
a1 = alloc_malloc(pool, 768);
}
/* The usable base address is the pool rounded up to 2048, and the block
 * count is whatever whole 2048-byte blocks fit after that. */
void test_alloc_init() {
alloc_init(pool, POOL_SIZE);
void* expected_base_address = (void*) round_up((uintptr_t) pool, 2048);
assert_equal(alloc_next_available(pool, 16), expected_base_address);
assert_equal(alloc_base_address(pool), expected_base_address);
size_t expected_blocks = (
uintptr_t(pool + POOL_SIZE) -
uintptr_t(expected_base_address)
) / 2048;
assert_equal(alloc_block_count(pool), expected_blocks);
}
/* Regression scenario replayed against an 8MiB pool; passes if no
 * crash/assert fires inside the allocator. */
void test_complex_case() {
uint8_t* large_pool = (uint8_t*) malloc(8 * 1024 * 1024);
alloc_init(large_pool, 8 * 1024 * 1024);
alloc_malloc(large_pool, 262144);
alloc_malloc(large_pool, 262144);
void* a1 = alloc_malloc(large_pool, 524288);
alloc_free(large_pool, a1);
alloc_malloc(large_pool, 699056);
alloc_malloc(large_pool, 128);
alloc_shutdown(large_pool);
free(large_pool);
}
/* Second regression scenario: alternating alloc/free of mixed sizes. */
void test_complex_case2() {
uint8_t* large_pool = (uint8_t*) malloc(8 * 1024 * 1024);
alloc_init(large_pool, 8 * 1024 * 1024);
void* a1 = alloc_malloc(large_pool, 131072);
alloc_free(large_pool, a1);
alloc_malloc(large_pool, 174768);
void* a2 = alloc_malloc(large_pool, 131072);
alloc_free(large_pool, a2);
alloc_malloc(large_pool, 174768);
void* a3 = alloc_malloc(large_pool, 128);
alloc_free(large_pool, a3);
alloc_shutdown(large_pool);
free(large_pool);
}
/* Walks the documented allocation strategy: sub-2048 allocations pack
 * into partial blocks, >=2048 requests snap to 2048 boundaries. */
void test_alloc_malloc() {
alloc_init(pool, POOL_SIZE);
uint8_t* base_address = (uint8_t*) alloc_base_address(pool);
void* a1 = alloc_malloc(pool, 1024);
/* First alloc should always be the base address */
assert_equal(a1, base_address);
/* An allocation of <= 2048 (well 1024) will not necessarily be at
* a 2k boundary */
void* expected_next_available = base_address + uintptr_t(1024);
assert_equal(alloc_next_available(pool, 1024), expected_next_available);
/* Requesting 2k though will force to a 2k boundary */
expected_next_available = base_address + uintptr_t(2048);
assert_equal(alloc_next_available(pool, 2048), expected_next_available);
/* Now alloc 2048 bytes, this should be on the 2k boundary */
void* a2 = alloc_malloc(pool, 2048);
assert_equal(a2, expected_next_available);
/* If we try to allocate 1k, this should go in the second half of the
* first block */
expected_next_available = base_address + uintptr_t(1024);
void* a3 = alloc_malloc(pool, 1024);
assert_equal(a3, expected_next_available);
alloc_free(pool, a1);
/* Next allocation would go in the just freed block */
expected_next_available = base_address;
assert_equal(alloc_next_available(pool, 64), expected_next_available);
/* Now allocate 14 more 2048 size blocks, the following one should
* return NULL */
for(int i = 0; i < 14; ++i) {
alloc_malloc(pool, 2048);
}
assert_is_null(alloc_malloc(pool, 2048));
/* But we should still have room in the second block for this */
assert_is_not_null(alloc_malloc(pool, 64));
}
};

View File

@ -1,77 +0,0 @@
#include "tools/test.h"
#include <stdint.h>
#include <GL/gl.h>
#include <GL/glkos.h>
/* Verifies that glTexImage2D selects the expected internal texture format
 * (RGB565 / ARGB4444, twiddled or not) for RGB/RGBA uploads. */
class TexImage2DTests : public test::TestCase {
public:
// 8x8 RGBA8 source image used by every test.
uint8_t image_data[8 * 8 * 4] = {0};
void set_up() {
GLdcConfig config;
glKosInitConfig(&config);
// Twiddling is off by default; individual tests enable it explicitly.
config.texture_twiddle = false;
glKosInitEx(&config);
/* Init image data so each texel RGBA value matches the
* position in the array */
for(int i = 0; i < 8 * 8 * 4; i += 4) {
image_data[i + 0] = i;
image_data[i + 1] = i;
image_data[i + 2] = i;
image_data[i + 3] = i;
}
}
void tear_down() {
glKosShutdown();
}
/* RGB upload, twiddling disabled -> plain RGB565. */
void test_rgb_to_rgb565() {
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE, image_data);
assert_equal(glGetError(), GL_NO_ERROR);
GLint internalFormat;
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
assert_equal(internalFormat, GL_RGB565_KOS);
}
/* RGB upload with twiddling enabled -> twiddled RGB565. */
void test_rgb_to_rgb565_twiddle() {
glEnable(GL_TEXTURE_TWIDDLE_KOS);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE, image_data);
glDisable(GL_TEXTURE_TWIDDLE_KOS);
assert_equal(glGetError(), GL_NO_ERROR);
GLint internalFormat;
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
assert_equal(internalFormat, GL_RGB565_TWID_KOS);
}
/* RGBA upload, twiddling disabled -> plain ARGB4444. */
void test_rgba_to_argb4444() {
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 8, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, image_data);
assert_equal(glGetError(), GL_NO_ERROR);
GLint internalFormat;
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
assert_equal(internalFormat, GL_ARGB4444_KOS);
}
/* RGBA upload with twiddling enabled -> twiddled ARGB4444. */
void test_rgba_to_argb4444_twiddle() {
glEnable(GL_TEXTURE_TWIDDLE_KOS);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 8, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, image_data);
glDisable(GL_TEXTURE_TWIDDLE_KOS);
assert_equal(glGetError(), GL_NO_ERROR);
GLint internalFormat;
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
assert_equal(internalFormat, GL_ARGB4444_TWID_KOS);
}
};

View File

@ -1,637 +0,0 @@
#include <cstdint>
#include <vector>
#include <cstdio>
#include <cmath>
#include <stdexcept>
#include <cassert>
#define SQ_BASE_ADDRESS 0
#define SPAN_SORT_CFG 0
#define PVR_SET(x, y) (void)(x); (void)(y)
/* Mirror of the driver's 32-byte vertex record: PVR command word,
 * position, UV, inverse-W and packed BGRA colour. */
struct Vertex {
uint32_t flags;
float xyz[3];
float uv[2];
float w;
uint8_t bgra[4];
};
/* Half-extents and centre offsets of the emulated viewport.
 * NOTE(review): hwidth is 320 while the VideoMode height below is also
 * 320 — confirm the 640x480-ish mix is intentional for these tests. */
struct {
float hwidth;
float x_plus_hwidth;
float hheight;
float y_plus_hheight;
} VIEWPORT = {320, 320, 240, 240};
struct VideoMode {
float height;
};
/* Static stand-in for the real video-mode query. */
static VideoMode* GetVideoMode() {
static VideoMode mode = {320.0f};
return &mode;
}
/* PVR list command words, as emitted in the real driver. */
enum GPUCommand {
GPU_CMD_POLYHDR = 0x80840000,
GPU_CMD_VERTEX = 0xe0000000,
GPU_CMD_VERTEX_EOL = 0xf0000000,
GPU_CMD_USERCLIP = 0x20000000,
GPU_CMD_MODIFIER = 0x80000000,
GPU_CMD_SPRITE = 0xA0000000
};
/* Everything handed to _glSubmitHeaderOrVertex is appended here so the
 * tests can inspect the submitted stream. */
static std::vector<Vertex> sent;
/* Linearly interpolate two packed 8-bit-per-channel colours:
 * *out = a + (b - a) * t, with t in [0, 1].
 *
 * Fixed-point blend: f2 = t * 256, f1 = 256 - f2.  Channels are blended
 * two at a time via the 0x00FF00FF mask trick.  The odd bytes (bits 8-15
 * and 24-31) are shifted DOWN before multiplying: multiplying the
 * still-shifted 0xFF00FF00 halves first (as the previous version did)
 * overflows 32 bits for the top byte, which zeroed the alpha channel. */
static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) {
    const static uint32_t MASK1 = 0x00FF00FF;
    const static uint32_t MASK2 = 0xFF00FF00;

    const uint32_t f2 = 256 * t;
    const uint32_t f1 = 256 - f2;

    /* Even bytes: blend, shift back down, re-mask.
     * Odd bytes: pre-shift down, blend, result lands on MASK2 directly. */
    *out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) |
           (((((*a >> 8) & MASK1) * f1) + (((*b >> 8) & MASK1) * f2)) & MASK2);
}
/* Intersect edge v1->v2 with the near plane z = -w and write the
 * interpolated vertex to vout.
 *
 * With d = w + z (signed distance measure), the parametric intersection
 * is t = d0 / (d0 - d1); the sign/sqrt dance below computes that
 * branchlessly, and epsilon nudges t slightly toward the inside to avoid
 * landing exactly on the plane — presumably to keep the clipped point
 * strictly visible (TODO confirm intent). */
static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
/* Clipping time! */
const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2];
const float sign = ((2.0f * (d1 < d0)) - 1.0f);
const float epsilon = -0.00001f * sign;
const float n = (d0 - d1);
const float r = (1.f / sqrtf(n * n)) * sign; // 1 / (d0 - d1), branchless
float t = fmaf(r, d0, epsilon);
vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = fmaf(v2->w - v1->w, t, v1->w);
vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra);
}
/* True for either vertex command word (mid-strip or end-of-strip). */
bool glIsVertex(const uint32_t flags) {
    switch(flags) {
        case GPU_CMD_VERTEX:
        case GPU_CMD_VERTEX_EOL:
            return true;
        default:
            return false;
    }
}
/* True only for the end-of-strip vertex command. */
bool glIsLastVertex(const uint32_t flags) {
    return (flags == GPU_CMD_VERTEX_EOL);
}
/* Test stub for the real store-queue submit: the SQ target pointer is
 * ignored and the record is appended to `sent` for later inspection. */
void _glSubmitHeaderOrVertex(volatile uint32_t*, Vertex* vtx) {
sent.push_back(*vtx);
}
/* 1 / sqrt(x^2) == 1 / |x|.  Kept in the sqrt-of-square form so the
 * behaviour (including huge-x overflow to inf -> 0) matches the
 * original exactly. */
float _glFastInvert(float x) {
    const float squared = x * x;
    return 1.f / __builtin_sqrtf(squared);
}
/* Perspective-divide a clip-space vertex and map it into window
 * coordinates using VIEWPORT; `h` is the framebuffer height used to flip
 * the Y axis.  On exit xyz[2] holds the depth value submitted to the
 * hardware (1/w for perspective, see note below for ortho). */
void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
const float f = _glFastInvert(vertex->w);
/* Convert to NDC and apply viewport */
vertex->xyz[0] = __builtin_fmaf(
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
);
vertex->xyz[1] = h - __builtin_fmaf(
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
);
/* Orthographic projections need to use invZ otherwise we lose
the depth information. As w == 1, and clip-space range is -w to +w
we add 1.0 to the Z to bring it into range. We add a little extra to
avoid a divide by zero.
*/
vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
}
/* Copy one vertex record; plain struct assignment suffices since Vertex
 * holds only scalar members. */
void memcpy_vertex(Vertex* dst, Vertex* src) {
    dst[0] = src[0];
}
/* Zclipping is so difficult to get right, that self sample tests all the cases of clipping and makes sure that things work as expected */
#ifdef __DREAMCAST__
static volatile int *pvrdmacfg = (int*)0xA05F6888;
static volatile int *qacr = (int*)0xFF000038;
#else
static int pvrdmacfg[2];
static int qacr[2];
#endif
/* Host-side mock of the store-queue submission path.
 *
 * Walks `n` 32-byte records starting at `src` (a polygon header followed
 * by strip vertices), keeps a 3-vertex sliding window in a small ring
 * buffer, and near-Z clips every triangle against z = -w before
 * perspective-dividing and submitting (recording) the output.
 *
 * visible_mask carries one visibility bit per window vertex: bit 0 is
 * the oldest vertex, bit 2 the newest (see the shift when a vertex is
 * enqueued); each switch case below handles one of the 8 combinations. */
void SceneListSubmit(void* src, int n) {
/* You need at least a header, and 3 vertices to render anything */
if(n < 4) {
return;
}
const float h = GetVideoMode()->height;
PVR_SET(SPAN_SORT_CFG, 0x0);
//Set PVR DMA registers
pvrdmacfg[0] = 1;
pvrdmacfg[1] = 1;
//Set QACR registers
qacr[1] = qacr[0] = 0x11;
volatile uint32_t *d = SQ_BASE_ADDRESS;
int8_t queue_head = 0;
int8_t queue_tail = 0;
/* The most vertices ever in the queue is 5 (as some clipping operations
* produce and additional couple of vertice, but we add one more so the ring buffer doesn't
* trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */
/* NOTE(review): the comment above talks about 5 (+1 spare) entries but
 * the array holds 4 — confirm this capacity is actually sufficient. */
Vertex __attribute__((aligned(32))) queue[4];
const int queue_capacity = sizeof(queue) / sizeof(Vertex);
Vertex* vertex = (Vertex*) src;
uint32_t visible_mask = 0;
#if CLIP_DEBUG
for(int i = 0; i < n; ++i) {
fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
}
fprintf(stderr, "----\n");
#endif
while(n--) {
bool last_vertex = false;
memcpy_vertex(queue + queue_tail, vertex);
++vertex;
switch(queue[queue_tail].flags) {
case GPU_CMD_POLYHDR:
/* Headers pass straight through untouched. */
_glSubmitHeaderOrVertex(d, &queue[queue_tail]);
break;
case GPU_CMD_VERTEX_EOL:
last_vertex = true; // fallthru
case GPU_CMD_VERTEX:
/* Shift the window and set bit 2 if the new vertex is in front
 * of the near plane (z >= -w). */
visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2;
assert(visible_mask < 15);
queue_tail = (queue_tail + 1) % queue_capacity;
default:
break;
}
/* Only process once a full triangle (3 vertices) is queued. */
int counter = (queue_tail - queue_head + queue_capacity) % queue_capacity;
if(counter < 3) {
continue;
}
#if CLIP_DEBUG
fprintf(stderr, "%d\n", visible_mask);
#endif
Vertex __attribute__((aligned(32))) a, b; // Scratch vertices
switch(visible_mask) {
case 0:
/* Fully behind the plane: emit nothing. */
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(&queue[queue_head], h);
_glSubmitHeaderOrVertex(d, &queue[queue_head]);
if(last_vertex) {
/* If this was the last vertex in the strip, we need to flush the queue and then
restart it again */
int v1 = (queue_head + 1) % queue_capacity;
int v2 = (queue_head + 2) % queue_capacity;
_glPerspectiveDivideVertex(&queue[v1], h);
_glSubmitHeaderOrVertex(d, &queue[v1]);
_glPerspectiveDivideVertex(&queue[v2], h);
_glSubmitHeaderOrVertex(d, &queue[v2]);
}
break;
case 1:
/* First vertex was visible */
{
Vertex* v0 = &queue[queue_head];
Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];
_glClipEdge(v0, v1, &a);
_glClipEdge(v2, v0, &b);
a.flags = GPU_CMD_VERTEX;
/* If v2 was the last in the strip, then b should be. If it wasn't
we'll create a degenerate triangle by adding b twice in a row so that the
strip processing will continue correctly after crossing the plane so it can
cross back*/
b.flags = v2->flags;
_glPerspectiveDivideVertex(v0, h);
_glPerspectiveDivideVertex(&a, h);
_glPerspectiveDivideVertex(&b, h);
_glSubmitHeaderOrVertex(d, v0);
_glSubmitHeaderOrVertex(d, &a);
_glSubmitHeaderOrVertex(d, &b);
_glSubmitHeaderOrVertex(d, &b);
}
break;
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
{
Vertex* v0 = &queue[queue_head];
const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];
_glClipEdge(v0, v1, &a);
_glClipEdge(v1, v2, &b);
a.flags = GPU_CMD_VERTEX;
b.flags = v2->flags;
_glPerspectiveDivideVertex(v0, h);
_glPerspectiveDivideVertex(&a, h);
_glPerspectiveDivideVertex(&b, h);
_glSubmitHeaderOrVertex(d, &a);
_glSubmitHeaderOrVertex(d, v0);
_glSubmitHeaderOrVertex(d, &b);
}
break;
case 3: /* First and second vertex were visible */
{
Vertex* v0 = &queue[queue_head];
Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity];
Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];
_glClipEdge(&v1, v2, &a);
_glClipEdge(v2, v0, &b);
a.flags = v2->flags;
b.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPerspectiveDivideVertex(&v1, h);
_glPerspectiveDivideVertex(&a, h);
_glPerspectiveDivideVertex(&b, h);
_glSubmitHeaderOrVertex(d, v0);
_glSubmitHeaderOrVertex(d, &v1);
_glSubmitHeaderOrVertex(d, &b);
_glSubmitHeaderOrVertex(d, &v1);
_glSubmitHeaderOrVertex(d, &a);
}
break;
case 4:
/* Third vertex was visible. */
{
Vertex* v0 = &queue[queue_head];
Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];
_glClipEdge(&v2, v0, &a);
_glClipEdge(v1, &v2, &b);
a.flags = GPU_CMD_VERTEX;
b.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&v2, h);
_glPerspectiveDivideVertex(&a, h);
_glPerspectiveDivideVertex(&b, h);
_glSubmitHeaderOrVertex(d, &a);
_glSubmitHeaderOrVertex(d, &a);
_glSubmitHeaderOrVertex(d, &b);
_glSubmitHeaderOrVertex(d, &v2);
}
break;
case 5: /* First and third vertex were visible */
{
Vertex* v0 = &queue[queue_head];
Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];
_glClipEdge(v0, v1, &a);
_glClipEdge(v1, &v2, &b);
a.flags = GPU_CMD_VERTEX;
b.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPerspectiveDivideVertex(&v2, h);
_glPerspectiveDivideVertex(&a, h);
_glPerspectiveDivideVertex(&b, h);
_glSubmitHeaderOrVertex(d, v0);
_glSubmitHeaderOrVertex(d, &a);
uint32_t v2_flags = v2.flags;
v2.flags = GPU_CMD_VERTEX;
_glSubmitHeaderOrVertex(d, &v2);
v2.flags = v2_flags;
_glSubmitHeaderOrVertex(d, &b);
_glSubmitHeaderOrVertex(d, &v2);
}
break;
case 6: /* Second and third vertex were visible */
{
Vertex* v0 = &queue[queue_head];
Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity];
Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];
_glClipEdge(v0, &v1, &a);
_glClipEdge(&v2, v0, &b);
a.flags = GPU_CMD_VERTEX;
b.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&v1, h);
_glPerspectiveDivideVertex(&v2, h);
_glPerspectiveDivideVertex(&a, h);
_glPerspectiveDivideVertex(&b, h);
_glSubmitHeaderOrVertex(d, &a);
_glSubmitHeaderOrVertex(d, &v1);
_glSubmitHeaderOrVertex(d, &b);
_glSubmitHeaderOrVertex(d, &v1);
_glSubmitHeaderOrVertex(d, &v2);
}
break;
default:
break;
}
/* End of strip: reset the window; otherwise slide it forward one. */
if(last_vertex) {
visible_mask = queue_head = queue_tail = 0;
} else {
queue_head = (queue_head + 1) % queue_capacity;
}
}
}
/* Convenience (x, y, z, w) tuple used to author test strips. */
struct VertexTmpl {
VertexTmpl(float x, float y, float z, float w):
x(x), y(y), z(z), w(w) {}
float x, y, z, w;
};
std::vector<Vertex> make_vertices(const std::vector<VertexTmpl>& verts) {
std::vector<Vertex> result;
Vertex r;
r.flags = GPU_CMD_POLYHDR;
result.push_back(r);
for(auto& v: verts) {
r.flags = GPU_CMD_VERTEX;
r.xyz[0] = v.x;
r.xyz[1] = v.y;
r.xyz[2] = v.z;
r.uv[0] = 0.0f;
r.uv[1] = 0.0f;
r.w = v.w;
result.push_back(r);
}
result.back().flags = GPU_CMD_VERTEX_EOL;
return result;
}
/* Minimal assert_equal: throws std::runtime_error on mismatch. Uses
 * operator!= so the requirements on T/U match the original exactly. */
template<typename T, typename U>
void check_equal(const T& lhs, const U& rhs) {
    const bool mismatch = (lhs != rhs);
    if(mismatch) {
        throw std::runtime_error("Assertion failed");
    }
}
/* Vertex comparison only checks position and w; flags, UVs and colour
 * are deliberately ignored. */
template<>
void check_equal(const Vertex& lhs, const Vertex& rhs) {
if(lhs.xyz[0] != rhs.xyz[0] ||
lhs.xyz[1] != rhs.xyz[1] ||
lhs.xyz[2] != rhs.xyz[2] ||
lhs.w != rhs.w) {
throw std::runtime_error("Assertion failed");
}
}
/* Single-visible-vertex cases: each submits one triangle and checks the
 * clipped command stream recorded in `sent`. */
bool test_clip_case_001() {
/* The first vertex is visible only */
sent.clear();
auto data = make_vertices({
{0.000000, -2.414213, 3.080808, 5.000000},
{-4.526650, -2.414213, -7.121212, -5.000000},
{4.526650, -2.414213, -7.121212, -5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 5);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
// Because we're sending a single triangle, we end up sending a
// degenerate final vert. But if we were sending more than one triangle
// this would be GPU_CMD_VERTEX twice
check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[3], sent[4]);
return true;
}
bool test_clip_case_010() {
/* The second vertex is visible only */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, -7.121212, -5.000000},
{0.000000, -2.414213, 3.080808, 5.000000},
{4.526650, -2.414213, -7.121212, -5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 4);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
return true;
}
bool test_clip_case_100() {
/* The third vertex is visible only */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, -7.121212, -5.000000},
{4.526650, -2.414213, -7.121212, -5.000000},
{0.000000, -2.414213, 3.080808, 5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 5);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
// Because we're sending a single triangle, we end up sending a
// degenerate final vert. But if we were sending more than one triangle
// this would be GPU_CMD_VERTEX twice
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[1], sent[2]);
return true;
}
/* Two-visible-vertex cases plus the fully visible case. */
bool test_clip_case_110() {
/* 2nd and 3rd visible */
sent.clear();
auto data = make_vertices({
{0.0, -2.414213, -7.121212, -5.000000},
{-4.526650, -2.414213, 3.080808, 5.000000},
{4.526650, -2.414213, 3.080808, 5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 6);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX);
check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[2], sent[4]);
return true;
}
bool test_clip_case_011() {
/* 1st and 2nd visible */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, 3.080808, 5.000000},
{4.526650, -2.414213, 3.080808, 5.000000},
{0.0, -2.414213, -7.121212, -5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 6);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX);
check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[2], sent[4]);
return true;
}
bool test_clip_case_101() {
/* 1st and 3rd visible */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, 3.080808, 5.000000},
{0.0, -2.414213, -7.121212, -5.000000},
{4.526650, -2.414213, 3.080808, 5.000000},
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 6);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX);
check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[3], sent[5]);
return true;
}
bool test_clip_case_111() {
/* All three vertices visible — no clipping should occur */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, 3.080808, 5.000000},
{0.0, -2.414213, -7.121212, 8.000000},
{4.526650, -2.414213, 3.080808, 5.000000},
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 4);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
return true;
}
/* Longer-strip smoke tests: these only assert that submission completes
 * without tripping the internal asserts (no output checks). */
bool test_start_behind() {
/* Triangle behind the plane, but the strip continues in front */
sent.clear();
auto data = make_vertices({
{-3.021717, -2.414213, -10.155344, -9.935254},
{5.915236, -2.414213, -9.354721, -9.136231},
{-5.915236, -2.414213, -0.264096, -0.063767},
{3.021717, -2.414213, 0.536527, 0.735255},
{-7.361995, -2.414213, 4.681529, 4.871976},
{1.574958, -2.414213, 5.482152, 5.670999},
});
SceneListSubmit(&data[0], data.size());
return true;
}
bool test_longer_strip() {
sent.clear();
auto data = make_vertices({
{-4.384623, -2.414213, -5.699644, -5.488456},
{4.667572, -2.414213, -5.621354, -5.410322},
{-4.667572, -2.414213, 4.319152, 4.510323},
{4.384623, -2.414213, 4.397442, 4.588456},
{-4.809045, -2.414213, 9.328549, 9.509711},
{4.243149, -2.414213, 9.406840, 9.587846},
});
SceneListSubmit(&data[0], data.size());
return true;
}
/* Runs every clip-case regression; each test throws (via check_equal)
 * on failure, so reaching `return 0` means all cases passed. */
int main(int argc, char* argv[]) {
// test_clip_case_000();
test_clip_case_001();
test_clip_case_010();
test_clip_case_100();
test_clip_case_110();
test_clip_case_011();
test_clip_case_101();
test_clip_case_111();
test_start_behind();
test_longer_strip();
return 0;
}

View File

@ -49,7 +49,7 @@ ENDIF()
add_link_options(-L$ENV{KOS_BASE}/lib/dreamcast)
link_libraries(-Wl,--start-group -lstdc++ -lkallisti -lc -lgcc -Wl,--end-group m)
SET(CMAKE_EXECUTABLE_SUFFIX_C ".elf")
SET(CMAKE_EXECUTABLE_SUFFIX ".elf")
SET(CMAKE_EXECUTABLE_SUFFIX_CXX ".elf")
ADD_DEFINITIONS(

View File

@ -1,451 +0,0 @@
/* * Copyright (c) 2011-2017 Luke Benstead https://simulant-engine.appspot.com
*
* This file is part of Simulant.
*
* Simulant is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Simulant is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Simulant. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <vector>
#include <functional>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <fstream>
#include <memory>
#define assert_equal(expected, actual) _assert_equal((expected), (actual), __FILE__, __LINE__)
#define assert_not_equal(expected, actual) _assert_not_equal((expected), (actual), __FILE__, __LINE__)
#define assert_false(actual) _assert_false((actual), __FILE__, __LINE__)
#define assert_true(actual) _assert_true((actual), __FILE__, __LINE__)
#define assert_close(expected, actual, difference) _assert_close((expected), (actual), (difference), __FILE__, __LINE__)
#define assert_is_null(actual) _assert_is_null((actual), __FILE__, __LINE__)
#define assert_is_not_null(actual) _assert_is_not_null((actual), __FILE__, __LINE__)
#define assert_raises(exception, func) _assert_raises<exception>((func), __FILE__, __LINE__)
#define assert_items_equal(expected, actual) _assert_items_equal((actual), (expected), __FILE__, __LINE__)
#define not_implemented() _not_implemented(__FILE__, __LINE__)
namespace test {
/* Minimal "{0} {1} ..."-style string templating used for assertion
 * messages.  Each variadic format() call substitutes one value per
 * placeholder, threading the placeholder index through Counter. */
class StringFormatter {
public:
StringFormatter(const std::string& templ):
templ_(templ) { }
// Wraps the placeholder index so it can't be confused with a value.
struct Counter {
Counter(uint32_t c): c(c) {}
uint32_t c;
};
/* Base case: substitute {0} with the stringified value. */
template<typename T>
std::string format(T value) {
std::stringstream ss;
ss << value;
return _do_format(0, ss.str());
}
/* Base case with an explicit placeholder index. */
template<typename T>
std::string format(Counter count, T value) {
std::stringstream ss;
ss << value;
return _do_format(count.c, ss.str());
}
/* Recursive case: substitute {0}, then recurse for {1}, {2}, ... */
template<typename T, typename... Args>
std::string format(T value, const Args&... args) {
std::stringstream ss;
ss << value;
return StringFormatter(_do_format(0, ss.str())).format(Counter(1), args...);
}
/* Recursive case continuing from an explicit index. */
template<typename T, typename... Args>
std::string format(Counter count, T value, const Args&... args) {
std::stringstream ss;
ss << value;
return StringFormatter(_do_format(count.c, ss.str())).format(Counter(count.c + 1), args...);
}
/* Replace the first occurrence of "{counter}" in the template. */
std::string _do_format(uint32_t counter, const std::string& value) {
std::stringstream ss; // Can't use to_string on all platforms
ss << counter;
const std::string to_replace = "{" + ss.str() + "}";
std::string output = templ_;
auto replace = [](std::string& str, const std::string& from, const std::string& to) -> bool {
size_t start_pos = str.find(from);
if(start_pos == std::string::npos)
return false;
str.replace(start_pos, from.length(), to);
return true;
};
replace(output, to_replace, value);
return output;
}
private:
std::string templ_;
};
/* Splits a string on '\n' characters. Empty lines (including a
 * trailing newline) produce no entries in the result. */
class StringSplitter {
public:
    StringSplitter(const std::string& str):
        str_(str) {
    }

    /* Returns the non-empty lines of the wrapped string, in order. */
    std::vector<std::string> split() {
        std::vector<std::string> lines;
        std::string current;

        for(char ch: str_) {
            if(ch != '\n') {
                current.push_back(ch);
                continue;
            }
            // Line boundary: flush anything accumulated so far.
            if(!current.empty()) {
                lines.push_back(current);
                current.clear();
            }
        }

        // The final line may not be newline-terminated.
        if(!current.empty()) {
            lines.push_back(current);
        }
        return lines;
    }

private:
    std::string str_;
};
// Internal shorthand for the placeholder formatter; used throughout the
// assertion helpers below to build failure messages.
typedef StringFormatter _Format;
/* Raised by the TestCase assertion helpers when a check fails.
 * Optionally carries the source location of the failing assertion;
 * when unknown, file is empty and line is -1. */
class AssertionError : public std::logic_error {
public:
    /* Failure without a known location. */
    AssertionError(const std::string& what):
        std::logic_error(what),
        file(""),
        line(-1) {}

    /* Failure with a (file, line) location pair. */
    AssertionError(const std::pair<std::string, int> file_and_line, const std::string& what):
        std::logic_error(what),
        file(file_and_line.first),
        line(file_and_line.second) {}

    ~AssertionError() noexcept(true) {}

    std::string file; // Source file of the failed assertion ("" if unknown)
    int line;         // Source line of the failed assertion (-1 if unknown)
};
/* Thrown by the not_implemented() macro to mark a test body that has
 * not been written yet; TestRunner::run() reports it as SKIPPED. */
class NotImplementedError: public std::logic_error {
public:
NotImplementedError(const std::string& file, int line):
std::logic_error(_Format("Not implemented at {0}:{1}").format(file, line)) {}
};
/* Thrown by TestCase::skip_if() to abort a test; TestRunner::run()
 * reports it as SKIPPED rather than failed. */
class SkippedTestError: public std::logic_error {
public:
    SkippedTestError(const std::string& reason):
        std::logic_error(reason) {}
};
/* Base class for all tests.
 *
 * Subclasses add void test_*() methods; the runner calls set_up()
 * before and tear_down() after each one. The _assert_* helpers are
 * normally reached through the assert_* macros, which supply the
 * trailing __FILE__/__LINE__ arguments. Every helper throws
 * test::AssertionError on failure. */
class TestCase {
public:
    virtual ~TestCase() {}

    /* Per-test hooks; override in subclasses as required. */
    virtual void set_up() {}
    virtual void tear_down() {}

    /* Aborts the current test as "skipped" when `flag` is true. */
    void skip_if(const bool& flag, const std::string& reason) {
        if(flag) { throw test::SkippedTestError(reason); }
    }

    /* Fails unless expected == actual (checked with operator!=). */
    template<typename T, typename U>
    void _assert_equal(T expected, U actual, std::string file, int line) {
        if(expected != actual) {
            throw test::AssertionError(
                std::make_pair(file, line),
                test::_Format("{0} does not match {1}").format(actual, expected)
            );
        }
    }

    /* Fails when lhs equals rhs (rhs is cast to T before comparing). */
    template<typename T, typename U>
    void _assert_not_equal(T lhs, U rhs, std::string file, int line) {
        if(lhs == (T) rhs) {
            throw test::AssertionError(
                std::make_pair(file, line),
                test::_Format("{0} should not match {1}").format(lhs, rhs)
            );
        }
    }

    /* Fails unless `actual` converts to true. */
    template<typename T>
    void _assert_true(T actual, std::string file, int line) {
        if(!bool(actual)) {
            throw test::AssertionError(
                std::make_pair(file, line),
                test::_Format("{0} is not true").format(bool(actual) ? "true" : "false")
            );
        }
    }

    /* Fails unless `actual` converts to false. */
    template<typename T>
    void _assert_false(T actual, std::string file, int line) {
        if(bool(actual)) {
            throw test::AssertionError(
                std::make_pair(file, line),
                test::_Format("{0} is not false").format(bool(actual) ? "true" : "false")
            );
        }
    }

    /* Fails unless actual lies within expected +/- difference. */
    template<typename T, typename U, typename V>
    void _assert_close(T expected, U actual, V difference, std::string file, int line) {
        if(actual < expected - difference ||
           actual > expected + difference) {
            throw test::AssertionError(
                std::make_pair(file, line),
                test::_Format("{0} is not close enough to {1}").format(actual, expected)
            );
        }
    }

    /* Fails when the pointer is non-null. */
    template<typename T>
    void _assert_is_null(T* thing, std::string file, int line) {
        if(thing != nullptr) {
            throw test::AssertionError(std::make_pair(file, line), "Pointer was not NULL");
        }
    }

    /* Fails when the pointer is null. */
    template<typename T>
    void _assert_is_not_null(T* thing, std::string file, int line) {
        if(thing == nullptr) {
            throw test::AssertionError(std::make_pair(file, line), "Pointer was unexpectedly NULL");
        }
    }

    /* Fails unless calling func() throws an exception of type T.
     * Exceptions of other types propagate to the caller unchanged. */
    template<typename T, typename Func>
    void _assert_raises(Func func, std::string file, int line) {
        try {
            func();
            throw test::AssertionError(
                std::make_pair(file, line),
                test::_Format("Expected exception ({0}) was not thrown").format(typeid(T).name())
            );
        } catch(T& e) {}
    }

    /* Fails unless both containers have the same size and every item
     * of lhs occurs somewhere in rhs. NOTE: the assert_items_equal
     * macro passes (actual, expected) into (lhs, rhs). */
    template<typename T, typename U>
    void _assert_items_equal(const T& lhs, const U& rhs, std::string file, int line) {
        auto where = std::make_pair(file, line);

        if(lhs.size() != rhs.size()) {
            throw test::AssertionError(where, "Containers are not the same length");
        }

        for(auto item: lhs) {
            if(std::find(rhs.begin(), rhs.end(), item) == rhs.end()) {
                throw test::AssertionError(where, test::_Format("Container does not contain {0}").format(item));
            }
        }
    }

    /* Marks the current test as not implemented (reported as skipped). */
    void _not_implemented(std::string file, int line) {
        throw test::NotImplementedError(file, line);
    }
};
/* Collects registered test cases, executes them with console output,
 * and optionally writes a JUnit-style XML report.
 *
 * Fixes in this revision:
 *  - fclose() was called unconditionally after the fopen() attempt,
 *    so a failed fopen() led to fclose(NULL), which is undefined
 *    behaviour. fclose() is now only reached when the handle is valid.
 *  - class-name extraction no longer performs iterator arithmetic with
 *    std::string::npos when a test name contains no ':'. */
class TestRunner {
public:
    /* Registers all test methods of test-case class T.
     * `methods` holds member-function pointers and `names` their
     * human-readable "Class::method" labels; both are expected in the
     * same order. One shared T instance is created and kept alive for
     * the lifetime of the runner; each stored test wraps its method
     * with set_up()/tear_down() (tear_down runs even on throw). */
    template<typename T, typename U>
    void register_case(std::vector<U> methods, std::vector<std::string> names) {
        std::shared_ptr<TestCase> instance = std::make_shared<T>();
        instances_.push_back(instance); //Hold on to it

        for(std::string name: names) {
            names_.push_back(name);
        }

        for(U& method: methods) {
            std::function<void()> func = std::bind(method, dynamic_cast<T*>(instance.get()));
            tests_.push_back([=]() {
                instance->set_up();
                try {
                    func();
                } catch(...) {
                    // Guarantee tear_down() runs even when the test throws.
                    instance->tear_down();
                    throw;
                }
                instance->tear_down();
            });
        }
    }

    /* Runs every registered test whose name starts with `test_case`
     * (all tests when empty). Prints one status line per test and, when
     * `junit_output` is a non-empty path, writes a JUnit-style XML
     * report there. Returns the number of failed + crashed tests. */
    int32_t run(const std::string& test_case, const std::string& junit_output="") {
        int failed = 0;
        int skipped = 0;
        int ran = 0;
        int crashed = 0;

        // Filter by name prefix when a test case was requested.
        auto new_tests = tests_;
        auto new_names = names_;
        if(!test_case.empty()) {
            new_tests.clear();
            new_names.clear();
            for(uint32_t i = 0; i < names_.size(); ++i) {
                if(names_[i].find(test_case) == 0) {
                    new_tests.push_back(tests_[i]);
                    new_names.push_back(names_[i]);
                }
            }
        }

        std::cout << std::endl << "Running " << new_tests.size() << " tests" << std::endl << std::endl;

        std::vector<std::string> junit_lines;
        junit_lines.push_back("<testsuites>\n");

        std::string klass = "";
        for(std::function<void ()> test: new_tests) {
            std::string name = new_names[ran];

            // Test names look like "Class::method"; group by class for
            // the XML report. Guard against names without ':' — the
            // previous code added npos to an iterator (UB) in that case.
            auto colon = name.find_first_of(":");
            std::string this_klass = (colon == std::string::npos) ?
                name : std::string(name.begin(), name.begin() + colon);
            bool close_klass = ran == (int) new_tests.size() - 1;

            if(this_klass != klass) {
                if(!klass.empty()) {
                    junit_lines.push_back(" </testsuite>\n");
                }
                klass = this_klass;
                junit_lines.push_back(" <testsuite name=\"" + this_klass + "\">\n");
            }

            try {
                junit_lines.push_back(" <testcase name=\"" + new_names[ran] + "\">\n");

                // Pad the name so the OK/FAILED column lines up.
                std::string output = " " + new_names[ran];
                for(int i = output.length(); i < 76; ++i) {
                    output += " ";
                }
                std::cout << output;

                test();
                std::cout << "\033[32m" << " OK " << "\033[0m" << std::endl;
                junit_lines.push_back(" </testcase>\n");
            } catch(test::NotImplementedError& e) {
                std::cout << "\033[34m" << " SKIPPED" << "\033[0m" << std::endl;
                ++skipped;
                junit_lines.push_back(" </testcase>\n");
            } catch(test::SkippedTestError& e) {
                std::cout << "\033[34m" << " SKIPPED" << "\033[0m" << std::endl;
                ++skipped;
                junit_lines.push_back(" </testcase>\n");
            } catch(test::AssertionError& e) {
                std::cout << "\033[33m" << " FAILED " << "\033[0m" << std::endl;
                std::cout << " " << e.what() << std::endl;

                // Echo the offending source line when the file is readable.
                if(!e.file.empty()) {
                    std::cout << " " << e.file << ":" << e.line << std::endl;

                    std::ifstream ifs(e.file);
                    if(ifs.good()) {
                        std::string buffer;
                        std::vector<std::string> lines;
                        while(std::getline(ifs, buffer)) {
                            lines.push_back(buffer);
                        }

                        int line_count = lines.size();
                        if(line_count && e.line <= line_count) {
                            std::cout << lines.at(e.line - 1) << std::endl << std::endl;
                        }
                    }
                }
                ++failed;

                // NOTE(review): e.what() is inserted verbatim; XML special
                // characters in the message are not escaped — confirm
                // downstream consumers tolerate this.
                junit_lines.push_back(" <failure message=\"" + std::string(e.what()) + "\"/>\n");
                junit_lines.push_back(" </testcase>\n");
            } catch(std::exception& e) {
                std::cout << "\033[31m" << " EXCEPT " << std::endl;
                std::cout << " " << e.what() << "\033[0m" << std::endl;
                ++crashed;
                junit_lines.push_back(" <failure message=\"" + std::string(e.what()) + "\"/>\n");
                junit_lines.push_back(" </testcase>\n");
            }
            std::cout << "\033[0m";

            ++ran;

            if(close_klass) {
                junit_lines.push_back(" </testsuite>\n");
            }
        }

        junit_lines.push_back("</testsuites>\n");

        if(!junit_output.empty()) {
            FILE* f = fopen(junit_output.c_str(), "wt");
            if(f) {
                for(auto& line: junit_lines) {
                    fwrite(line.c_str(), sizeof(char), line.length(), f);
                }
                /* Only close a successfully opened handle; the old code
                 * reached fclose(NULL) when fopen() failed. */
                fclose(f);
            }
        }

        std::cout << "-----------------------" << std::endl;
        if(!failed && !crashed && !skipped) {
            std::cout << "All tests passed" << std::endl << std::endl;
        } else {
            if(skipped) {
                std::cout << skipped << " tests skipped";
            }
            if(failed) {
                if(skipped) {
                    std::cout << ", ";
                }
                std::cout << failed << " tests failed";
            }
            if(crashed) {
                if(failed) {
                    std::cout << ", ";
                }
                std::cout << crashed << " tests crashed";
            }
            std::cout << std::endl << std::endl;
        }

        return failed + crashed;
    }

private:
    std::vector<std::shared_ptr<TestCase>> instances_;   // Keeps test-case objects alive
    std::vector<std::function<void()> > tests_;          // Wrapped test bodies
    std::vector<std::string> names_;                     // "Class::method" labels, parallel to tests_
};
} // test

View File

@ -1,212 +0,0 @@
#!/usr/bin/env python3
import argparse
import re
import sys
parser = argparse.ArgumentParser(description="Generate C++ unit tests")
parser.add_argument("--output", type=str, nargs=1, help="The output source file for the generated test main()", required=True)
parser.add_argument("test_files", type=str, nargs="+", help="The list of C++ files containing your tests")
parser.add_argument("--verbose", help="Verbose logging", action="store_true", default=False)
CLASS_REGEX = r"\s*class\s+(\w+)\s*([\:|,]\s*(?:public|private|protected)\s+[\w|::]+\s*)*"
TEST_FUNC_REGEX = r"void\s+(?P<func_name>test_\S[^\(]+)\(\s*(void)?\s*\)"
INCLUDE_TEMPLATE = "#include \"%(file_path)s\""
REGISTER_TEMPLATE = """
runner->register_case<%(class_name)s>(
std::vector<void (%(class_name)s::*)()>({%(members)s}),
{%(names)s}
);"""
MAIN_TEMPLATE = """
#include <functional>
#include <memory>
#include <map>
#include "tools/test.h"
%(includes)s
std::map<std::string, std::string> parse_args(int argc, char* argv[]) {
std::map<std::string, std::string> ret;
for(int i = 1; i < argc; ++i) {
std::string arg = argv[i];
auto eq = arg.find('=');
if(eq != std::string::npos && arg[0] == '-' && arg[1] == '-') {
auto key = std::string(arg.begin(), arg.begin() + eq);
auto value = std::string(arg.begin() + eq + 1, arg.end());
ret[key] = value;
} else if(arg[0] == '-' && arg[1] == '-') {
auto key = arg;
if(i < (argc - 1)) {
auto value = argv[++i];
ret[key] = value;
} else {
ret[key] = "";
}
} else {
ret[arg] = ""; // Positional, not key=value
}
}
return ret;
}
int main(int argc, char* argv[]) {
auto runner = std::make_shared<test::TestRunner>();
auto args = parse_args(argc, argv);
std::string junit_xml;
auto junit_xml_it = args.find("--junit-xml");
if(junit_xml_it != args.end()) {
junit_xml = junit_xml_it->second;
std::cout << " Outputting junit XML to: " << junit_xml << std::endl;
args.erase(junit_xml_it);
}
std::string test_case;
if(args.size()) {
test_case = args.begin()->first;
}
%(registrations)s
return runner->run(test_case, junit_xml);
}
"""
VERBOSE = False
def log_verbose(message):
    """Print `message` only when --verbose was passed (module flag VERBOSE)."""
    # Indentation restored: the diff view had flattened this function.
    if VERBOSE:
        print(message)
def find_tests(files):
    """Scan C++ source files for TestCase subclasses and their test methods.

    Returns a list of (path, class_name, parent_names, test_func_names)
    tuples for every class that derives — directly or transitively —
    from TestCase or SimulantTestCase.

    NOTE(review): indentation reconstructed from a flattened diff view;
    the nesting below follows the control flow (e.g. classes without a
    base-class list are not recorded) — verify against the original file.
    """
    subclasses = []

    # First pass, find all class definitions
    for path in files:
        with open(path, "rt") as f:
            # Collapse the file onto one line so the regexes can match
            # declarations that span multiple source lines.
            source_file_data = f.read().replace("\r\n", "").replace("\n", "")

        while True:
            match = re.search(CLASS_REGEX, source_file_data)
            if not match:
                break

            class_name = match.group().split(":")[0].replace("class", "").strip()

            try:
                parents = match.group().split(":", 1)[1]
            except IndexError:
                # No base-class list: cannot be a TestCase subclass.
                pass
            else:
                parents = [x.strip() for x in parents.split(",")]
                parents = [
                    x.replace("public", "").replace("private", "").replace("protected", "").strip()
                    for x in parents
                ]

                subclasses.append((path, class_name, parents, []))
                log_verbose("Found: %s" % str(subclasses[-1]))

            start = match.end()

            # Find the next opening brace
            while source_file_data[start] in (' ', '\t'):
                start += 1
            start -= 1

            end = start

            if source_file_data[start + 1] == '{':
                # Collect the class body by counting matched braces.
                class_data = []
                brace_counter = 1
                for i in range(start + 2, len(source_file_data)):
                    class_data.append(source_file_data[i])
                    if class_data[-1] == '{': brace_counter += 1
                    if class_data[-1] == '}': brace_counter -= 1
                    if not brace_counter:
                        end = i
                        break

                class_data = "".join(class_data)

                # Pull out every "void test_*()" method in the body and
                # attach it to the most recently recorded class.
                while True:
                    match = re.search(TEST_FUNC_REGEX, class_data)
                    if not match:
                        break
                    subclasses[-1][-1].append(match.group('func_name'))
                    class_data = class_data[match.end():]

            # Resume scanning after this class.
            source_file_data = source_file_data[end:]

    # Now, simplify the list by finding all potential superclasses, and then keeping any classes
    # that subclass them.
    test_case_subclasses = []

    i = 0
    while i < len(subclasses):
        subclass_names = [x.rsplit("::")[-1] for x in subclasses[i][2]]

        # If this subclasses TestCase, or it subclasses any of the already found testcase subclasses
        # then add it to the list
        if "TestCase" in subclass_names or "SimulantTestCase" in subclass_names or any(x[1] in subclasses[i][2] for x in test_case_subclasses):
            if subclasses[i] not in test_case_subclasses:
                test_case_subclasses.append(subclasses[i])
                i = 0  # Go back to the start, as we may have just found another parent class
                continue
        i += 1

    log_verbose("\n".join([str(x) for x in test_case_subclasses]))

    return test_case_subclasses
def main():
    """Generate the C++ test-runner main() source from the discovered tests.

    Parses the command line, scans the given test files, renders the
    include/registration templates and writes the result to --output.
    Returns 0 (process exit status).
    """
    global VERBOSE

    args = parser.parse_args()
    VERBOSE = args.verbose

    testcases = find_tests(args.test_files)

    # One #include per distinct file that contained at least one test case.
    includes = "\n".join([INCLUDE_TEMPLATE % {'file_path': x} for x in set([y[0] for y in testcases])])

    registrations = []
    for path, class_name, superclasses, funcs in testcases:
        BIND_TEMPLATE = "&%(class_name)s::%(func)s"

        members = ", ".join([BIND_TEMPLATE % {'class_name': class_name, 'func': x} for x in funcs])
        names = ", ".join(['"%s::%s"' % (class_name, x) for x in funcs])

        registrations.append(REGISTER_TEMPLATE % {'class_name': class_name, 'members': members, 'names': names})

    registrations = "\n".join(registrations)

    final = MAIN_TEMPLATE % {
        'registrations': registrations,
        'includes': includes
    }

    # Use a context manager so the output is flushed and closed
    # deterministically (the original left the file object unclosed).
    with open(args.output[0], "w") as out:
        out.write(final)

    return 0
# Script entry point: exit with main()'s return code.
if __name__ == '__main__':
    sys.exit(main())