Compare commits

...

82 Commits

Author SHA1 Message Date
mrq
2601afb5f3 experimental support for parsing from a float16 mesh (or a float32-quantized-as-ushort mesh) 2023-10-26 13:22:47 -05:00
Luke Benstead
0efe4c6cef Add missing defines 2023-10-19 22:26:13 +01:00
Luke Benstead
744dfb32f7 Merge branch 'fix-glshort-uv-read' into 'master'
Convert GL_SHORT to proper float on conversion

See merge request simulant/GLdc!109
2023-09-26 18:51:30 +00:00
Spencer Elliott
79172452f2 Convert GL_SHORT to proper float on conversion 2023-09-26 18:51:29 +00:00
Luke Benstead
420e2d75f2 Merge branch 'fix-glulookat-alignment' into 'master'
Fixed alignment for matrix passed into UploadMatrix4x4 in gluLookAt

See merge request simulant/GLdc!108
2023-09-20 15:47:38 +00:00
Spencer Elliott
202f546848 Fixed alignment for matrix passed into UploadMatrix4x4 in gluLookAt 2023-09-20 10:18:55 -05:00
Luke Benstead
d054dde785 Fix paletted texture glitch 2023-09-12 21:11:05 +01:00
Luke Benstead
00b4468928 Remove unused function 2023-09-11 20:42:09 +01:00
Luke Benstead
f0d799d14f Merge branch 'texture-refactor' into 'master'
Drastically refactor glTexImage2D

See merge request simulant/GLdc!107
2023-09-11 19:39:03 +00:00
Luke Benstead
1bf8554926 More erquirements 2023-09-11 20:34:18 +01:00
Luke Benstead
9bc6da9fba Add some requirements 2023-09-11 19:55:26 +01:00
Luke Benstead
a1536cba44 Add dependency 2023-09-11 17:31:08 +01:00
Luke Benstead
3eee140add Fix stage 2023-09-11 17:29:59 +01:00
Luke Benstead
43d64a4957 Fix twiddling issues 2023-09-11 17:27:04 +01:00
Luke Benstead
951ece6d19 Add test job to CI 2023-09-11 17:25:47 +01:00
Luke Benstead
61e5a7a2a6 More twiddling work 2023-09-10 19:41:25 +01:00
Luke Benstead
3308a57e59 Implement defragmenting the memory 2023-09-08 17:49:46 +01:00
Luke Benstead
db9e1cd424 Fall back to unaligned if there's no more aligned spaced 2023-09-08 09:13:33 +01:00
Luke Benstead
6eb079228e Fix infinite loop 2023-09-06 21:01:37 +01:00
Luke Benstead
7ce01ad93f Fix up paletted textures 2023-09-06 08:01:01 +01:00
Luke Benstead
12bd6f474f Fix issues with the allocator 2023-09-06 07:59:40 +01:00
Luke Benstead
e5a4f4f716 Continue fixing up paletted texture issues 2023-09-03 21:12:11 +01:00
Luke Benstead
4d39e19ed5 Start repairing paletted textures 2023-09-02 21:10:42 +01:00
Luke Benstead
49a0e103cb Fix up CI 2023-09-01 20:34:29 +01:00
Luke Benstead
9cedc81850 Fix broken merge 2023-09-01 20:29:24 +01:00
Luke Benstead
0e31aa3d27 Tweak 2023-09-01 20:25:27 +01:00
Luke Benstead
5e7b33797d Perf improvements and fixes 2023-09-01 20:25:27 +01:00
Luke Benstead
b19b9d498a Clean up the allocator code 2023-09-01 20:25:21 +01:00
Luke Benstead
36de063756 Allow configuring automatic texture twiddling in glKosInitEx
Defaults to enabled.
2023-09-01 20:23:55 +01:00
Luke Benstead
246cb997da Add optional dcprof to prof_texture_upload 2023-09-01 20:23:55 +01:00
Luke Benstead
cfbaea4a46 Don't calculate things twice 2023-09-01 08:52:01 +01:00
Luke Benstead
4b47f6878f Remove yalloc 2023-09-01 08:34:48 +01:00
Luke Benstead
3248499d5a Switch to the new allocator 2023-08-31 21:21:14 +01:00
Luke Benstead
fd9a9d1c25 Merge commit 'f49a98ab543b1be0049e07456fb23022435ba450' into texture-refactor 2023-08-31 20:49:42 +01:00
Luke Benstead
f49a98ab54 Fix allocate and free 2023-08-31 20:49:34 +01:00
Luke Benstead
f278777c0e WIP: Start implementing new allocator 2023-08-31 08:47:00 +01:00
Luke Benstead
34173d926c Drastically refactor glTexImage2D 2023-08-31 08:47:00 +01:00
Luke Benstead
77531ca347 Drastically refactor glTexImage2D 2023-08-26 20:34:11 +01:00
Luke Benstead
a05e1b01fa Make glGenerateMipmap the function, and EXT the alias 2023-07-26 20:33:12 +01:00
Luke Benstead
3dcbbdbde6 Add logging 2023-06-09 20:35:00 +01:00
Luke Benstead
92ee4f616d Set mode to PAL@50 if it's a European console without VGA 2023-06-06 21:05:52 +01:00
Luke Benstead
e7574bca1d Fix issues with GL_QUADS 2023-05-31 18:27:17 +01:00
Luke Benstead
026bdeff09 Fix infuriating memory corruption bug 2023-05-20 07:47:39 +01:00
Luke Benstead
f6713bc778 Speed up the software renderer 2023-05-20 07:45:45 +01:00
Luke Benstead
5865d57384 Wait for the store queues to finish when we've uploaded everything 2023-05-20 07:45:16 +01:00
Luke Benstead
1e3896e699 Clean up 2023-05-20 07:44:55 +01:00
Luke Benstead
bd47f333d6 Add more assertions 2023-05-20 07:43:57 +01:00
Luke Benstead
e57b503355 Fix memory errors 2023-05-18 16:44:11 +01:00
Luke Benstead
d81472ef57 Liberally assert stuff 2023-05-17 20:39:58 +01:00
Luke Benstead
462eb40d7a Fix bugs in texture deletion 2023-05-17 20:39:49 +01:00
Luke Benstead
c4c0bf4239 Fix an off-by-one error 2023-05-17 20:39:27 +01:00
Luke Benstead
9037d157d5 Clean up 2023-05-17 20:38:21 +01:00
Luke Benstead
52a0215ed8 Make sure we initialize texture 0. We don't actually use it yet
(binding zero disables texturing) but I believe the spec says that
texture 0 is the "default texture" and is an actual texture object.
2023-05-17 20:36:59 +01:00
Luke Benstead
a5891056db Many bug fixes and optimisations 2023-05-16 13:31:44 +01:00
Luke Benstead
9cffe14ad6 Clean up aligned vector 2023-05-12 20:51:36 +01:00
Luke Benstead
e683b8becb Optimisations 2023-05-11 20:00:13 +01:00
Luke Benstead
cba2fb7ceb Fix a memory corruption issue 2023-05-11 15:22:46 +01:00
Luke Benstead
c754c5c338 Ensure RelWithDebInfo builds use release flags + debugging 2023-05-11 15:22:27 +01:00
Luke Benstead
452cda5a3b Fix backface culling 2023-04-28 19:49:01 +01:00
Luke Benstead
9e1b1bc40a Merge branch 'clipping-rewrite-for-the-last-time-ffs' into 'master'
Restructure clipping to be much MUCH faster in the visible case

See merge request simulant/GLdc!105
2023-04-26 20:00:17 +00:00
Luke Benstead
0f65eab86a Much faster clipping 2023-04-26 20:50:43 +01:00
Luke Benstead
1a678d2c8d Undo some bad changes 2023-04-23 21:00:01 +01:00
Luke Benstead
0923b5c601 Further optimisations 2023-04-23 20:16:15 +01:00
Luke Benstead
2ec7055547 Optimisations 2023-04-23 07:44:09 +01:00
Luke Benstead
9cc52a01fe Better clipping 2023-04-22 20:47:45 +01:00
Luke Benstead
095ebf2790 Fix final bug 2023-04-22 11:37:42 +01:00
Luke Benstead
baa275b41b Fix a bunch of issues with clipping (almost working) 2023-04-21 20:38:21 +01:00
Luke Benstead
72c375f87c Fix some things 2023-04-21 11:39:37 +01:00
Luke Benstead
e54494e995 More clipping work 2023-04-20 20:45:59 +01:00
Luke Benstead
c5ce81a38d WIP: Restructure clipping to be much MUCH faster in the visible case
This currently only works with triangles, anything more and it crashes
due to me not queuing subsequent vertices in the strip correctly
2023-04-19 20:57:44 +01:00
Luke Benstead
34448939a4 Merge branch 'update-cubes-cmakelists' into 'master'
Update cubes sample + cmakelists

See merge request simulant/GLdc!104
2023-04-17 18:24:56 +00:00
Dave
b6249e9ca4 Update README 2023-04-17 19:35:06 +02:00
Dave
1a181f702c Update CMakeLists with CXX flags (Debug and Release) 2023-04-17 19:34:56 +02:00
Dave
3b53691e4b Change float colors to GLubyte in cubes sample 2023-04-17 19:33:59 +02:00
Luke Benstead
25d215dad3 Add some compiler flags for lolz 2023-04-11 20:46:44 +01:00
Luke Benstead
307d371c55 Various store queue shinanigans 2023-04-11 20:46:31 +01:00
Luke Benstead
4ad58bea89 Optimise glLoadMatrixf 2023-04-11 20:46:12 +01:00
Luke Benstead
8e60b18f29 Merge branch 'add-new-cubes-sample' into 'master'
Add new cubes sample

See merge request simulant/GLdc!103
2023-04-08 20:32:18 +00:00
David Reichelt
190b4ecfb7 Add include for printf 2023-04-08 20:07:21 +00:00
David Reichelt
a4b778063a Make sure ZNEAR_CLIPPING_ENABLED is defined in software renderer 2023-04-08 20:00:33 +00:00
Dave Reichelt
df9a12bbd6 Added new cubes sample
- A button toggles between glDrawElements and glDrawArrays
- B button toggles glBlend
- Start quits the sample
- Every 10 seconds a log with stats will be sent to the terminal
2023-04-08 21:42:03 +02:00
Luke Benstead
6ee9a823c1 Don't update lights unnecessarily 2023-03-23 20:01:41 +00:00
54 changed files with 13013 additions and 2845 deletions

View File

@ -1,5 +1,6 @@
stages:
- build
- test
build:sh4-gcc:
stage: build
@ -17,11 +18,28 @@ build:sh4-gcc:
build:x86-gcc:
stage: build
image: fedora:34
image: fedora:38
before_script:
- sudo dnf install -y cmake gcc gcc-c++ SDL2-devel glibc-devel pkgconf-pkg-config glibc-devel.i686 SDL2-devel.i686
- sudo dnf install -y cmake gcc gcc-c++ SDL2.i686 SDL2-devel.x86_64 glibc-devel glibc-devel.i686 SDL2-devel.i686 pkgconf-pkg-config.i686 pkgconf-pkg-config.x86_64
script:
- mkdir builddir
- cd builddir
- cmake -DCMAKE_BUILD_TYPE=Release ..
- make
artifacts:
paths:
- builddir/tests/gldc_tests
test:x86-gcc:
stage: test
image: fedora:38
dependencies:
- build:x86-gcc
before_script:
- sudo dnf install -y cmake gcc gcc-c++ SDL2.i686 SDL2-devel glibc-devel pkgconf-pkg-config glibc-devel.i686 SDL2-devel.i686 pkgconf-pkg-config.i686
script:
- cd builddir/tests/
- SDL_VIDEODRIVER=dummy ./gldc_tests --junit-xml=report.xml
artifacts:
reports:
junit: builddir/tests/report.xml

View File

@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.0)
cmake_minimum_required(VERSION 3.9)
project(GLdc)
set(CMAKE_VERBOSE_MAKEFILE ON)
# set the default backend
if(PLATFORM_DREAMCAST)
set(BACKEND "kospvr" CACHE STRING "Backend to use")
@ -8,6 +10,9 @@ else()
set(BACKEND "software" CACHE STRING "Backend to use")
endif()
include(CheckIPOSupported)
check_ipo_supported(RESULT FLTO_SUPPORTED OUTPUT FLTO_ERROR)
# List of possible backends
set_property(CACHE BACKEND PROPERTY STRINGS kospvr software)
@ -17,6 +22,7 @@ string(TOUPPER ${BACKEND} BACKEND_UPPER)
add_definitions(-DBACKEND_${BACKEND_UPPER})
set(CMAKE_C_STANDARD 99)
set(CMAKE_CXX_STANDARD 11)
include_directories(include)
@ -29,15 +35,33 @@ else()
check_c_compiler_flag("-mfsca" COMPILER_HAS_FSCA)
if(COMPILER_HAS_FSRRA)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsrra")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfsrra")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -mfsrra")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -mfsrra")
endif()
if(COMPILER_HAS_FSCA)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsca")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfsca")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -mfsca")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -mfsca")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -ffp-contract=fast -ffast-math")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -ffp-contract=fast -ffast-math")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
set(
SOURCES
@ -56,7 +80,7 @@ set(
GL/state.c
GL/texture.c
GL/util.c
GL/yalloc/yalloc.c
GL/alloc/alloc.c
${CMAKE_CURRENT_BINARY_DIR}/version.c
)
@ -87,6 +111,10 @@ endif()
add_library(GLdc STATIC ${SOURCES})
if(FLTO_SUPPORTED)
set_property(TARGET GLdc PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
if(NOT PLATFORM_DREAMCAST)
set_target_properties(GLdc PROPERTIES
COMPILE_OPTIONS "-m32"
@ -110,6 +138,13 @@ function(gen_sample sample)
add_executable(${sample} ${SAMPLE_SRCS})
if(FLTO_SUPPORTED)
# FIXME: Cubes + LTO causes an ICE
if(NOT ${sample} MATCHES "cubes")
set_property(TARGET ${sample} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
endif()
if(PLATFORM_DREAMCAST)
if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk")
message("Generating romdisk for sample: ${sample}")
@ -140,6 +175,8 @@ function(gen_sample sample)
endif()
endfunction()
add_subdirectory(tests)
gen_sample(blend_test samples/blend_test/main.c)
gen_sample(depth_funcs samples/depth_funcs/main.c)
gen_sample(depth_funcs_alpha_testing samples/depth_funcs_alpha_testing/main.c samples/depth_funcs_alpha_testing/gl_png.c)
@ -170,11 +207,14 @@ gen_sample(zclip_triangle samples/zclip_triangle/main.c)
gen_sample(zclip_trianglestrip samples/zclip_trianglestrip/main.c)
gen_sample(scissor samples/scissor/main.c)
gen_sample(polymark samples/polymark/main.c)
gen_sample(cubes samples/cubes/main.cpp)
gen_sample(zclip_test tests/zclip/main.cpp)
if(PLATFORM_DREAMCAST)
gen_sample(trimark samples/trimark/main.c)
gen_sample(quadmark samples/quadmark/main.c samples/profiler.c)
gen_sample(prof_texture_upload samples/prof_texture_upload/main.c samples/profiler.c)
else()
gen_sample(quadmark samples/quadmark/main.c)
gen_sample(prof_texture_upload samples/prof_texture_upload/main.c)
endif()

534
GL/alloc/alloc.c Normal file
View File

@ -0,0 +1,534 @@
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "alloc.h"
/* This allocator is designed so that ideally all allocations larger
* than 2k, fall on a 2k boundary. Smaller allocations will
* never cross a 2k boundary.
*
* House keeping is stored in RAM to avoid reading back from the
* VRAM to check for usage. Headers can't be easily stored in the
* blocks anyway as they have to be 2k aligned (so you'd need to
* store them in reverse or something)
*
* Defragmenting the pool will move larger allocations first, then
* smaller ones, recursively until you tell it to stop, or until things
* stop moving.
*
* The maximum pool size is 8M, made up of:
*
* - 4096 blocks of 2k
* - each with 8 sub-blocks of 256 bytes
*
* Why?
*
* The PVR performs better if textures don't cross 2K memory
* addresses, so we try to avoid that. Obviously we can't
* if the allocation is > 2k, but in that case we can at least
* align with 2k and the VQ codebook (which is usually 2k) will
* be in its own page.
*
* The smallest PVR texture allowed is 8x8 at 16 bit (so 128 bytes)
* but we're unlikely to use too many of those, so having a min sub-block
* size of 256 should be OK (a 16x16 image is 512, so two sub-blocks).
*
* We could go down to 128 bytes if wastage is an issue, but then we have
* to store double the number of usage markers.
*
* FIXME:
*
* - Only operates on one pool (ignores what you pass)
*/
#include <assert.h>
#include <stdio.h>
#define EIGHT_MEG (8 * 1024 * 1024)
#define TWO_KILOBYTES (2 * 1024)
#define BLOCK_COUNT (EIGHT_MEG / TWO_KILOBYTES)
#define ALLOC_DEBUG 0
#if ALLOC_DEBUG
#define DBG_MSG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
#else
#define DBG_MSG(fmt, ...) do {} while (0)
#endif
/* Round `n` up to the next multiple of `multiple`.
 *
 * Returns `n` unchanged when it is already an exact multiple.
 * `multiple` must be non-zero (asserted). */
static inline intptr_t round_up(intptr_t n, int multiple)
{
    /* BUG FIX: the original asserted AFTER computing `n % multiple`,
     * so a zero multiple divided by zero before the check fired. */
    assert(multiple);

    if((n % multiple) == 0) {
        return n;
    }

    return ((n + multiple - 1) / multiple) * multiple;
}
/* Singly-linked list node tracking one live allocation. Kept in
 * regular RAM so usage can be checked without reading back from the
 * pool memory (see the design notes at the top of the file). */
struct AllocEntry {
    void* pointer;            /* Address handed out to the caller */
    size_t size;              /* Requested size in bytes (not rounded up) */
    struct AllocEntry* next;  /* Next entry; list is kept ordered by size, descending */
};
typedef struct {
    /* This is a usage bitmask for each block. A block
     * is divided into 8 x 256 byte subblocks. If a block
     * is entirely used, its value will be 255, if
     * it's entirely free then it will be 0.
     * A set bit therefore marks a USED 256-byte subblock. */
    uint8_t block_usage[BLOCK_COUNT];
    uint8_t* pool;          // Pointer to the memory pool
    size_t pool_size;       // Size of the memory pool
    uint8_t* base_address;  // First 2k aligned address in the pool
    size_t block_count;     // Number of 2k blocks in the pool

    /* It's frustrating that we need to do this dynamically
     * but we need to know the size allocated when we free()...
     * we could store it statically but it would take too much memory
     * (an array of block_index -> block size). The drawback (aside the
     * memory usage) would be that we won't be able to order by size,
     * so defragging will take much more time. */
    struct AllocEntry* allocations;  /* Live allocations, size-descending */
} PoolHeader;

/* The single global pool instance. The `pool` parameter on the public
 * API is currently ignored (see the FIXME in the file header). */
static PoolHeader pool_header = {
    {0}, NULL, 0, NULL, 0, NULL
};
/* Return the first 2k-aligned address inside the pool (where blocks
 * start). `pool` is ignored: only one global pool is supported. */
void* alloc_base_address(void* pool) {
    (void) pool;
    return pool_header.base_address;
}
/* Return the number of usable 2k blocks in the pool. `pool` is
 * ignored: only one global pool is supported. */
size_t alloc_block_count(void* pool) {
    (void) pool;
    return pool_header.block_count;
}
/* Translate a position in the usage bitmap into a pool address.
 *
 * `block_usage_iterator` points at the usage byte where the scan
 * stopped and `bit_offset` is the bit (subblock) within that byte at
 * which the found run of free subblocks ENDS. The run's first
 * subblock is therefore (end + 1) - required_subblocks from the base.
 * Each subblock is 256 bytes; each usage byte covers 8 subblocks.
 *
 * Optionally writes the starting subblock index to
 * *start_subblock_out when it is non-NULL. */
static inline void* calc_address(
    uint8_t* block_usage_iterator,
    int bit_offset,
    size_t required_subblocks,
    size_t* start_subblock_out
) {
    uintptr_t offset = (block_usage_iterator - pool_header.block_usage) * 8;
    offset += (bit_offset + 1);   /* one past the run's last subblock */
    offset -= required_subblocks; /* back up to the run's first subblock */

    if(start_subblock_out) {
        *start_subblock_out = offset;
    }

    return pool_header.base_address + (offset * 256);
}
void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_subblock, size_t* required_subblocks);

/* Convenience wrapper around alloc_next_available_ex() that discards
 * the subblock bookkeeping outputs. Returns the address where a
 * `required_size` allocation would be placed, or NULL if none fits. */
void* alloc_next_available(void* pool, size_t required_size) {
    return alloc_next_available_ex(pool, required_size, NULL, NULL);
}
/* Core free-space search.
 *
 * Scans the usage bitmap for a run of enough free 256-byte subblocks
 * to hold `required_size` bytes. Placement rules: allocations of
 * >= 2048 bytes must start on a 2048 boundary; smaller allocations
 * must not straddle one. A run that fits but violates placement is
 * remembered as a "poor option" and returned only if nothing better
 * is found by the end of the scan.
 *
 * Optional outputs: *start_subblock_out receives the run's first
 * subblock index, *required_subblocks_out the run length in
 * subblocks. Returns the run's address, or NULL when no run at all is
 * large enough. `pool` is ignored (single global pool). */
void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_subblock_out, size_t* required_subblocks_out) {
    (void) pool;

    uint8_t* it = pool_header.block_usage;

    /* Round the byte size up to whole 256-byte subblocks */
    uint32_t required_subblocks = (required_size / 256);
    if(required_size % 256) required_subblocks += 1;

    /* Anything >= 2048 bytes must be aligned to a 2048 boundary */
    bool requires_alignment = required_size >= 2048;

    if(required_subblocks_out) {
        *required_subblocks_out = required_subblocks;
    }

    /* This is a fallback option. If while we're searching we find a possible slot
     * but it's not aligned, or it's straddling a 2k boundary, then we store
     * it here and if we reach the end of the search and find nothing better
     * we use this instead */
    uint8_t* poor_option = NULL;
    size_t poor_start_subblock = 0;

    uint32_t found_subblocks = 0;       /* current run honouring placement rules */
    uint32_t found_poor_subblocks = 0;  /* current run ignoring placement rules */

    for(size_t j = 0; j < pool_header.block_count; ++j, ++it) {
        /* We just need to find enough consecutive blocks */
        if(found_subblocks < required_subblocks) {
            uint8_t t = *it;

            /* Optimisation only. Skip over full blocks */
            if(t == 255) {
                found_subblocks = 0;
                found_poor_subblocks = 0;
            } else {
                /* Now let's see how many consecutive blocks we can find */
                for(int i = 0; i < 8; ++i) {
                    /* MSB-first: bit 7 is the block's first subblock */
                    if((t & 0x80) == 0) {
                        /* A small allocation's run would cross into a new
                         * 2k block here — it must restart at this block */
                        bool block_overflow = (
                            required_size < 2048 && found_subblocks > 0 && i == 0
                        );

                        bool reset_subblocks = (
                            (requires_alignment && found_subblocks == 0 && i != 0) ||
                            block_overflow
                        );

                        if(reset_subblocks) {
                            // Ignore this subblock, because we want the first subblock to be aligned
                            // at a 2048 boundary and this one isn't (i != 0)
                            found_subblocks = 0;
                        } else {
                            found_subblocks++;
                        }

                        /* If we reset the subblocks due to an overflow, we still
                         * want to count this free subblock in our count */
                        if(block_overflow) {
                            found_subblocks++;
                        }

                        found_poor_subblocks++;

                        if(found_subblocks >= required_subblocks) {
                            /* We found space! Now calculate the address */
                            return calc_address(it, i, required_subblocks, start_subblock_out);
                        }

                        /* First rule-breaking run found becomes the fallback */
                        if(!poor_option && (found_poor_subblocks >= required_subblocks)) {
                            poor_option = calc_address(it, i, required_subblocks, &poor_start_subblock);
                        }
                    } else {
                        /* Used subblock: both runs are broken */
                        found_subblocks = 0;
                        found_poor_subblocks = 0;
                    }

                    t <<= 1;
                }
            }
        }
    }

    if(poor_option) {
        if(start_subblock_out) {
            *start_subblock_out = poor_start_subblock;
        }
        return poor_option;
    } else {
        return NULL;
    }
}
/* Initialise the (single, global) allocator over `pool` of `size`
 * bytes. Returns 0 on success, -1 if the allocator is already
 * initialised, `pool` is NULL, or `size` exceeds the 8M maximum. */
int alloc_init(void* pool, size_t size) {
    if(pool_header.pool) {
        return -1;  /* Already initialised */
    }

    /* Robustness: reject a NULL pool up front rather than doing
     * pointer arithmetic on it below. */
    if(!pool) {
        return -1;
    }

    if(size > EIGHT_MEG) {  // FIXME: >= ?
        return -1;
    }

    uint8_t* p = (uint8_t*) pool;

    /* All subblocks start out free */
    memset(pool_header.block_usage, 0, BLOCK_COUNT);
    pool_header.pool = pool;
    pool_header.pool_size = size;

    /* Blocks begin at the first 2k-aligned address inside the pool */
    intptr_t base_address = (intptr_t) pool_header.pool;
    base_address = round_up(base_address, 2048);
    pool_header.base_address = (uint8_t*) base_address;

    /* Only whole 2k blocks after the alignment gap are usable */
    pool_header.block_count = ((p + size) - pool_header.base_address) / 2048;

    pool_header.allocations = NULL;

    assert(((uintptr_t) pool_header.base_address) % 2048 == 0);

    return 0;
}
void alloc_shutdown(void* pool) {
(void) pool;
if(!pool_header.pool) {
return;
}
struct AllocEntry* it = pool_header.allocations;
while(it) {
struct AllocEntry* next = it->next;
free(it);
it = next;
}
memset(&pool_header, 0, sizeof(pool_header));
pool_header.pool = NULL;
}
/* Number of 256-byte subblocks needed to hold `size` bytes
 * (i.e. `size` rounded up to a whole subblock count). */
static inline uint32_t size_to_subblock_count(size_t size) {
    uint32_t whole = size / 256;
    return (size % 256) ? whole + 1 : whole;
}
/* Map a pool address back to its global 256-byte subblock index.
 * `p` must lie within the pool, at or after base_address. */
static inline uint32_t subblock_from_pointer(void* p) {
    uint8_t* ptr = (uint8_t*) p;
    return (ptr - pool_header.base_address) / 256;
}
/* Split a global subblock index into its owning 2k block index (*b)
 * and the bit position 0-7 (*off) within that block's usage byte. */
static inline void block_and_offset_from_subblock(size_t sb, size_t* b, uint8_t* off) {
    *b = sb >> 3;                 /* 8 subblocks per block */
    *off = (uint8_t) (sb & 7u);   /* position within the usage byte */
}
/* Allocate `size` bytes from the pool.
 *
 * Finds the next suitable run of free 256-byte subblocks via
 * alloc_next_available_ex(), marks those subblocks used in the
 * bitmap, and records the allocation in the size-descending
 * allocation list (so the defragmenter visits big blocks first).
 * Returns the allocated address, or NULL if no space was found. */
void* alloc_malloc(void* pool, size_t size) {
    DBG_MSG("Allocating: %d\n", size);

    size_t start_subblock, required_subblocks;
    void* ret = alloc_next_available_ex(pool, size, &start_subblock, &required_subblocks);

    if(ret) {
        size_t block;
        uint8_t offset;
        block_and_offset_from_subblock(start_subblock, &block, &offset);

        uint8_t mask = 0;

        DBG_MSG("Alloc: size: %d, rs: %d, sb: %d, b: %d, off: %d\n", size, required_subblocks, start_subblock, start_subblock / 8, start_subblock % 8);

        /* Toggle any bits for the first block.
         * offset + c stays <= 8 here: small runs never straddle a 2k
         * boundary and large runs start 2k-aligned (offset == 0), per
         * the placement rules in alloc_next_available_ex(). */
        int c = (required_subblocks < 8) ? required_subblocks : 8;
        for(int i = 0; i < c; ++i) {
            mask |= (1 << (7 - (offset + i)));
            required_subblocks--;
        }

        if(mask) {
            pool_header.block_usage[block++] |= mask;
        }

        /* Fill any full blocks in the middle of the allocation */
        while(required_subblocks > 8) {
            pool_header.block_usage[block++] = 255;
            required_subblocks -= 8;
        }

        /* Fill out any trailing subblocks */
        mask = 0;
        for(size_t i = 0; i < required_subblocks; ++i) {
            mask |= (1 << (7 - i));
        }

        if(mask) {
            pool_header.block_usage[block++] |= mask;
        }

        /* Insert allocations in the list by size descending so that when we
         * defrag we can move the larger blocks before the smaller ones without
         * much effort */
        struct AllocEntry* new_entry = (struct AllocEntry*) malloc(sizeof(struct AllocEntry));
        new_entry->pointer = ret;
        new_entry->size = size;
        new_entry->next = NULL;

        struct AllocEntry* it = pool_header.allocations;
        struct AllocEntry* last = NULL;

        if(!it) {
            /* Empty list: this entry becomes the head */
            pool_header.allocations = new_entry;
        } else {
            while(it) {
                if(it->size < size) {
                    /* First strictly-smaller entry: insert before it */
                    if(last) {
                        last->next = new_entry;
                    } else {
                        pool_header.allocations = new_entry;
                    }

                    new_entry->next = it;
                    break;
                } else if(!it->next) {
                    /* Reached the tail without finding smaller: append */
                    it->next = new_entry;
                    new_entry->next = NULL;
                    break;
                }

                last = it;
                it = it->next;
            }
        }
    }

    DBG_MSG("Alloc done\n");

    return ret;
}
/* Clear the usage bits covering one allocation. Mirror image of the
 * bit-setting logic in alloc_malloc(). Shared by alloc_free() and
 * the defragmenter (which must not touch the allocation list, only
 * the bitmap — hence this helper). */
static void alloc_release_blocks(struct AllocEntry* it) {
    size_t used_subblocks = size_to_subblock_count(it->size);
    size_t subblock = subblock_from_pointer(it->pointer);
    size_t block;
    uint8_t offset;
    block_and_offset_from_subblock(subblock, &block, &offset);

    uint8_t mask = 0;

    DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);

    /* Wipe out any leading subblocks */
    int c = (used_subblocks < 8) ? used_subblocks : 8;
    for(int i = 0; i < c; ++i) {
        mask |= (1 << (7 - (offset + i)));
        used_subblocks--;
    }

    if(mask) {
        pool_header.block_usage[block++] &= ~mask;
    }

    /* Clear any full blocks in the middle of the allocation */
    while(used_subblocks > 8) {
        pool_header.block_usage[block++] = 0;
        used_subblocks -= 8;
    }

    /* Wipe out any trailing subblocks */
    mask = 0;
    for(size_t i = 0; i < used_subblocks; ++i) {
        mask |= (1 << (7 - i));
    }

    if(mask) {
        pool_header.block_usage[block++] &= ~mask;
    }
}
/* Release the allocation at `p`: clear its usage bits and unlink its
 * record from the allocation list. Unknown pointers are silently
 * ignored (the loop simply finds no match). `pool` is ignored. */
void alloc_free(void* pool, void* p) {
    (void) pool;

    struct AllocEntry* it = pool_header.allocations;
    struct AllocEntry* last = NULL;
    while(it) {
        if(it->pointer == p) {
            alloc_release_blocks(it);

            if(last) {
                last->next = it->next;
            } else {
                assert(it == pool_header.allocations);
                pool_header.allocations = it->next;
            }

            /* BUG FIX: the original DBG_MSG here referenced
             * `used_subblocks`, `block` and `offset`, which are locals
             * of alloc_release_blocks() and not in scope — a compile
             * error whenever ALLOC_DEBUG is enabled. Log what this
             * function actually knows, before freeing `it`. */
            DBG_MSG("Freed: size: %d\n", (int) it->size);

            free(it);
            break;
        }

        last = it;
        it = it->next;
    }

    DBG_MSG("Free done\n");
}
/* Defragment the pool by copying allocations to lower addresses.
 *
 * Runs up to `max_iterations` passes over the allocation list (which
 * alloc_malloc keeps largest-first, so big blocks move before small
 * ones) and stops early once a pass moves nothing. `callback` is
 * invoked as (old_address, new_address, user_data) after each move so
 * callers can retarget their pointers.
 *
 * NOTE(review): alloc_malloc() is used to claim the destination, which
 * also inserts a NEW AllocEntry for it, while `it` is retargeted to
 * the same address — the moved allocation appears to end up tracked
 * twice in the list. Also `potential_dest` from alloc_malloc() is not
 * checked for NULL before the memcpy. Both should be confirmed. */
void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data) {

    for(int i = 0; i < max_iterations; ++i) {
        bool move_occurred = false;

        struct AllocEntry* it = pool_header.allocations;

        if(!it) {
            return;  /* Nothing allocated: nothing to compact */
        }

        while(it) {
            /* Where would this allocation land if requested now? */
            void* potential_dest = alloc_next_available(pool, it->size);
            if(potential_dest < it->pointer) {
                /* A lower slot exists: claim it and copy the data over */
                potential_dest = alloc_malloc(pool, it->size);
                memcpy(potential_dest, it->pointer, it->size);

                /* Mark this block as now free, but don't fiddle with the
                 * allocation list */
                alloc_release_blocks(it);

                callback(it->pointer, potential_dest, user_data);

                it->pointer = potential_dest;
                move_occurred = true;
            }

            it = it->next;
        }

        if(!move_occurred) {
            return;  /* Stable: further passes would change nothing */
        }
    }
}
/* Population count of a byte: the number of set bits, i.e. the
 * number of USED 256-byte subblocks in one usage-bitmap entry. */
static inline uint8_t count_ones(uint8_t byte) {
    uint8_t count = 0;

    /* Kernighan's trick: each step clears the lowest set bit, so the
     * loop runs once per set bit. */
    while(byte) {
        byte &= (uint8_t)(byte - 1);
        ++count;
    }

    return count;
}
/* Return the total number of free bytes in the pool.
 * `pool` is ignored: only one global pool is supported. */
size_t alloc_count_free(void* pool) {
    (void) pool;

    uint8_t* it = pool_header.block_usage;
    uint8_t* end = it + pool_header.block_count;

    size_t total_free = 0;

    while(it < end) {
        /* BUG FIX: a set bit marks a USED 256-byte subblock (see the
         * block_usage comment and alloc_malloc), so free subblocks are
         * the ZERO bits. The original summed count_ones(*it) * 256,
         * which counted used bytes, not free ones. */
        total_free += (8 - count_ones(*it)) * 256;
        ++it;
    }

    return total_free;
}
/* Return the size in bytes of the largest contiguous run of free
 * subblocks in the pool. `pool` is ignored (single global pool).
 *
 * NOTE(review): the in-byte accounting looks suspect — on hitting a
 * used bit it adds (7 - i) * 256 regardless of whether those earlier
 * bits in the byte were actually free, never counts free bits at the
 * tail of a partially-used byte, and only resets `current_block` when
 * a new maximum is found. Verify against a bitmap with mixed bytes
 * before relying on exact values. */
size_t alloc_count_continuous(void* pool) {
    (void) pool;

    size_t largest_block = 0;

    uint8_t* it = pool_header.block_usage;
    uint8_t* end = it + pool_header.block_count;
    size_t current_block = 0;
    while(it < end) {
        uint8_t t = *it++;
        if(!t) {
            /* Whole 2k block free: extend the current run */
            current_block += 2048;
        } else {
            /* MSB-first walk over this block's 8 subblock bits */
            for(int i = 7; i >= 0; --i) {
                bool bitset = (t & (1 << i));
                if(bitset) {
                    current_block += (7 - i) * 256;
                    if(largest_block < current_block) {
                        largest_block = current_block;
                        current_block = 0;
                    }
                }
            }
        }
    }

    return largest_block;
}

29
GL/alloc/alloc.h Normal file
View File

@ -0,0 +1,29 @@
#pragma once

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* VRAM pool allocator: 2k blocks of 8 x 256-byte subblocks. See the
 * design notes at the top of GL/alloc/alloc.c. Every function takes a
 * `pool` parameter, but currently only one global pool is supported
 * and the parameter is ignored. */

/* Initialise the allocator over `pool` of `size` bytes.
 * Returns 0 on success, -1 on failure. */
int alloc_init(void* pool, size_t size);

/* Release all allocator bookkeeping (does not free `pool` itself). */
void alloc_shutdown(void* pool);

/* Allocate / release pool memory, malloc/free style. */
void *alloc_malloc(void* pool, size_t size);
void alloc_free(void* pool, void* p);

/* Callback invoked as (old_address, new_address, user_data) for each
 * allocation moved during defragmentation. */
typedef void (defrag_address_move)(void*, void*, void*);

/* Compact the pool, running at most `max_iterations` passes. */
void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data);

/* Introspection: total free bytes, and largest contiguous free run. */
size_t alloc_count_free(void* pool);
size_t alloc_count_continuous(void* pool);

/* Address the next allocation of `required_size` would use, the first
 * 2k-aligned address in the pool, and the usable 2k block count. */
void* alloc_next_available(void* pool, size_t required_size);
void* alloc_base_address(void* pool);
size_t alloc_block_count(void* pool);

#ifdef __cplusplus
}
#endif

163
GL/draw.c
View File

@ -3,10 +3,36 @@
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include "private.h"
#include "platform.h"
/* Convert a 32-bit float to IEEE 754 half-float (binary16) bits.
 *
 * Pure integer bit manipulation on the float's representation: the
 * sign is moved to bit 15 and the combined exponent+mantissa field is
 * rebias-shifted from binary32 to binary16. The three selects handle,
 * in order: values too small for a half (flushed to signed zero),
 * overflow (mapped to infinity, 0x7c00), and NaN (mapped to a quiet
 * NaN, 0x7e00). */
GLushort _quantize( GLfloat v ) {
    union { GLfloat f; GLuint ui; } u = {v};
    GLuint ui = u.ui;

    int s = (ui >> 16) & 0x8000;   /* sign bit, already in half position */
    int em = ui & 0x7fffffff;      /* exponent + mantissa */

    /* Rebias the exponent (127 -> 15) and shift the mantissa from 23
     * to 10 bits; the (1 << 12) term rounds to nearest. */
    int h = (em - (112 << 23) + (1 << 12)) >> 13;
    h = (em < (113 << 23)) ? 0 : h;        /* underflow -> +/-0 */
    h = (em >= (143 << 23)) ? 0x7c00 : h;  /* overflow -> infinity */
    h = (em > (255 << 23)) ? 0x7e00 : h;   /* NaN in -> NaN out */

    return (GLushort)(s | h);
}
/* Convert IEEE 754 half-float (binary16) bits back to a 32-bit float.
 *
 * Inverse of _quantize(): rebias the exponent (15 -> 127) and widen
 * the mantissa from 10 to 23 bits. Half denormals (em < 1 << 10) are
 * flushed to zero; half Inf/NaN (em >= 31 << 10) are pushed up into
 * the float Inf/NaN exponent range. */
GLfloat _dequantize( GLushort h ) {
    GLuint s = (GLuint) (h & 0x8000) << 16;     /* sign -> float position */
    int em = h & 0x7fff;                        /* exponent + mantissa */

    int r = (em + (112 << 10)) << 13;           /* rebias and widen */
    r = (em < (1 << 10)) ? 0 : r;               /* denormals flush to zero */
    r += (em >= (31 << 10)) ? (112 << 23) : 0;  /* Inf/NaN -> float Inf/NaN */

    union { GLfloat f; GLuint ui; } u;
    u.ui = s | r;
    return u.f;
}
AttribPointerList ATTRIB_POINTERS;
GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
@ -62,6 +88,7 @@ GL_FORCE_INLINE GLsizei byte_size(GLenum type) {
case GL_INT: return sizeof(GLint);
case GL_UNSIGNED_INT: return sizeof(GLuint);
case GL_DOUBLE: return sizeof(GLdouble);
case GL_HALF_FLOAT: return sizeof(GLhalf);
case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLuint);
case GL_FLOAT:
default: return sizeof(GLfloat);
@ -78,7 +105,7 @@ static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restr
// 10:10:10:2REV format
static void _readVertexData1i3f(const GLubyte* in, GLubyte* out) {
const static float MULTIPLIER = 1.0f / 1023.0f;
static const float MULTIPLIER = 1.0f / 1023.0f;
GLfloat* output = (GLfloat*) out;
@ -108,6 +135,15 @@ static void _readVertexData3us3f(const GLubyte* in, GLubyte* out) {
output[2] = input[2];
}
/* Read three GLushorts holding quantized (half-float bit pattern)
 * values and expand each into a 32-bit float via _dequantize(). */
static void _readVertexData3usq3f(const GLubyte* in, GLubyte* out) {
    const GLushort* src = (const GLushort*) in;
    float* dst = (float*) out;

    for(int i = 0; i < 3; ++i) {
        dst[i] = _dequantize(src[i]);
    }
}
static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) {
const GLuint* input = (const GLuint*) in;
float* output = (float*) out;
@ -126,6 +162,15 @@ static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) {
output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE;
}
/* Read three GLhalf values into three 32-bit floats.
 *
 * NOTE(review): this assigns the GLhalf values directly. If GLhalf is
 * an integer type carrying half-float bits, this converts the raw bit
 * pattern numerically rather than decoding it — unlike the sibling
 * *usq* readers, which go through _dequantize(). Confirm GLhalf's
 * definition; this may need _dequantize(input[i]) instead. */
static void _readVertexData3f16_3f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;

    output[0] = input[0];
    output[1] = input[1];
    output[2] = input[2];
}
static void _readVertexData2f2f(const GLubyte* in, GLubyte* out) {
vec2cpy(out, in);
}
@ -159,8 +204,25 @@ static void _readVertexData2us2f(const GLubyte* in, GLubyte* out) {
const GLushort* input = (const GLushort*) in;
float* output = (float*) out;
output[0] = input[0];
output[1] = input[1];
output[0] = (float)input[0] / SHRT_MAX;
output[1] = (float)input[1] / SHRT_MAX;
}
/* Read two quantized (half-float bit pattern) GLushorts into a
 * 3-component float output; the z component is zeroed. */
static void _readVertexData2usq3f(const GLubyte* in, GLubyte* out) {
    const GLushort* src = (const GLushort*) in;
    float* dst = (float*) out;

    dst[2] = 0.0f;
    for(int i = 0; i < 2; ++i) {
        dst[i] = _dequantize(src[i]);
    }
}
/* Read two quantized (half-float bit pattern) GLushorts and expand
 * each into a 32-bit float via _dequantize(). */
static void _readVertexData2usq2f(const GLubyte* in, GLubyte* out) {
    const GLushort* src = (const GLushort*) in;
    float* dst = (float*) out;

    for(int i = 0; i < 2; ++i) {
        dst[i] = _dequantize(src[i]);
    }
}
static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) {
@ -178,6 +240,14 @@ static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) {
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
}
/* Read two GLhalf values into two 32-bit floats.
 *
 * NOTE(review): direct assignment — if GLhalf is an integer type
 * carrying half-float bits, this converts the raw bit pattern
 * numerically instead of decoding it (compare the *usq* readers that
 * use _dequantize()). Confirm GLhalf's definition. */
static void _readVertexData2f16_2f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;

    output[0] = input[0];
    output[1] = input[1];
}
static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) {
const GLuint* input = (const GLuint*) in;
float* output = (float*) out;
@ -187,6 +257,15 @@ static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) {
output[2] = 0.0f;
}
/* Read two GLhalf values into a 3-component float output, zeroing z.
 *
 * NOTE(review): direct assignment — if GLhalf is an integer type
 * carrying half-float bits, this converts the raw bit pattern
 * numerically instead of decoding it (compare the *usq* readers that
 * use _dequantize()). Confirm GLhalf's definition. */
static void _readVertexData2f16_3f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;

    output[0] = input[0];
    output[1] = input[1];
    output[2] = 0.0f;
}
static void _readVertexData4ubARGB(const GLubyte* input, GLubyte* output) {
output[R8IDX] = input[0];
output[G8IDX] = input[1];
@ -239,7 +318,7 @@ static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restri
float x, y, z;
} V;
const static V NegZ = {0.0f, 0.0f, -1.0f};
static const V NegZ = {0.0f, 0.0f, -1.0f};
*((V*) out) = NegZ;
}
@ -391,12 +470,12 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
}
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
gl_assert(target->header_offset < target->output->vector.size);
gl_assert(target->header_offset < aligned_vector_size(&target->output->vector));
return aligned_vector_at(&target->output->vector, target->header_offset);
}
GL_INLINE_DEBUG Vertex* _glSubmissionTargetStart(SubmissionTarget* target) {
gl_assert(target->start_offset < target->output->vector.size);
gl_assert(target->start_offset < aligned_vector_size(&target->output->vector));
return aligned_vector_at(&target->output->vector, target->start_offset);
}
@ -492,14 +571,17 @@ ReadPositionFunc calcReadPositionFunc() {
case GL_FLOAT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f3f:
_readVertexData2f3f;
case GL_HALF_FLOAT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f16_3f:
_readVertexData2f16_3f;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ub3f:
_readVertexData2ub3f;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3us3f:
_readVertexData2us3f;
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3usq3f:
_readVertexData2usq3f;
case GL_INT:
case GL_UNSIGNED_INT:
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ui3f:
@ -517,12 +599,14 @@ ReadUVFunc calcReadUVFunc() {
case GL_DOUBLE:
case GL_FLOAT:
return _readVertexData2f2f;
case GL_HALF_FLOAT:
return _readVertexData2f16_2f;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
return _readVertexData2ub2f;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return _readVertexData2us2f;
return _readVertexData2usq2f;
case GL_INT:
case GL_UNSIGNED_INT:
return _readVertexData2ui2f;
@ -539,12 +623,14 @@ ReadUVFunc calcReadSTFunc() {
case GL_DOUBLE:
case GL_FLOAT:
return _readVertexData2f2f;
case GL_HALF_FLOAT:
return _readVertexData2f16_2f;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
return _readVertexData2ub2f;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return _readVertexData2us2f;
return _readVertexData2usq2f;
case GL_INT:
case GL_UNSIGNED_INT:
return _readVertexData2ui2f;
@ -561,6 +647,8 @@ ReadNormalFunc calcReadNormalFunc() {
case GL_DOUBLE:
case GL_FLOAT:
return _readVertexData3f3f;
case GL_HALF_FLOAT:
return _readVertexData3f16_3f;
break;
case GL_BYTE:
case GL_UNSIGNED_BYTE:
@ -568,7 +656,7 @@ ReadNormalFunc calcReadNormalFunc() {
break;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return _readVertexData3us3f;
return _readVertexData3usq3f;
break;
case GL_INT:
case GL_UNSIGNED_INT:
@ -585,7 +673,6 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL
const GLubyte* vptr = ((GLubyte*) ATTRIB_POINTERS.vertex.ptr + (first * vstride));
float pos[3];
float w = 0.0f;
ITERATE(count) {
PREFETCH(vptr + vstride);
@ -726,9 +813,7 @@ typedef struct {
} Float2;
static const Float3 F3Z = {0.0f, 0.0f, 1.0f};
static const Float3 F3ZERO = {0.0f, 0.0f, 0.0f};
static const Float2 F2ZERO = {0.0f, 0.0f};
static const uint32_t U4ONE = ~0;
static void generateElementsFastPath(
SubmissionTarget* target, const GLsizei first, const GLuint count,
@ -815,17 +900,15 @@ static void generateElementsFastPath(
#define POLYMODE QUADS
#define PROCESS_VERTEX_FLAGS(it, i) { \
if((i + 1) % 4 == 0) { \
Vertex* prev = ((it) - 1); \
Vertex t = (*prev); \
*(prev) = *((it)); \
*((it)) = t; \
prev->flags = GPU_CMD_VERTEX; \
it->flags = GPU_CMD_VERTEX; \
if(((i + 1) % 4) == 0) { \
Vertex t = *it; \
*it = *(it - 1); \
*(it - 1) = t; \
it->flags = GPU_CMD_VERTEX_EOL; \
} else { \
it->flags = GPU_CMD_VERTEX; \
} \
}
#include "draw_fastpath.inc"
#undef PROCESS_VERTEX_FLAGS
#undef POLYMODE
@ -912,24 +995,6 @@ static void transform(SubmissionTarget* target) {
TransformVertices(vertex, target->count);
}
static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
const uint8_t* dataIn = (const uint8_t*) xyz;
uint8_t* dataOut = (uint8_t*) xyzOut;
ITERATE(count) {
const float* in = (const float*) dataIn;
float* out = (float*) dataOut;
TransformVec3NoMod(
in,
out
);
dataIn += inStride;
dataOut += outStride;
}
}
static void mat_transform_normal3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
const uint8_t* dataIn = (const uint8_t*) xyz;
uint8_t* dataOut = (uint8_t*) xyzOut;
@ -1170,6 +1235,7 @@ void _glInitSubmissionTarget() {
GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) {
SubmissionTarget* const target = &SUBMISSION_TARGET;
AlignedVector* const extras = target->extras;
@ -1210,17 +1276,22 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
return;
}
GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty();
// We don't handle this any further, so just make sure we never pass it down */
gl_assert(mode != GL_POLYGON);
target->output = _glActivePolyList();
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = target->output->vector.size;
target->start_offset = target->header_offset + (header_required);
gl_assert(target->output);
gl_assert(extras);
uint32_t vector_size = aligned_vector_size(&target->output->vector);
GLboolean header_required = (vector_size == 0) || _glGPUStateIsDirty();
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
target->header_offset = vector_size;
target->start_offset = target->header_offset + (header_required ? 1 : 0);
gl_assert(target->start_offset >= target->header_offset);
gl_assert(target->count);
/* Make sure we have enough room for all the "extra" data */

View File

@ -5,75 +5,123 @@
MAKE_FUNC(POLYMODE)
{
const Vertex* const start = _glSubmissionTargetStart(target);
const VertexExtra* const ve_start = aligned_vector_at(target->extras, 0);
const GLuint vstride = ATTRIB_POINTERS.vertex.stride;
GLuint uvstride = ATTRIB_POINTERS.uv.stride;
GLuint ststride = ATTRIB_POINTERS.st.stride;
GLuint dstride = ATTRIB_POINTERS.colour.stride;
GLuint nstride = ATTRIB_POINTERS.normal.stride;
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? ATTRIB_POINTERS.vertex.ptr + (first * vstride) : NULL;
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + (first * uvstride) : NULL;
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (first * dstride) : NULL;
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (first * ststride) : NULL;
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (first * nstride) : NULL;
const float w = 1.0f;
if(!pos) {
static const float w = 1.0f;
if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
/* If we don't have vertices, do nothing */
return;
}
if(!col) {
col = (GLubyte*) &U4ONE;
dstride = 0;
}
/* This is the best value we have. PROCESS_VERTEX_FLAGS needs to operate on quads and tris and so
this need to be divisible by 4 and 3. Even though we should be able to go much higher than this
and still be cache-local, trial and error says otherwise... */
if(!uv) {
uv = (GLubyte*) &F2ZERO;
uvstride = 0;
}
#define BATCH_SIZE 60
if(!st) {
st = (GLubyte*) &F2ZERO;
ststride = 0;
}
GLuint min = 0;
GLuint stride;
const GLubyte* ptr;
Vertex* it;
VertexExtra* ve;
if(!n) {
n = (GLubyte*) &F3Z;
nstride = 0;
}
VertexExtra* ve = (VertexExtra*) ve_start;
Vertex* it = (Vertex*) start;
for(min = 0; min < count; min += BATCH_SIZE) {
const Vertex* start = ((Vertex*) _glSubmissionTargetStart(target)) + min;
const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE;
const int offset = (first + min);
for(int_fast32_t i = 0; i < count; ++i) {
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
pos += vstride;
PREFETCH(pos);
stride = ATTRIB_POINTERS.uv.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + ((first + min) * stride) : NULL;
it = (Vertex*) start;
*((Float2*) it->uv) = *((Float2*) uv);
uv += uvstride;
PREFETCH(uv);
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
it->uv[0] = ((float*) ptr)[0];
it->uv[1] = ((float*) ptr)[1];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
it->uv[0] = 0;
it->uv[1] = 0;
}
}
*((uint32_t*) it->bgra) = *((uint32_t*) col);
col += dstride;
PREFETCH(col);
stride = ATTRIB_POINTERS.colour.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (offset * stride) : NULL;
it = (Vertex*) start;
*((Float2*) ve->st) = *((Float2*) st);
st += ststride;
PREFETCH(st);
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
it->bgra[0] = ptr[0];
it->bgra[1] = ptr[1];
it->bgra[2] = ptr[2];
it->bgra[3] = ptr[3];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
*((uint32_t*) it->bgra) = ~0;
}
}
*((Float3*) ve->nxyz) = *((Float3*) n);
n += nstride;
PREFETCH(n);
stride = ATTRIB_POINTERS.vertex.stride;
ptr = ATTRIB_POINTERS.vertex.ptr + (offset * stride);
it = (Vertex*) start;
PROCESS_VERTEX_FLAGS(it, i);
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
PREFETCH(ptr + stride);
TransformVertex((const float*) ptr, &w, it->xyz, &it->w);
PROCESS_VERTEX_FLAGS(it, min + i);
ptr += stride;
}
++it;
++ve;
start = aligned_vector_at(target->extras, min);
stride = ATTRIB_POINTERS.st.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (offset * stride) : NULL;
ve = (VertexExtra*) start;
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
PREFETCH(ptr + stride);
ve->st[0] = ((float*) ptr)[0];
ve->st[1] = ((float*) ptr)[1];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
ve->st[0] = 0;
ve->st[1] = 0;
}
}
stride = ATTRIB_POINTERS.normal.stride;
ptr = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (offset * stride) : NULL;
ve = (VertexExtra*) start;
if(ptr) {
PREFETCH(ptr);
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
PREFETCH(ptr + stride);
ve->nxyz[0] = ((float*) ptr)[0];
ve->nxyz[1] = ((float*) ptr)[1];
ve->nxyz[2] = ((float*) ptr)[2];
ptr += stride;
}
} else {
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
ve->nxyz[0] = 0;
ve->nxyz[1] = 0;
ve->nxyz[2] = 0;
}
}
}
}

View File

@ -46,10 +46,22 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
config->initial_pt_capacity = 512 * 3;
config->initial_tr_capacity = 1024 * 3;
config->initial_immediate_capacity = 1024 * 3;
config->internal_palette_format = GL_RGBA8;
// RGBA4444 is the fastest general format - 8888 will cause a perf issue
config->internal_palette_format = GL_RGBA4;
config->texture_twiddle = GL_TRUE;
}
static bool _initialized = false;
void APIENTRY glKosInitEx(GLdcConfig* config) {
if(_initialized) {
return;
}
_initialized = true;
TRACE();
printf("\nWelcome to GLdc! Git revision: %s\n\n", GLDC_VERSION);
@ -70,6 +82,10 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
_glInitTextures();
if(config->texture_twiddle) {
glEnable(GL_TEXTURE_TWIDDLE_KOS);
}
OP_LIST.list_type = GPU_LIST_OP_POLY;
PT_LIST.list_type = GPU_LIST_PT_POLY;
TR_LIST.list_type = GPU_LIST_TR_POLY;
@ -83,6 +99,12 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
aligned_vector_reserve(&TR_LIST.vector, config->initial_tr_capacity);
}
void APIENTRY glKosShutdown() {
aligned_vector_clear(&OP_LIST.vector);
aligned_vector_clear(&PT_LIST.vector);
aligned_vector_clear(&TR_LIST.vector);
}
void APIENTRY glKosInit() {
GLdcConfig config;
glKosInitConfig(&config);
@ -93,21 +115,21 @@ void APIENTRY glKosSwapBuffers() {
TRACE();
SceneBegin();
if(OP_LIST.vector.size > 2) {
if(aligned_vector_header(&OP_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_OP_POLY);
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector));
SceneListFinish();
}
if(PT_LIST.vector.size > 2) {
if(aligned_vector_header(&PT_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_PT_POLY);
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector));
SceneListFinish();
}
if(TR_LIST.vector.size > 2) {
if(aligned_vector_header(&TR_LIST.vector)->size > 2) {
SceneListBegin(GPU_LIST_TR_POLY);
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector));
SceneListFinish();
}
SceneFinish();
@ -117,4 +139,4 @@ void APIENTRY glKosSwapBuffers() {
aligned_vector_clear(&TR_LIST.vector);
_glApplyScissor(true);
}
}

View File

@ -254,7 +254,7 @@ GLboolean _glGenerateMipmapTwiddled(const GLuint pvrFormat, const GLubyte* prevD
return GL_TRUE;
}
void APIENTRY glGenerateMipmapEXT(GLenum target) {
void APIENTRY glGenerateMipmap(GLenum target) {
if(target != GL_TEXTURE_2D) {
_glKosThrowError(GL_INVALID_OPERATION, __func__);
return;
@ -334,7 +334,7 @@ GLAPI GLvoid APIENTRY gluBuild2DMipmaps(GLenum target, GLint internalFormat,
unsigned byte data, and finally the data itself. */
glTexImage2D(GL_TEXTURE_2D, 0, 3, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data);
glGenerateMipmapEXT(GL_TEXTURE_2D);
glGenerateMipmap(GL_TEXTURE_2D);
}
GLenum APIENTRY glCheckFramebufferStatusEXT(GLenum target) {

View File

@ -17,10 +17,10 @@ extern inline GLuint _glRecalcFastPath();
GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f};
static GLubyte COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
static GLfloat UV_COORD[2] = {0.0f, 0.0f};
static GLfloat ST_COORD[2] = {0.0f, 0.0f};
static GLfloat __attribute__((aligned(32))) NORMAL[3] = {0.0f, 0.0f, 1.0f};
static GLubyte __attribute__((aligned(32))) COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
static GLfloat __attribute__((aligned(32))) UV_COORD[2] = {0.0f, 0.0f};
static GLfloat __attribute__((aligned(32))) ST_COORD[2] = {0.0f, 0.0f};
static AlignedVector VERTICES;
static AttribPointerList IM_ATTRIBS;
@ -30,7 +30,7 @@ static AttribPointerList IM_ATTRIBS;
can be applied faster */
static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
typedef struct {
typedef struct __attribute__((aligned(32))) {
GLfloat x;
GLfloat y;
GLfloat z;
@ -50,7 +50,7 @@ void _glInitImmediateMode(GLuint initial_size) {
aligned_vector_init(&VERTICES, sizeof(IMVertex));
aligned_vector_reserve(&VERTICES, initial_size);
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES);
IM_ATTRIBS.vertex.size = 3;
IM_ATTRIBS.vertex.type = GL_FLOAT;
IM_ATTRIBS.vertex.stride = sizeof(IMVertex);
@ -161,31 +161,27 @@ void APIENTRY glColor3fv(const GLfloat* v) {
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
unsigned int cap = VERTICES.capacity;
IMVertex* vert = aligned_vector_extend(&VERTICES, 1);
if(cap != VERTICES.capacity) {
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t);
}
/* Resizing could've invalidated the pointers */
IM_ATTRIBS.vertex.ptr = VERTICES.data;
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12;
IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8;
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8;
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4;
vert->x = x;
vert->y = y;
vert->z = z;
vert->u = UV_COORD[0];
vert->v = UV_COORD[1];
vert->s = ST_COORD[0];
vert->t = ST_COORD[1];
*((uint32_t*) vert->bgra) = *((uint32_t*) COLOR);
vert->nx = NORMAL[0];
vert->ny = NORMAL[1];
vert->nz = NORMAL[2];
uint32_t* dest = (uint32_t*) &vert->x;
*(dest++) = *((uint32_t*) &x);
*(dest++) = *((uint32_t*) &y);
*(dest++) = *((uint32_t*) &z);
*(dest++) = *((uint32_t*) &UV_COORD[0]);
*(dest++) = *((uint32_t*) &UV_COORD[1]);
*(dest++) = *((uint32_t*) &ST_COORD[0]);
*(dest++) = *((uint32_t*) &ST_COORD[1]);
*(dest++) = *((uint32_t*) COLOR);
*(dest++) = *((uint32_t*) &NORMAL[0]);
*(dest++) = *((uint32_t*) &NORMAL[1]);
*(dest++) = *((uint32_t*) &NORMAL[2]);
}
void APIENTRY glVertex3fv(const GLfloat* v) {
@ -281,7 +277,7 @@ void APIENTRY glEnd() {
FAST_PATH_ENABLED = GL_TRUE;
#endif
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
glDrawArrays(ACTIVE_POLYGON_MODE, 0, aligned_vector_header(&VERTICES)->size);
ATTRIB_POINTERS = stashed_attrib_pointers;

View File

@ -124,8 +124,10 @@ void APIENTRY glLightModeli(GLenum pname, const GLint param) {
void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) {
switch(pname) {
case GL_LIGHT_MODEL_AMBIENT: {
_glSetLightModelSceneAmbient(params);
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
if(memcmp(_glGetLightModelSceneAmbient(), params, sizeof(float) * 4) != 0) {
_glSetLightModelSceneAmbient(params);
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
}
} break;
case GL_LIGHT_MODEL_LOCAL_VIEWER:
_glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE);
@ -164,18 +166,28 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
LightSource* l = _glLightAt(idx);
GLboolean rebuild = GL_FALSE;
switch(pname) {
case GL_AMBIENT:
memcpy(l->ambient, params, sizeof(GLfloat) * 4);
rebuild = memcmp(l->ambient, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->ambient, params, sizeof(GLfloat) * 4);
}
break;
case GL_DIFFUSE:
memcpy(l->diffuse, params, sizeof(GLfloat) * 4);
rebuild = memcmp(l->diffuse, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->diffuse, params, sizeof(GLfloat) * 4);
}
break;
case GL_SPECULAR:
memcpy(l->specular, params, sizeof(GLfloat) * 4);
rebuild = memcmp(l->specular, params, sizeof(GLfloat) * 4) != 0;
if(rebuild) {
memcpy(l->specular, params, sizeof(GLfloat) * 4);
}
break;
case GL_POSITION: {
_glMatrixLoadModelView();
memcpy(l->position, params, sizeof(GLfloat) * 4);
l->isDirectional = params[3] == 0.0f;
@ -183,6 +195,7 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
if(l->isDirectional) {
//FIXME: Do we need to rotate directional lights?
} else {
_glMatrixLoadModelView();
TransformVec3(l->position);
}
}
@ -204,7 +217,10 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
return;
}
_glPrecalcLightingValues(mask);
if(rebuild) {
_glPrecalcLightingValues(mask);
}
}
void APIENTRY glLightf(GLenum light, GLenum pname, GLfloat param) {
@ -258,25 +274,47 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
Material* material = _glActiveMaterial();
GLboolean rebuild = GL_FALSE;
switch(pname) {
case GL_SHININESS:
glMaterialf(face, pname, *params);
rebuild = GL_TRUE;
break;
case GL_AMBIENT:
vec4cpy(material->ambient, params);
break;
case GL_AMBIENT: {
if(memcmp(material->ambient, params, sizeof(float) * 4) != 0) {
vec4cpy(material->ambient, params);
rebuild = GL_TRUE;
}
} break;
case GL_DIFFUSE:
vec4cpy(material->diffuse, params);
if(memcmp(material->diffuse, params, sizeof(float) * 4) != 0) {
vec4cpy(material->diffuse, params);
rebuild = GL_TRUE;
}
break;
case GL_SPECULAR:
vec4cpy(material->specular, params);
if(memcmp(material->specular, params, sizeof(float) * 4) != 0) {
vec4cpy(material->specular, params);
rebuild = GL_TRUE;
}
break;
case GL_EMISSION:
vec4cpy(material->emissive, params);
if(memcmp(material->emissive, params, sizeof(float) * 4) != 0) {
vec4cpy(material->emissive, params);
rebuild = GL_TRUE;
}
break;
case GL_AMBIENT_AND_DIFFUSE: {
vec4cpy(material->ambient, params);
vec4cpy(material->diffuse, params);
rebuild = (
memcmp(material->ambient, params, sizeof(float) * 4) != 0 ||
memcmp(material->diffuse, params, sizeof(float) * 4) != 0
);
if(rebuild) {
vec4cpy(material->ambient, params);
vec4cpy(material->diffuse, params);
}
} break;
case GL_COLOR_INDEXES:
default: {
@ -285,13 +323,15 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
}
}
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
(pname == GL_SPECULAR) ? SPECULAR_MASK:
(pname == GL_EMISSION) ? EMISSION_MASK:
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
if(rebuild) {
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
(pname == GL_SPECULAR) ? SPECULAR_MASK:
(pname == GL_EMISSION) ? EMISSION_MASK:
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
_glPrecalcLightingValues(updateMask);
_glPrecalcLightingValues(updateMask);
}
}
void APIENTRY glColorMaterial(GLenum face, GLenum mode) {

View File

@ -13,8 +13,8 @@
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2;
static Stack MATRIX_STACKS[3]; // modelview, projection, texture
static Matrix4x4 NORMAL_MATRIX __attribute__((aligned(32)));
static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture
static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX;
Viewport VIEWPORT = {
0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f
@ -23,7 +23,7 @@ Viewport VIEWPORT = {
static GLenum MATRIX_MODE = GL_MODELVIEW;
static GLubyte MATRIX_IDX = 0;
static const Matrix4x4 IDENTITY = {
static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = {
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
@ -106,7 +106,11 @@ void APIENTRY glMatrixMode(GLenum mode) {
}
void APIENTRY glPushMatrix() {
stack_push(MATRIX_STACKS + MATRIX_IDX, stack_top(MATRIX_STACKS + MATRIX_IDX));
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top);
(void) ret;
assert(ret);
}
void APIENTRY glPopMatrix() {
@ -127,10 +131,16 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) {
0.0f, 0.0f, 1.0f, 0.0f,
x, y, z, 1.0f
};
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
UploadMatrix4x4(top);
MultiplyMatrix4x4(&trn);
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
top = stack_top(MATRIX_STACKS + MATRIX_IDX);
assert(top);
DownloadMatrix4x4(top);
if(MATRIX_MODE == GL_MODELVIEW) {
recalculateNormalMatrix();
@ -200,28 +210,9 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) {
/* Load an arbitrary matrix */
void APIENTRY glLoadMatrixf(const GLfloat *m) {
static Matrix4x4 TEMP;
TEMP[M0] = m[0];
TEMP[M1] = m[1];
TEMP[M2] = m[2];
TEMP[M3] = m[3];
TEMP[M4] = m[4];
TEMP[M5] = m[5];
TEMP[M6] = m[6];
TEMP[M7] = m[7];
TEMP[M8] = m[8];
TEMP[M9] = m[9];
TEMP[M10] = m[10];
TEMP[M11] = m[11];
TEMP[M12] = m[12];
TEMP[M13] = m[13];
TEMP[M14] = m[14];
TEMP[M15] = m[15];
static Matrix4x4 __attribute__((aligned(32))) TEMP;
memcpy(TEMP, m, sizeof(float) * 16);
stack_replace(MATRIX_STACKS + MATRIX_IDX, TEMP);
if(MATRIX_MODE == GL_MODELVIEW) {
@ -289,18 +280,10 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
/* Multiply the current matrix by an arbitrary matrix */
void glMultMatrixf(const GLfloat *m) {
Matrix4x4 TEMP __attribute__((aligned(32)));
const Matrix4x4 *pMatrix;
if (((GLint)m)&0xf){ /* Unaligned matrix */
pMatrix = &TEMP;
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
}
else{
pMatrix = (const Matrix4x4*) m;
}
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
MultiplyMatrix4x4(pMatrix);
MultiplyMatrix4x4(&TEMP);
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
if(MATRIX_MODE == GL_MODELVIEW) {
@ -426,7 +409,7 @@ GL_FORCE_INLINE void vec3f_normalize_sh4(float *v){
void gluLookAt(GLfloat eyex, GLfloat eyey, GLfloat eyez, GLfloat centerx,
GLfloat centery, GLfloat centerz, GLfloat upx, GLfloat upy,
GLfloat upz) {
GLfloat m [16];
GLfloat m [16] __attribute__((aligned(32)));
GLfloat f [3];
GLfloat u [3];
GLfloat s [3];

View File

@ -5,6 +5,7 @@
#include <stdbool.h>
#include "gl_assert.h"
#include "types.h"
#define MEMSET(dst, v, size) memset((dst), (v), (size))
@ -260,7 +261,7 @@ typedef float Matrix4x4[16];
void SceneBegin();
void SceneListBegin(GPUList list);
void SceneListSubmit(void* src, int n);
void SceneListSubmit(Vertex* v2, int n);
void SceneListFinish();
void SceneFinish();

View File

@ -9,9 +9,7 @@
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define SQ_BASE_ADDRESS 0xe0000000
static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
#define SQ_BASE_ADDRESS (void*) 0xe0000000
GL_FORCE_INLINE bool glIsVertex(const float flags) {
@ -33,14 +31,23 @@ void InitGPU(_Bool autosort, _Bool fsaa) {
};
pvr_init(&params);
/* If we're PAL and we're NOT VGA, then use 50hz by default. This is the safest
thing to do. If someone wants to force 60hz then they can call vid_set_mode later and hopefully
that'll work... */
int cable = vid_check_cable();
int region = flashrom_get_region();
if(region == FLASHROM_REGION_EUROPE && cable != CT_VGA) {
printf("PAL region without VGA - enabling 50hz");
vid_set_mode(DM_640x480_PAL_IL, PM_RGB565);
}
}
void SceneBegin() {
pvr_wait_ready();
pvr_scene_begin();
QACR0 = 0x11; /* Enable the direct texture path by setting the higher two bits */
QACR1 = 0x11;
}
void SceneListBegin(GPUList list) {
@ -52,380 +59,399 @@ GL_FORCE_INLINE float _glFastInvert(float x) {
}
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
TRACE();
const float f = _glFastInvert(vertex->w);
/* Convert to NDC and apply viewport */
vertex->xyz[0] = __builtin_fmaf(
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
);
vertex->xyz[1] = h - __builtin_fmaf(
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
);
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240;
/* Orthographic projections need to use invZ otherwise we lose
the depth information. As w == 1, and clip-space range is -w to +w
we add 1.0 to the Z to bring it into range. We add a little extra to
avoid a divide by zero.
*/
vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
if(vertex->w == 1.0f) {
vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]);
} else {
vertex->xyz[2] = f;
}
}
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) {
#ifndef NDEBUG
gl_assert(!isnan(v->xyz[2]));
gl_assert(!isnan(v->w));
#endif
#if CLIP_DEBUG
printf("Submitting: %x (%x)\n", v, v->flags);
#endif
volatile uint32_t *sq = SQ_BASE_ADDRESS;
uint32_t *s = (uint32_t*) v;
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
d[0] = *(s++);
d[1] = *(s++);
d[2] = *(s++);
d[3] = *(s++);
d[4] = *(s++);
d[5] = *(s++);
d[6] = *(s++);
d[7] = *(s++);
__asm__("pref @%0" : : "r"(d));
d += 8;
static inline void _glFlushBuffer() {
TRACE();
/* Wait for both store queues to complete */
sq = (uint32_t*) 0xe0000000;
sq[0] = sq[8] = 0;
}
static struct __attribute__((aligned(32))) {
Vertex* v;
int visible;
} triangle[3];
static inline void _glPushHeaderOrVertex(Vertex* v) {
TRACE();
static int tri_count = 0;
static int strip_count = 0;
GL_FORCE_INLINE void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) {
const static uint32_t MASK1 = 0x00FF00FF;
const static uint32_t MASK2 = 0xFF00FF00;
const uint32_t f2 = 256 * t;
const uint32_t f1 = 256 - f2;
*out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) |
(((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2);
uint32_t* s = (uint32_t*) v;
sq[0] = *(s++);
sq[1] = *(s++);
sq[2] = *(s++);
sq[3] = *(s++);
sq[4] = *(s++);
sq[5] = *(s++);
sq[6] = *(s++);
sq[7] = *(s++);
__asm__("pref @%0" : : "r"(sq));
sq += 8;
}
static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
/* Clipping time! */
static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) {
const static float o = 0.003921569f; // 1 / 255
const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2];
const float sign = ((2.0f * (d1 < d0)) - 1.0f);
const float epsilon = -0.00001f * sign;
const float n = (d0 - d1);
const float r = (1.f / sqrtf(n * n)) * sign;
float t = fmaf(r, d0, epsilon);
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
const float invt = 1.0f - t;
vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = fmaf(v2->w - v1->w, t, v1->w);
vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra);
}
vout->w = invt * v1->w + t * v2->w;
GL_FORCE_INLINE void ClearTriangle() {
tri_count = 0;
}
const float m = 255 * t;
const float n = 255 - m;
GL_FORCE_INLINE void ShiftTriangle() {
if(!tri_count) {
return;
}
tri_count--;
triangle[0] = triangle[1];
triangle[1] = triangle[2];
#ifndef NDEBUG
triangle[2].v = NULL;
triangle[2].visible = false;
#endif
}
GL_FORCE_INLINE void ShiftRotateTriangle() {
if(!tri_count) {
return;
}
if(triangle[0].v < triangle[1].v) {
triangle[0] = triangle[2];
} else {
triangle[1] = triangle[2];
}
tri_count--;
vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
}
#define SPAN_SORT_CFG 0x005F8030
static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888;
static volatile uint32_t *QACR = (uint32_t*) 0xFF000038;
void SceneListSubmit(Vertex* v2, int n) {
TRACE();
/* You need at least a header, and 3 vertices to render anything */
if(n < 4) {
return;
}
void SceneListSubmit(void* src, int n) {
const float h = GetVideoMode()->height;
PVR_SET(SPAN_SORT_CFG, 0x0);
uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS;
*PVR_LMMODE0 = 0x0; /* Enable 64bit mode */
//Set PVR DMA registers
*PVR_LMMODE0 = 0;
*PVR_LMMODE1 = 0;
Vertex __attribute__((aligned(32))) tmp;
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
if(!_glNearZClippingEnabled()) {
/* Prep store queues */
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(d, vertex);
}
/* Wait for both store queues to complete */
d = (uint32_t *) SQ_BASE_ADDRESS;
d[0] = d[8] = 0;
return;
}
tri_count = 0;
strip_count = 0;
//Set QACR registers
QACR[1] = QACR[0] = 0x11;
#if CLIP_DEBUG
printf("----\n");
Vertex* vertex = (Vertex*) src;
for(int i = 0; i < n; ++i) {
fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
}
fprintf(stderr, "----\n");
#endif
uint8_t visible_mask = 0;
uint8_t counter = 0;
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 12);
sq = SQ_BASE_ADDRESS;
/* Wait until we fill the triangle */
if(tri_count < 3) {
if(glIsVertex(vertex->flags)) {
++strip_count;
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
if(++tri_count < 3) {
continue;
}
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(d, vertex);
for(int i = 0; i < n; ++i, ++v2) {
PREFETCH(v2 + 1);
switch(v2->flags) {
case GPU_CMD_VERTEX_EOL:
if(counter < 2) {
continue;
}
}
counter = 0;
break;
case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue;
}
break;
default:
_glPushHeaderOrVertex(v2);
counter = 0;
continue;
};
#if CLIP_DEBUG
printf("SC: %d\n", strip_count);
#endif
Vertex* const v0 = v2 - 2;
Vertex* const v1 = v2 - 1;
/* If we got here, then triangle contains 3 vertices */
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
/* Clipping time!
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
#define SUBMIT_QUEUED() \
if(strip_count > 3) { \
tmp = *(vertex - 2); \
/* If we had triangles ahead of this one, submit and finalize */ \
_glPerspectiveDivideVertex(&tmp, h); \
_glSubmitHeaderOrVertex(d, &tmp); \
tmp = *(vertex - 1); \
tmp.flags = GPU_CMD_VERTEX_EOL; \
_glPerspectiveDivideVertex(&tmp, h); \
_glSubmitHeaderOrVertex(d, &tmp); \
}
bool is_last_in_strip = glIsLastVertex(vertex->flags);
visible_mask = (
(v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 |
(counter == 0) << 3
);
switch(visible_mask) {
case 1: {
SUBMIT_QUEUED();
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
case 15: /* All visible, but final vertex in strip */
{
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(v1, h);
_glPushHeaderOrVertex(v1);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 2: {
SUBMIT_QUEUED();
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(v2, h);
_glPushHeaderOrVertex(v2);
}
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 3: {
SUBMIT_QUEUED();
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 4: {
SUBMIT_QUEUED();
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 1:
/* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 5: {
SUBMIT_QUEUED();
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 6: {
SUBMIT_QUEUED();
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case 10:
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
memcpy_vertex(c, v1);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(d, &tmp);
} break;
case 7: {
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(d, vertex - 2);
_glClipEdge(v1, v2, b);
b->flags = v2->flags;
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(d, vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(d, vertex);
tri_count = 0;
strip_count = 0;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 11:
case 3: /* First and second vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a);
a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
}
ShiftRotateTriangle();
continue;
} break;
case 0:
default:
break;
}
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
}
break;
case 13:
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case 5: /* First and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
}
break;
case 8:
default:
break;
}
}
/* Wait for both store queues to complete */
d = (uint32_t *)0xe0000000;
d[0] = d[8] = 0;
_glFlushBuffer();
}
void SceneListFinish() {

View File

@ -24,7 +24,7 @@
#define GL_FORCE_INLINE static GL_INLINE_DEBUG
#endif
#define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr)))
#define PREFETCH(addr) __builtin_prefetch((addr))
GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) {
if(!len) {

View File

@ -10,8 +10,9 @@
#include "software/parameter_equation.h"
#define CLIP_DEBUG 0
#define ZNEAR_CLIPPING_ENABLED 1
static size_t AVAILABLE_VRAM = 16 * 1024 * 1024;
static size_t AVAILABLE_VRAM = 8 * 1024 * 1024;
static Matrix4x4 MATRIX;
static SDL_Window* WINDOW = NULL;
@ -29,83 +30,13 @@ static VideoMode vid_mode = {
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
static void DrawTriangle(Vertex* v0, Vertex* v1, Vertex* v2) {
// Compute triangle bounding box.
int minX = MIN(MIN(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
int maxX = MAX(MAX(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
int minY = MIN(MIN(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
int maxY = MAX(MAX(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
// Clip to scissor rect.
minX = MAX(minX, 0);
maxX = MIN(maxX, vid_mode.width);
minY = MAX(minY, 0);
maxY = MIN(maxY, vid_mode.height);
// Compute edge equations.
EdgeEquation e0, e1, e2;
EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]);
EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]);
EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]);
float area = 0.5 * (e0.c + e1.c + e2.c);
/* This is very ugly. I don't understand the math properly
* so I just swap the vertex order if something is back-facing
* and we want to render it. Patches welcome! */
#define REVERSE_WINDING() \
Vertex* tv = v0; \
v0 = v1; \
v1 = tv; \
EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); \
EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); \
EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); \
area = 0.5f * (e0.c + e1.c + e2.c) \
// Check if triangle is backfacing.
if(CULL_MODE == GPU_CULLING_CCW) {
if(area < 0) {
return;
}
} else if(CULL_MODE == GPU_CULLING_CW) {
if(area < 0) {
// We only draw front-facing polygons, so swap
// the back to front and draw
REVERSE_WINDING();
} else {
// Front facing, so bail
return;
}
} else if(area < 0) {
/* We're not culling, but this is backfacing, so swap vertices and edges */
REVERSE_WINDING();
}
ParameterEquation r, g, b;
ParameterEquationInit(&r, v0->bgra[2], v1->bgra[2], v2->bgra[2], &e0, &e1, &e2, area);
ParameterEquationInit(&g, v0->bgra[1], v1->bgra[1], v2->bgra[1], &e0, &e1, &e2, area);
ParameterEquationInit(&b, v0->bgra[0], v1->bgra[0], v2->bgra[0], &e0, &e1, &e2, area);
// Add 0.5 to sample at pixel centers.
for (float x = minX + 0.5f, xm = maxX + 0.5f; x <= xm; x += 1.0f)
for (float y = minY + 0.5f, ym = maxY + 0.5f; y <= ym; y += 1.0f)
{
if (EdgeEquationTestPoint(&e0, x, y) && EdgeEquationTestPoint(&e1, x, y) && EdgeEquationTestPoint(&e2, x, y)) {
int rint = ParameterEquationEvaluate(&r, x, y);
int gint = ParameterEquationEvaluate(&g, x, y);
int bint = ParameterEquationEvaluate(&b, x, y);
SDL_SetRenderDrawColor(RENDERER, rint, gint, bint, 255);
SDL_RenderDrawPoint(RENDERER, x, y);
}
}
}
AlignedVector vbuffer;
void InitGPU(_Bool autosort, _Bool fsaa) {
// 32-bit SDL has trouble with the wayland driver for some reason
setenv("SDL_VIDEODRIVER", "x11", 1);
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS);
WINDOW = SDL_CreateWindow(
@ -119,6 +50,8 @@ void InitGPU(_Bool autosort, _Bool fsaa) {
RENDERER = SDL_CreateRenderer(
WINDOW, -1, SDL_RENDERER_ACCELERATED
);
aligned_vector_init(&vbuffer, sizeof(SDL_Vertex));
}
void SceneBegin() {
@ -161,7 +94,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
}
}
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) {
#ifndef NDEBUG
if(glIsVertex(v->flags)) {
gl_assert(!isnan(v->xyz[2]));
@ -176,335 +109,329 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
BUFFER[vertex_counter++] = *v;
}
static struct {
Vertex* v;
int visible;
} triangle[3];
static inline void _glFlushBuffer() {}
static int tri_count = 0;
static int strip_count = 0;
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
const int MASK1 = 0x00FF00FF;
const int MASK2 = 0xFF00FF00;
const int f2 = 256 * t;
const int f1 = 256 - f2;
const uint32_t a = *(uint32_t*) v1;
const uint32_t b = *(uint32_t*) v2;
*((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) |
(((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2);
}
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
/* Clipping time! */
const static float o = 0.003921569f; // 1 / 255
const float d0 = v1->w + v1->xyz[2];
const float d1 = v2->w + v2->xyz[2];
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
const float invt = 1.0f - t;
const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f;
vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
float t = (d0 / (d0 - d1)) + epsilon;
vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
t = (t > 1.0f) ? 1.0f : t;
t = (t < 0.0f) ? 0.0f : t;
vout->w = invt * v1->w + t * v2->w;
vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);
const float m = 255 * t;
const float n = 255 - m;
vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
}
GL_FORCE_INLINE void ClearTriangle() {
tri_count = 0;
}
GL_FORCE_INLINE void ShiftTriangle() {
if(!tri_count) {
void SceneListSubmit(Vertex* v2, int n) {
/* You need at least a header, and 3 vertices to render anything */
if(n < 4) {
return;
}
tri_count--;
triangle[0] = triangle[1];
triangle[1] = triangle[2];
#ifndef NDEBUG
triangle[2].v = NULL;
triangle[2].visible = false;
#endif
}
GL_FORCE_INLINE void ShiftRotateTriangle() {
if(!tri_count) {
return;
}
if(triangle[0].v < triangle[1].v) {
triangle[0] = triangle[2];
} else {
triangle[1] = triangle[2];
}
tri_count--;
}
void SceneListSubmit(void* src, int n) {
/* Perform perspective divide on each vertex */
Vertex* vertex = (Vertex*) src;
const float h = GetVideoMode()->height;
/* If Z-clipping is disabled, just fire everything over to the buffer */
if(!ZNEAR_CLIPPING_ENABLED) {
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
if(glIsVertex(vertex->flags)) {
_glPerspectiveDivideVertex(vertex, h);
}
_glSubmitHeaderOrVertex(vertex);
}
uint8_t visible_mask = 0;
uint8_t counter = 0;
return;
}
tri_count = 0;
strip_count = 0;
#if CLIP_DEBUG
printf("----\n");
#endif
for(int i = 0; i < n; ++i, ++vertex) {
PREFETCH(vertex + 1);
bool is_last_in_strip = glIsLastVertex(vertex->flags);
/* Wait until we fill the triangle */
if(tri_count < 3) {
if(glIsVertex(vertex->flags)) {
triangle[tri_count].v = vertex;
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
tri_count++;
strip_count++;
} else {
/* We hit a header */
tri_count = 0;
strip_count = 0;
_glSubmitHeaderOrVertex(vertex);
}
if(tri_count < 3) {
for(int i = 0; i < n; ++i, ++v2) {
PREFETCH(v2 + 1);
switch(v2->flags) {
case GPU_CMD_VERTEX_EOL:
if(counter < 2) {
continue;
}
counter = 0;
break;
case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue;
}
break;
default:
_glPushHeaderOrVertex(v2);
counter = 0;
continue;
}
};
Vertex* const v0 = v2 - 2;
Vertex* const v1 = v2 - 1;
visible_mask = (
(v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 |
(counter == 0) << 3
);
switch(visible_mask) {
case 15: /* All visible, but final vertex in strip */
{
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h);
_glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2, h);
_glPushHeaderOrVertex(v2);
}
break;
case 7:
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
#if CLIP_DEBUG
printf("SC: %d\n", strip_count);
#endif
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
/* If we got here, then triangle contains 3 vertices */
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
if(visible_mask == 7) {
#if CLIP_DEBUG
printf("Visible\n");
#endif
/* All the vertices are visible! We divide and submit v0, then shift */
_glPerspectiveDivideVertex(vertex - 2, h);
_glSubmitHeaderOrVertex(vertex - 2);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
if(is_last_in_strip) {
_glPerspectiveDivideVertex(vertex - 1, h);
_glSubmitHeaderOrVertex(vertex - 1);
_glPerspectiveDivideVertex(vertex, h);
_glSubmitHeaderOrVertex(vertex);
tri_count = 0;
strip_count = 0;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 1:
/* First vertex was visible, but not last in strip */
{
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case 10:
case 2:
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = v2->flags;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break;
case 11:
case 3: /* First and second vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a);
a->flags = v2->flags;
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case 12:
case 4:
/* Third vertex was visible. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
}
ShiftRotateTriangle();
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
} else if(visible_mask) {
/* Clipping time!
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
}
break;
case 13:
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
There are 6 distinct possibilities when clipping a triangle. 3 of them result
in another triangle, 3 of them result in a quadrilateral.
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
Assuming you iterate the edges of the triangle in order, and create a new *visible*
vertex when you cross the plane, and discard vertices behind the plane, then the only
difference between the two cases is that the final two vertices that need submitting have
to be reversed.
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
be used in a subsequent triangle in the strip and would end up being double divided.
*/
#if CLIP_DEBUG
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
#endif
Vertex tmp;
if(strip_count > 3) {
#if CLIP_DEBUG
printf("Flush\n");
#endif
tmp = *(vertex - 2);
/* If we had triangles ahead of this one, submit and finalize */
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
tmp = *(vertex - 1);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
}
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
switch(visible_mask) {
case 1: {
/* 0, 0a, 2a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 2: {
/* 0a, 1, 1a */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case 5: /* First and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 3: {
/* 0, 1, 2a, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 4: {
/* 1a, 2, 2a */
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 5: {
/* 0, 0a, 2, 1a */
tmp = *triangle[0].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
case 6: {
/* 0a, 1, 2a, 2 */
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
tmp = *triangle[1].v;
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
tmp.flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
tmp = *triangle[2].v;
tmp.flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(&tmp, h);
_glSubmitHeaderOrVertex(&tmp);
} break;
default:
break;
}
/* If this was the last in the strip, we don't need to
submit anything else, we just wipe the tri_count */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
} else {
ShiftRotateTriangle();
strip_count = 2;
}
} else {
/* Invisible? Move to the next in the strip */
if(is_last_in_strip) {
tri_count = 0;
strip_count = 0;
}
strip_count = 2;
ShiftRotateTriangle();
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
}
break;
case 8:
default:
break;
}
}
_glFlushBuffer();
}
void SceneListFinish() {
@ -536,18 +463,41 @@ void SceneListFinish() {
Vertex* v0 = (Vertex*) (flags - step - step);
Vertex* v1 = (Vertex*) (flags - step);
Vertex* v2 = (Vertex*) (flags);
(vidx % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2);
SDL_Vertex sv0 = {
{v0->xyz[0], v0->xyz[1]},
{v0->bgra[2], v0->bgra[1], v0->bgra[0], v0->bgra[3]},
{v0->uv[0], v0->uv[1]}
};
SDL_Vertex sv1 = {
{v1->xyz[0], v1->xyz[1]},
{v1->bgra[2], v1->bgra[1], v1->bgra[0], v1->bgra[3]},
{v1->uv[0], v1->uv[1]}
};
SDL_Vertex sv2 = {
{v2->xyz[0], v2->xyz[1]},
{v2->bgra[2], v2->bgra[1], v2->bgra[0], v2->bgra[3]},
{v2->uv[0], v2->uv[1]}
};
aligned_vector_push_back(&vbuffer, &sv0, 1);
aligned_vector_push_back(&vbuffer, &sv1, 1);
aligned_vector_push_back(&vbuffer, &sv2, 1);
}
if((*flags) == GPU_CMD_VERTEX_EOL) {
vidx = 0;
}
}
SDL_SetRenderDrawColor(RENDERER, 255, 255, 255, 255);
SDL_RenderGeometry(RENDERER, NULL, aligned_vector_front(&vbuffer), aligned_vector_size(&vbuffer), NULL, 0);
}
void SceneFinish() {
SDL_RenderPresent(RENDERER);
return;
/* Only sensible place to hook the quit signal */
SDL_Event e;
while (SDL_PollEvent(&e)) {

View File

@ -48,7 +48,8 @@ void TransformVec3NoMod(const float* v, float* ret);
/* Transform a 3-element normal using the stored matrix (w == 0)*/
static inline void TransformNormalNoMod(const float* xIn, float* xOut) {
(void) xIn;
(void) xOut;
}
void TransformVertices(Vertex* vertices, const int count);

View File

@ -164,7 +164,10 @@ typedef struct {
GLboolean isCompressed;
GLboolean isPaletted;
//50
} TextureObject;
GLenum internalFormat;
//54
GLubyte padding[10]; // Pad to 64-bytes
} __attribute__((aligned(32))) TextureObject;
typedef struct {
GLfloat emissive[4];
@ -233,11 +236,41 @@ GL_FORCE_INLINE float clamp(float d, float min, float max) {
return (d < min) ? min : (d > max) ? max : d;
}
GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) {
#ifdef __DREAMCAST__
_Complex float double_scratch;
asm volatile (
"fschg\n\t"
"clrs\n\t"
".align 2\n\t"
"fmov.d @%[in]+, %[scratch]\n\t"
"fmov.d %[scratch], @%[out]\n\t"
"fmov.d @%[in]+, %[scratch]\n\t"
"add #8, %[out]\n\t"
"fmov.d %[scratch], @%[out]\n\t"
"fmov.d @%[in]+, %[scratch]\n\t"
"add #8, %[out]\n\t"
"fmov.d %[scratch], @%[out]\n\t"
"fmov.d @%[in], %[scratch]\n\t"
"add #8, %[out]\n\t"
"fmov.d %[scratch], @%[out]\n\t"
"fschg\n"
: [in] "+&r" ((uint32_t) src), [scratch] "=&d" (double_scratch), [out] "+&r" ((uint32_t) dest)
:
: "t", "memory" // clobbers
);
#else
*dest = *src;
#endif
}
#define swapVertex(a, b) \
do { \
Vertex c = *a; \
*a = *b; \
*b = c; \
Vertex __attribute__((aligned(32))) c; \
memcpy_vertex(&c, a); \
memcpy_vertex(a, b); \
memcpy_vertex(b, &c); \
} while(0)
/* ClipVertex doesn't have room for these, so we need to parse them
@ -345,6 +378,9 @@ extern GLubyte ACTIVE_TEXTURE;
extern GLboolean TEXTURES_ENABLED[];
GLubyte _glGetActiveTexture();
GLint _glGetTextureInternalFormat();
GLboolean _glGetTextureTwiddle();
void _glSetTextureTwiddle(GLboolean v);
GLuint _glGetActiveClientTexture();
TexturePalette* _glGetSharedPalette(GLshort bank);
@ -520,6 +556,7 @@ void _glSetLightModelColorControl(GLint v);
GLuint _glEnabledLightCount();
void _glRecalcEnabledLights();
GLfloat* _glLightModelSceneAmbient();
GLfloat* _glGetLightModelSceneAmbient();
LightSource* _glLightAt(GLuint i);
GLboolean _glNearZClippingEnabled();

View File

@ -180,6 +180,10 @@ void _glSetLightModelSceneAmbient(const GLfloat* v) {
vec4cpy(GPUState.scene_ambient, v);
}
GLfloat* _glGetLightModelSceneAmbient() {
return GPUState.scene_ambient;
}
void _glSetLightModelColorControl(GLint v) {
GPUState.color_control = v;
}
@ -251,7 +255,8 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
context->txr2.enable = GPU_TEXTURE_DISABLE;
context->txr2.alpha = GPU_TXRALPHA_DISABLE;
if(!TEXTURES_ENABLED[textureUnit] || !tx1) {
if(!TEXTURES_ENABLED[textureUnit] || !tx1 || !tx1->data) {
context->txr.base = NULL;
return;
}
@ -399,8 +404,8 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
}
break;
case GL_CULL_FACE: {
if(GPUState.cull_face != GL_TRUE) {
GPUState.cull_face = GL_TRUE;
if(GPUState.culling_enabled != GL_TRUE) {
GPUState.culling_enabled = GL_TRUE;
GPUState.is_dirty = GL_TRUE;
}
@ -489,7 +494,11 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_TEXTURE_TWIDDLE_KOS:
_glSetTextureTwiddle(GL_TRUE);
break;
default:
_glKosThrowError(GL_INVALID_VALUE, __func__);
break;
}
}
@ -503,8 +512,8 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
}
break;
case GL_CULL_FACE: {
if(GPUState.cull_face != GL_FALSE) {
GPUState.cull_face = GL_FALSE;
if(GPUState.culling_enabled != GL_FALSE) {
GPUState.culling_enabled = GL_FALSE;
GPUState.is_dirty = GL_TRUE;
}
@ -591,7 +600,11 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
GPUState.is_dirty = GL_TRUE;
}
break;
case GL_TEXTURE_TWIDDLE_KOS:
_glSetTextureTwiddle(GL_FALSE);
break;
default:
_glKosThrowError(GL_INVALID_VALUE, __func__);
break;
}
}
@ -972,6 +985,10 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) {
case GL_FREE_CONTIGUOUS_TEXTURE_MEMORY_KOS:
*params = _glFreeContiguousTextureMemory();
break;
case GL_TEXTURE_INTERNAL_FORMAT_KOS:
*params = _glGetTextureInternalFormat();
break;
default:
_glKosThrowError(GL_INVALID_ENUM, __func__);
break;

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) [year] [fullname]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,158 +0,0 @@
# Summary
yalloc is a memory efficient allocator which is intended for embedded
applications that only have a low amount of RAM and want to maximize its
utilization. Properties of the allocator:
- pools can be up to 128k
- user data is 32bit aligned
- 4 bytes overhead per allocation
- supports defragmentation
- uses a free list for first fit allocation strategy (most recently freed
blocks are used first)
- extensively tested (see section below)
- MIT license
# Defragmentation
This feature was the initial motivation for this implementation. Especially
when dealing with highly memory constrained environments fragmenting memory
pools can be annoying. For this reason this implementation supports
defragmentation which moves all allocated blocks into a contiguous range at the
beginning of the pool, leaving a maximized free range at the end.
As there is no garbage collector or other runtime system involved that updates
the references, the application must do so. This is done in three steps:
1. yalloc_defrag_start() is called. This calculates the new
post-defragmentation-addresses for all allocations, but otherwise leaves
the allocations untouched.
2. yalloc_defrag_address() is called by the application for every pointer that
points to an allocation. It returns the post-defragmentation-address for
the allocation. The application must update all its relevant pointers this
way. Care must be taken to not dereference those moved pointers yet. If the
application works with hierarchical data then this can easily be done by
updating the pointers bottom up (first the leaves, then their parents).
3. yalloc_defrag_commit() is called to finally perform the defragmentation.
All allocated blocks are moved to their post-defragmentation-address and
the application can continue using the pool the normal way.
It is up to the application when (and if) it performs defragmentation. One
strategy would be to delay it until an allocation failure. Another approach
would be to perform the defragmentation regularly when there is nothing else to
do.
# Configurable Defines
INTERNAL_VALIDATE
If this is not defined on the compiler commandline it will be defined as 0 if
NDEBUG is defined and otherwise as 1. If you want to disable internal
validation when NDEBUG is not defined then define INERNAL_VALIDATE as 0 on the
compiler commandline.
If it is nonzero the heap will be validated via a bunch of assert() calls at
the end of every function that modifies the heap. This has roughly O(N*M)
overhead where N is the number of allocated blocks and M the number of free
blocks in a heap. For applications with enough live allocations this will get
significant.
YALLOC_VALGRIND
If this is defined in yalloc.c and NVALGRIND is not defined then
valgrind/memcheck.h is included and the allocator functions tell valgrind
about the pool, the allocations and makes the block headers inaccessible outside
of yalloc-functions. This allows valgrind to detect a lot of the accidents that
can happen when dealing with dynamic memory. This also adds some overhead for every
yalloc-call because most of them will "unprotect" the internal structure on
entry and "protect" it again (marking it as inaccessible for valgrind) before
returning.
# Tests
The tests rely on internal validation of the pool (see INTERNAL_VALIDATE) to
check that no assumptions about the internal structure of the pool are
violated. They additionally check for correctness of observations that can be
made by using the public functions of the allocator (like checking if user data
stays unmodified). There are a few different scripts that run tests:
- run_coverage.sh runs a bunch of testfunctions that are carefully crafted to
cover all code paths. Coverage data is generated by clang and a summary is
shown at the end of the test.
- run_valgrind.sh tests if the valgrind integration is working as expected,
runs the functions from the coverage test and some randomly generated
testcases under valgrind.
- run_libfuzzer.sh uses libfuzzer from clang to generate interesting testcases
and runs them in multiple jobs in parallel for 10 seconds. It also generates
coverage data at the end (it always got 100% coverage in my testruns).
All tests exit with 0 and print "All fine!" at the end if there were no
errors. Coverage deficits are not counted as error, so you have to look at the
summary (they should show 100% coverage!).
# Implementation Details
The Headers and the user data are 32bit aligned. Headers have two 16bit fields
where the high 15 bits represent offsets (relative to the pools address) to the
previous/next block. The macros HDR_PTR() and HDR_OFFSET() are used to
translate an offset to an address and back. The 32bit alignment is exploited to
allow pools of up to 128k with that 15 significant bits.
A pool is always occupied by non-overlapping blocks that link to their
previous/next block in address order via the prev/next field of Header.
Free blocks are always joined: No two free blocks will ever be neighbors.
Free blocks have an additional header of the same structure. This additional
header is used to build a list of free blocks (independent of their address
order).
yalloc_free() will insert the freed block to the front of the free list.
yalloc_alloc() searches that list front to back and takes the first block that
is big enough to satisfy the allocation.
There is always a Header at the front and at the end of the pool. The Header at
the end is degenerate: It is marked as "used" but has no next block (which is
usually used to determine the size of a block).
The prev-field of the very first block in the pool has special meaning: It
points to the first free block in the pool. Or, if the pool is currently
defragmenting (after yalloc_defrag_start() and before yalloc_defrag_commit()),
points to the last header of the pool. This state can be recognized by checking
if it points to an empty block (normal pool state) or a used block
(defragmentation in progress). This logic can be seen in
yalloc_defrag_in_progress().
The lowest bit of next/prev have special meaning:
- low bit of prev is set for free blocks
- low bit of next is set for blocks with 32bit padding after the user data.
This is needed when a block is allocated from a free block that leaves only
4 free bytes after the user data... which is not enough to insert a
free-header (which needs 8 bytes). The padding will be reclaimed when
that block is freed or when the pool is defragmented. The predicate
isPadded() can be used to test if a block is padded. Free blocks are never
padded.
The predicate isNil() can be used to test if an offset points nowhere (it tests
if all 15 high bits of an offset are 1). The constant NIL has all but the
lowest bit set. It is used to set offsets to point to nowhere, and in some
places it is used to mask out the actual address bits of an offset. This should
be kept in mind when modifying the code and updating prev/next: Think carefully
if you have to preserve the low bit when updating an offset!
Defragmentation is done in two phases: First the user calls
yalloc_defrag_start(). This will put the pool in a special state where no
alloc/free-calls are allowed. In this state the prev-fields of the used blocks
have a special meaning: They store the offset that the block will have after
defragmentation finished. This information is used by yalloc_defrag_address()
which can be called by the application to query the new addresses for its
allocations. After the application has updated all its pointers it must call
yalloc_defrag_commit() which moves all used blocks in contiguous space at the
beginning of the pool, leaving one maximized free block at the end.

View File

@ -1,803 +0,0 @@
#include "yalloc.h"
#include "yalloc_internals.h"
#include <assert.h>
#include <string.h>
#define ALIGN(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
#if defined(YALLOC_VALGRIND) && !defined(NVALGRIND)
# define USE_VALGRIND 1
#else
# define USE_VALGRIND 0
#endif
#if USE_VALGRIND
# include <valgrind/memcheck.h>
#else
# define VALGRIND_MAKE_MEM_UNDEFINED(p, s) ((void)0)
# define VALGRIND_MAKE_MEM_DEFINED(p, s) ((void)0)
# define VALGRIND_MAKE_MEM_NOACCESS(p, s) ((void)0)
# define VALGRIND_CREATE_MEMPOOL(pool, rz, z) ((void)0)
# define VALGRIND_MEMPOOL_ALLOC(pool, p, s) ((void)0)
# define VALGRIND_MEMPOOL_FREE(pool, p) ((void)0)
# define VALGRIND_MEMPOOL_CHANGE(pool, a, b, s) ((void)0)
#endif
#define MARK_NEW_FREE_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header) * 2)
#define MARK_NEW_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header))
#define PROTECT_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header))
#define PROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header) * 2)
#define UNPROTECT_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header))
#define UNPROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header) * 2)
#if USE_VALGRIND
/* Walks every block in address order and marks its header (plus the second,
   free-list header of free blocks) as readable for valgrind. Called on entry
   to the public functions so the allocator may touch its own metadata. */
static void _unprotect_pool(void * pool)
{
  Header * cur = (Header*)pool;
  for (;;)
  {
    UNPROTECT_HDR(cur);
    if (isFree(cur))
      UNPROTECT_HDR(cur + 1); // free blocks carry a second (free-list) header
    if (isNil(cur->next))
      break;
    cur = HDR_PTR(cur->next);
  }
}

/* Inverse of _unprotect_pool(): marks all block headers (and the whole body
   of free blocks) as inaccessible again so stray user accesses are reported
   by valgrind. */
static void _protect_pool(void * pool)
{
  Header * cur = (Header*)pool;
  while (cur)
  {
    Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);
    if (isFree(cur))
      VALGRIND_MAKE_MEM_NOACCESS(cur, (char*)next - (char*)cur);
    else
      PROTECT_HDR(cur);
    cur = next;
  }
}

#define assert_is_pool(pool) assert(VALGRIND_MEMPOOL_EXISTS(pool));

#else
/* Valgrind integration disabled: protection helpers become no-ops. */
static void _unprotect_pool(void * pool){(void)pool;}
static void _protect_pool(void * pool){(void)pool;}
#define assert_is_pool(pool) ((void)0)
#endif
// internal version that does not unprotect/protect the pool
static int _yalloc_defrag_in_progress(void * pool)
{
  /* Defragmentation is flagged by the pool's free-list pointer referencing
     the used dummy block at the end, i.e. a block whose free-bit is clear. */
  Header * first = (Header*)pool;
  return isNil(first->prev) ? 0 : !(HDR_PTR(first->prev)->prev & 1);
}
/* Public wrapper: unprotect the metadata, query the state, re-protect. */
int yalloc_defrag_in_progress(void * pool)
{
  _unprotect_pool(pool);
  const int defragging = _yalloc_defrag_in_progress(pool);
  _protect_pool(pool);
  return defragging;
}
#if YALLOC_INTERNAL_VALIDATE
/* Counts how often blk occurs in the pool's free list.
   Fix: accumulate in a size_t to match the declared return type — the old
   "int n" was implicitly converted on return and inconsistent with the
   sibling _count_addr_list_occurences(). */
static size_t _count_free_list_occurences(Header * pool, Header * blk)
{
  size_t n = 0;
  if (!isNil(pool->prev))
  { // free list is non-empty; pool->prev points at its first entry
    Header * cur = HDR_PTR(pool->prev);
    for (;;)
    {
      if (cur == blk)
        ++n;
      if (isNil(cur[1].next))
        break;
      cur = HDR_PTR(cur[1].next);
    }
  }
  return n;
}
/* Counts how often blk occurs in the address-ordered block list. */
static size_t _count_addr_list_occurences(Header * pool, Header * blk)
{
  size_t hits = 0;
  for (Header * walk = pool; ; walk = HDR_PTR(walk->next))
  {
    if (walk == blk)
      ++hits;
    if (isNil(walk->next))
      return hits;
  }
}
/* Asserts that p is a live user pointer of this pool: its header must occur
   exactly once in the address list and must not be marked free. */
static void _validate_user_ptr(void * pool, void * p)
{
  Header * const hdr = (Header*)p - 1;
  size_t occurrences = _count_addr_list_occurences((Header*)pool, hdr);
  assert(occurrences == 1 && !isFree(hdr));
}
/**
Validates if all the invariants of a pool are intact.

This is very expensive when there are enough blocks in the heap (quadratic complexity!).
*/
static void _yalloc_validate(void * pool_)
{
  Header * pool = (Header*)pool_;
  Header * cur = pool;
  assert(!isNil(pool->next)); // there must always be at least two blocks: a free/used one and the final block at the end

  if (_yalloc_defrag_in_progress(pool))
  { // during defragmentation the prev-fields of used blocks hold their
    // post-defragmentation offsets; check they form a contiguous packing
    Header * prevUsed = NULL;
    while (!isNil(cur->next))
    {
      if (!isFree(cur))
      { // it is a used block
        Header * newAddr = cur == pool ? pool : HDR_PTR(cur->prev);
        assert(newAddr <= cur);   // blocks only ever move towards the pool start
        assert(newAddr >= pool);

        if (prevUsed)
        { // this block must land directly after the previous used block
          Header * prevNewAddr = prevUsed == pool ? pool : HDR_PTR(prevUsed->prev);
          size_t prevBruttoSize = (char*)HDR_PTR(prevUsed->next) - (char*)prevUsed;
          if (isPadded(prevUsed))
            prevBruttoSize -= 4; // remove padding
          assert((char*)newAddr == (char*)prevNewAddr + prevBruttoSize);
        }
        else
        { // the first used block always moves to the pool start
          assert(newAddr == pool);
        }

        prevUsed = cur;
      }

      cur = HDR_PTR(cur->next);
    }

    assert(cur == HDR_PTR(pool->prev)); // the free-list should point to the last block
    assert(!isFree(cur)); // the last block must not be free
  }
  else
  { // normal state: check address list, free list and their cross-references
    Header * prev = NULL;

    // iterate blocks in address order
    for (;;)
    {
      if (prev)
      { // prev-link must point back to the previous block in address order
        Header * x = HDR_PTR(cur->prev);
        assert(x == prev);
      }

      int n = _count_free_list_occurences(pool, cur);
      if (isFree(cur))
      { // it is a free block
        assert(n == 1); // every free block appears exactly once in the free list
        assert(!isPadded(cur)); // free blocks must have a zero padding-bit

        if (prev)
        {
          assert(!isFree(prev)); // free blocks must not be direct neighbours
        }
      }
      else
      { // used blocks must never appear in the free list
        assert(n == 0);
      }

      if (isNil(cur->next))
        break;

      Header * next = HDR_PTR(cur->next);
      assert((char*)next >= (char*)cur + sizeof(Header) * 2); // minimum block size
      prev = cur;
      cur = next;
    }

    assert(isNil(cur->next)); // loop must end at the terminating dummy block

    if (!isNil(pool->prev))
    {
      // iterate free-list
      Header * f = HDR_PTR(pool->prev);
      assert(isNil(f[1].prev)); // the first free block has no predecessor

      for (;;)
      {
        assert(isFree(f)); // must be free
        int n = _count_addr_list_occurences(pool, f);
        assert(n == 1); // every free-list entry is a real block of the pool

        if (isNil(f[1].next))
          break;
        f = HDR_PTR(f[1].next);
      }
    }
  }
}
#else
/* Internal validation disabled (YALLOC_INTERNAL_VALIDATE == 0): both checks
   compile to no-ops so release builds pay no cost. */
static void _yalloc_validate(void * pool){(void)pool;}
static void _validate_user_ptr(void * pool, void * p){(void)pool; (void)p;}
#endif
/**
 Initializes a pool inside the given buffer.

 The pool is set up as one free block spanning the whole (rounded-down)
 buffer, terminated by a degenerate "used" header that only marks the end.

 @param pool Start of the buffer (must satisfy the alignment documented in yalloc.h).
 @param size Size of the buffer in bytes; rounded down to a multiple of sizeof(Header).
 @return 0 on success, -1 if the size is unsupported (too large or too small).
*/
int yalloc_init(void * pool, size_t size)
{
  if (size > MAX_POOL_SIZE)
    return -1;

  // TODO: Error when pool is not properly aligned
  // TODO: Error when size is not a multiple of the alignment?

  // Round down to a multiple of the header size in O(1) — the old version
  // decremented in a loop, one iteration per surplus byte.
  size -= size % sizeof(Header);
  if(size < sizeof(Header) * 3)
    return -1; // too small for first header + free-list header + end marker

  VALGRIND_CREATE_MEMPOOL(pool, 0, 0);

  Header * first = (Header*)pool;
  Header * last = (Header*)((char*)pool + size) - 1;

  MARK_NEW_FREE_HDR(first);
  MARK_NEW_HDR(first);

  first->prev = HDR_OFFSET(first) | 1; // free-list head points at this block; low bit = "is free"
  first->next = HDR_OFFSET(last);
  first[1].prev = NIL; // the single free block has no free-list neighbours
  first[1].next = NIL;

  last->prev = HDR_OFFSET(first);
  last->next = NIL; // degenerate end marker: "used", but no next block

  _unprotect_pool(pool);
  _yalloc_validate(pool);
  _protect_pool(pool);
  return 0;
}
/* Releases the pool's buffer for other use. Without valgrind this is a no-op;
   with valgrind it destroys the mempool and marks the whole buffer undefined. */
void yalloc_deinit(void * pool)
{
#if USE_VALGRIND
  VALGRIND_DESTROY_MEMPOOL(pool);

  // walk to the last header so we know the pool's total extent
  Header * last = (Header*)pool;
  UNPROTECT_HDR(last);
  while (!isNil(last->next))
  {
    Header * next = HDR_PTR(last->next);
    UNPROTECT_HDR(next);
    last = next;
  }

  VALGRIND_MAKE_MEM_UNDEFINED(pool, (char*)(last + 1) - (char*)pool);
#else
  (void)pool;
#endif
}
/* Allocates size bytes from the pool using first-fit over the free list.
   Returns the address right after the block header, or NULL if no free block
   is big enough (or size is 0). Must not be called while defragmenting. */
void * yalloc_alloc(void * pool, size_t size)
{
  assert_is_pool(pool);
  _unprotect_pool(pool);
  assert(!_yalloc_defrag_in_progress(pool));
  _yalloc_validate(pool);
  if (!size)
  {
    _protect_pool(pool);
    return NULL;
  }

  Header * root = (Header*)pool;
  if (isNil(root->prev))
  {
    _protect_pool(pool);
    return NULL; /* free list is empty, no chance to allocate anything */
  }

  /* round up to alignment */
  size = ALIGN(size, 32);

  size_t bruttoSize = size + sizeof(Header);
  Header * prev = NULL;
  Header * cur = HDR_PTR(root->prev); // first fit: walk the free list front to back
  for (;;)
  {
    size_t curSize = (char*)HDR_PTR(cur->next) - (char*)cur; /* size of the block, including its header */

    if (curSize >= bruttoSize) // it is big enough
    {
      // take action for unused space in the free block
      if (curSize >= bruttoSize + sizeof(Header) * 2)
      { // the leftover space is big enough to make it a free block
        // Build a free block from the unused space and insert it into the list of free blocks after the current free block
        Header * tail = (Header*)((char*)cur + bruttoSize);
        MARK_NEW_FREE_HDR(tail);

        // update address-order-list
        tail->next = cur->next;
        tail->prev = HDR_OFFSET(cur) | 1;
        HDR_PTR(cur->next)->prev = HDR_OFFSET(tail); // NOTE: We know the next block is used because free blocks are never neighbours. So we don't have to care about the lower bit which would be set for the prev of a free block.
        cur->next = HDR_OFFSET(tail);

        // update list of free blocks
        tail[1].next = cur[1].next;
        // NOTE: tail[1].prev is updated in the common path below (assignment to "HDR_PTR(cur[1].next)[1].prev")

        if (!isNil(cur[1].next))
          HDR_PTR(cur[1].next)[1].prev = HDR_OFFSET(tail);
        cur[1].next = HDR_OFFSET(tail);
      }
      else if (curSize > bruttoSize)
      { // there will be unused space, but not enough to insert a free header
        internal_assert(curSize - bruttoSize == sizeof(Header)); // unused space must be enough to build a free-block or it should be exactly the size of a Header
        cur->next |= 1; // set marker for "has unused trailing space"
      }
      else
      { // exact fit, nothing left over
        internal_assert(curSize == bruttoSize);
      }

      cur->prev &= NIL; // clear marker for "is a free block"

      // remove from linked list of free blocks
      if (prev)
        prev[1].next = cur[1].next;
      else
      { // cur was the first free block: repoint the pool's free-list head,
        // preserving the pool header's own free-bit
        uint32_t freeBit = isFree(root);
        root->prev = (cur[1].next & NIL) | freeBit;
      }

      if (!isNil(cur[1].next))
        HDR_PTR(cur[1].next)[1].prev = prev ? HDR_OFFSET(prev) : NIL;

      _yalloc_validate(pool);
      VALGRIND_MEMPOOL_ALLOC(pool, cur + 1, size);
      _protect_pool(pool);
      return cur + 1; // return address after the header
    }

    if (isNil(cur[1].next))
      break; // reached the end of the free list without a fit

    prev = cur;
    cur = HDR_PTR(cur[1].next);
  }

  _yalloc_validate(pool);
  _protect_pool(pool);
  return NULL;
}
// Removes a block from the free-list and moves the pools first-free-bock pointer to its successor if it pointed to that block.
static void unlink_from_free_list(Header * pool, Header * blk)
{
  if (!isNil(blk[1].prev))
  { // a free-list predecessor exists: bypass blk in the forward direction
    HDR_PTR(blk[1].prev)[1].next = blk[1].next;
  }
  else
  { // blk is the head of the free list: repoint the pool's first-free
    // pointer to blk's successor, preserving the pool header's free-bit
    uint32_t freeBit = isFree(pool);
    pool->prev = (blk[1].next & NIL) | freeBit;
  }

  // bypass blk in the backward direction, if a successor exists
  if (!isNil(blk[1].next))
    HDR_PTR(blk[1].next)[1].prev = blk[1].prev;
}
/* Returns the usable payload size of allocation p (the requested size rounded
   up to the allocator's alignment). */
size_t yalloc_block_size(void * pool, void * p)
{
  Header * const hdr = (Header*)p - 1;
  UNPROTECT_HDR(hdr);
  size_t payloadSize = (char*)HDR_PTR(hdr->next) - (char*)p;
  if (isPadded(hdr))
    payloadSize -= sizeof(Header); // trailing padding is not usable payload
  PROTECT_HDR(hdr);
  return payloadSize;
}
/* Returns allocation p to the pool, coalescing it with free neighbours and
   reclaiming any trailing padding of the preceding used block. The resulting
   free block is pushed onto the front of the free list. */
void yalloc_free(void * pool_, void * p)
{
  assert_is_pool(pool_);
  assert(!yalloc_defrag_in_progress(pool_));
  if (!p)
    return; // free(NULL) is a no-op, like the C standard's free()

  _unprotect_pool(pool_);

  Header * pool = (Header*)pool_;
  Header * cur = (Header*)p - 1;

  // get pointers to previous/next block in address order
  Header * prev = cur == pool || isNil(cur->prev) ? NULL : HDR_PTR(cur->prev);
  Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);

  int prevFree = prev && isFree(prev);
  int nextFree = next && isFree(next);

#if USE_VALGRIND
  {
    unsigned errs = VALGRIND_COUNT_ERRORS;
    VALGRIND_MEMPOOL_FREE(pool, p);
    if (VALGRIND_COUNT_ERRORS > errs)
    { // early exit if the free was invalid (so we get a valgrind error and don't mess up the pool, which is helpful for testing if invalid frees are detected by valgrind)
      _protect_pool(pool_);
      return;
    }
  }
#endif

  _validate_user_ptr(pool_, p);

  if (prevFree && nextFree)
  { // the freed block has two free neighbors
    unlink_from_free_list(pool, prev);
    unlink_from_free_list(pool, next);

    // join prev, cur and next
    prev->next = next->next;
    HDR_PTR(next->next)->prev = cur->prev;

    // prev is now the block we want to push onto the free-list
    cur = prev;
  }
  else if (prevFree)
  {
    unlink_from_free_list(pool, prev);

    // join prev and cur
    prev->next = cur->next;
    HDR_PTR(cur->next)->prev = cur->prev;

    // prev is now the block we want to push onto the free-list
    cur = prev;
  }
  else if (nextFree)
  {
    unlink_from_free_list(pool, next);

    // join cur and next
    cur->next = next->next;
    HDR_PTR(next->next)->prev = next->prev & NIL; // mask out next's free-bit
  }

  // if there is a previous block and that block has padding then we want to grow the new free block into that padding
  if (cur != pool && !isNil(cur->prev))
  { // there is a previous block
    Header * left = HDR_PTR(cur->prev);
    if (isPadded(left))
    { // the previous block has padding: grow the current block backwards so the padding becomes part of the free block
      Header * grown = cur - 1;
      MARK_NEW_HDR(grown);
      grown->next = cur->next;
      grown->prev = cur->prev;
      left->next = HDR_OFFSET(grown);
      if (!isNil(cur->next))
        HDR_PTR(cur->next)->prev = HDR_OFFSET(grown);

      cur = grown;
    }
  }

  cur->prev |= 1; // it becomes a free block
  cur->next &= NIL; // reset padding-bit

  UNPROTECT_HDR(cur + 1);
  cur[1].prev = NIL; // it will be the first free block in the free list, so it has no prevFree

  if (!isNil(pool->prev))
  { // the free-list was already non-empty
    HDR_PTR(pool->prev)[1].prev = HDR_OFFSET(cur); // make the first entry in the free list point back to the new free block (it will become the first one)
    cur[1].next = pool->prev; // the next free block is the first of the old free-list
  }
  else
    cur[1].next = NIL; // free-list was empty, so there is no successor

  VALGRIND_MAKE_MEM_NOACCESS(cur + 2, (char*)HDR_PTR(cur->next) - (char*)(cur + 2));

  // now the freed block is the first in the free-list
  // update the offset to the first element of the free list
  uint32_t freeBit = isFree(pool); // remember the free-bit of the offset
  pool->prev = HDR_OFFSET(cur) | freeBit; // update the offset and restore the free-bit

  _yalloc_validate(pool);
  _protect_pool(pool);
}
/* Returns the total free space of the pool: all free blocks plus any padding
   of used blocks, minus one header (the cost of the allocation that would
   consume it). See yalloc.h for the exact contract. */
size_t yalloc_count_free(void * pool_)
{
  assert_is_pool(pool_);
  _unprotect_pool(pool_);
  assert(!_yalloc_defrag_in_progress(pool_));
  Header * pool = (Header*)pool_;
  size_t bruttoFree = 0;

  _yalloc_validate(pool);

  for (Header * cur = pool; ; cur = HDR_PTR(cur->next))
  {
    if (isFree(cur))
      bruttoFree += (char*)HDR_PTR(cur->next) - (char*)cur; // whole block, header included
    else if (isPadded(cur))
      bruttoFree += sizeof(Header); // padding of a used block is reclaimable

    if (isNil(cur->next))
      break;
  }

  _protect_pool(pool);

  if (bruttoFree < sizeof(Header))
  {
    internal_assert(!bruttoFree); // free space should always be a multiple of sizeof(Header)
    return 0;
  }

  return bruttoFree - sizeof(Header);
}
/* Returns the size of the largest allocation that would currently succeed,
   i.e. the biggest continuous free range minus one header. */
size_t yalloc_count_continuous(void * pool_)
{
  assert_is_pool(pool_);
  _unprotect_pool(pool_);
  assert(!_yalloc_defrag_in_progress(pool_));
  Header * pool = (Header*)pool_;
  size_t largestFree = 0;

  _yalloc_validate(pool);

  for (Header * cur = pool; ; cur = HDR_PTR(cur->next))
  {
    if (isFree(cur))
    { // track the maximum free span (header included)
      size_t span = (uintptr_t)HDR_PTR(cur->next) - (uintptr_t)cur;
      if (span > largestFree)
        largestFree = span;
    }
    if (isNil(cur->next))
      break;
  }

  _protect_pool(pool);

  if (largestFree < sizeof(Header))
  {
    internal_assert(!largestFree); // free space should always be a multiple of sizeof(Header)
    return 0;
  }

  return largestFree - sizeof(Header);
}
/* Returns the lowest-addressed allocation of the pool, or NULL if the pool
   holds no used blocks. */
void * yalloc_first_used(void * pool)
{
  assert_is_pool(pool);
  _unprotect_pool(pool);

  // scan in address order; the terminating dummy block is never reported
  for (Header * blk = (Header*)pool; !isNil(blk->next); blk = HDR_PTR(blk->next))
  {
    if (!isFree(blk))
    {
      _protect_pool(pool);
      return blk + 1; // user data sits right after the header
    }
  }

  _protect_pool(pool);
  return NULL;
}
/* Given an allocation p, returns the next allocation in address order, or
   NULL if p is the last one. */
void * yalloc_next_used(void * pool, void * p)
{
  assert_is_pool(pool);
  _unprotect_pool(pool);
  _validate_user_ptr(pool, p);

  Header * hdr = (Header*)p - 1;
  assert(!isNil(hdr->next)); // the last block should never end up as input to this function (because it is not user-visible)

  // continue the address-order scan right after p's block
  for (Header * blk = HDR_PTR(hdr->next); !isNil(blk->next); blk = HDR_PTR(blk->next))
  {
    if (!isFree(blk))
    {
      _protect_pool(pool);
      return blk + 1;
    }
  }

  _protect_pool(pool);
  return NULL;
}
/* Computes the post-defragmentation offset of every used block (stored in its
   prev field) and puts the pool into the "defragmentation in progress" state.
   No data is moved until yalloc_defrag_commit(). */
void yalloc_defrag_start(void * pool_)
{
  assert_is_pool(pool_);
  _unprotect_pool(pool_);
  assert(!_yalloc_defrag_in_progress(pool_));
  Header * pool = (Header*)pool_;

  // iterate over all blocks in address order and store the post-defragment address of used blocks in their "prev" field
  size_t end = 0; // offset for the next used block
  Header * blk = (Header*)pool;
  for (; !isNil(blk->next); blk = HDR_PTR(blk->next))
  {
    if (!isFree(blk))
    { // it is a used block
      blk->prev = end >> 1; // encode offset like HDR_OFFSET (inverse of HDR_PTR; checked below)
      internal_assert((char*)HDR_PTR(blk->prev) == (char*)pool + end);

      size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;
      if (isPadded(blk))
      { // the block is padded; padding is dropped during defragmentation
        bruttoSize -= sizeof(Header);
      }

      end += bruttoSize;
      internal_assert(end % sizeof(Header) == 0);
    }
  }

  // blk is now the last block (the dummy "used" block at the end of the pool)
  internal_assert(isNil(blk->next));
  internal_assert(!isFree(blk));

  // mark the pool as "defragmentation in progress"
  uint32_t freeBit = isFree(pool);
  pool->prev = (HDR_OFFSET(blk) & NIL) | freeBit;

  _yalloc_validate(pool);
  internal_assert(yalloc_defrag_in_progress(pool));
  _protect_pool(pool);
}
/* Returns the address allocation p will have after yalloc_defrag_commit().
   Must only be called while the pool is in the "defragmenting" state.

   Fix: the early-return path for the first allocation skipped
   _protect_pool(), leaving the pool metadata unprotected under valgrind
   after _unprotect_pool() had been called. Both paths now re-protect. */
void * yalloc_defrag_address(void * pool_, void * p)
{
  assert_is_pool(pool_);
  assert(yalloc_defrag_in_progress(pool_));
  if (!p)
    return NULL;

  Header * pool = (Header*)pool_;

  _unprotect_pool(pool);
  _validate_user_ptr(pool_, p);

  void * defragP;
  if (pool + 1 == p)
  { // the first allocation never moves; its prev-field is the pool's
    // "defragmentation in progress" marker, not a target offset
    defragP = pool + 1;
  }
  else
  {
    Header * blk = (Header*)p - 1;
    defragP = HDR_PTR(blk->prev) + 1;
  }

  _protect_pool(pool);
  return defragP;
}
/* Moves every used block to the target offset computed by
   yalloc_defrag_start(), leaving one maximized free block (or none) at the
   end of the pool, and returns the pool to its normal state. */
void yalloc_defrag_commit(void * pool_)
{
  assert_is_pool(pool_);
  _unprotect_pool(pool_);
  assert(_yalloc_defrag_in_progress(pool_));
  Header * pool = (Header*)pool_;

  // iterate over all blocks in address order and move them
  size_t end = 0; // offset for the next used block
  Header * blk = pool;
  Header * lastUsed = NULL;
  while (!isNil(blk->next))
  {
    if (!isFree(blk))
    { // it is a used block
      size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;
      if (isPadded(blk))
      { // the block is padded; the padding is dropped by the move
        bruttoSize -= sizeof(Header);
      }

      Header * next = HDR_PTR(blk->next); // remember before blk is overwritten

      blk->prev = lastUsed ? HDR_OFFSET(lastUsed) : NIL;
      blk->next = (end + bruttoSize) >> 1; // new next-offset, encoded like HDR_OFFSET

      lastUsed = (Header*)((char*)pool + end);
      VALGRIND_MAKE_MEM_UNDEFINED(lastUsed, (char*)blk - (char*)lastUsed);
      memmove(lastUsed, blk, bruttoSize); // source and target may overlap
      VALGRIND_MEMPOOL_CHANGE(pool, blk + 1, lastUsed + 1, bruttoSize - sizeof(Header));

      end += bruttoSize;
      blk = next;
    }
    else
      blk = HDR_PTR(blk->next);
  }

  // blk is now the last block (the dummy "used" block at the end of the pool)
  internal_assert(isNil(blk->next));
  internal_assert(!isFree(blk));

  if (lastUsed)
  {
    Header * gap = HDR_PTR(lastUsed->next);
    if (gap == blk)
    { // there is no gap
      pool->prev = NIL; // the free list is empty
      blk->prev = HDR_OFFSET(lastUsed);
    }
    else if (blk - gap > 1)
    { // the gap is big enough for a free Header
      // set a free list that contains the gap as only element
      gap->prev = HDR_OFFSET(lastUsed) | 1;
      gap->next = HDR_OFFSET(blk);
      gap[1].prev = NIL;
      gap[1].next = NIL;
      pool->prev = blk->prev = HDR_OFFSET(gap);
    }
    else
    { // there is a gap, but it is too small to be used as free-list-node, so just make it padding of the last used block
      lastUsed->next = HDR_OFFSET(blk) | 1;
      pool->prev = NIL;
      blk->prev = HDR_OFFSET(lastUsed);
    }
  }
  else
  { // the pool is empty
    pool->prev = 1;
  }

  internal_assert(!_yalloc_defrag_in_progress(pool));
  _yalloc_validate(pool);
  _protect_pool(pool);
}

View File

@ -1,176 +0,0 @@
/**
@file
API of the yalloc allocator.
*/
#ifndef YALLOC_H
#define YALLOC_H
#include <stddef.h>
/**
Maximum supported pool size. yalloc_init() will fail for larger pools.
*/
#define MAX_POOL_SIZE ((2 << 24) - 4)
/**
Creates a pool inside a given buffer.
Pools must be deinitialized with yalloc_deinit() when they are no longer needed.
@param pool The starting address of the pool. It must have at least 16bit
alignment (internal structure uses 16bit integers). Allocations are placed at
32bit boundaries starting from this address, so if the user data should be
32bit aligned then this address has to be 32bit aligned. Typically an address
of static memory, or an array on the stack is used if the pool is only used
temporarily.
@param size Size of the pool.
@return 0 on success, nonzero if the size is not supported.
*/
int yalloc_init(void * pool, size_t size);
/**
Deinitializes the buffer that is used by the pool and makes it available for other use.
The content of the buffer is undefined after this.
@param pool The starting address of an initialized pool.
*/
void yalloc_deinit(void * pool);
/**
Allocates a block of memory from a pool.
This function mimics malloc().
The pool must not be in the "defragmenting" state when this function is called.
@param pool The starting address of an initialized pool.
@param size Number of bytes to allocate.
@return Allocated buffer or \c NULL if there was no free range that could serve
the allocation. See @ref yalloc_defrag_start() for a way to remove
fragmentation which may cause allocations to fail even when there is enough
space in total.
*/
void * yalloc_alloc(void * pool, size_t size);
/**
Returns an allocation to a pool.
This function mimics free().
The pool must not be in the "defragmenting" state when this function is called.
@param pool The starting address of the initialized pool the allocation comes from.
@param p An address that was returned from yalloc_alloc() of the same pool.
*/
void yalloc_free(void * pool, void * p);
/**
Returns the maximum size of a successful allocation (assuming a completely unfragmented heap).
After defragmentation the first allocation with the returned size is guaranteed to succeed.
@param pool The starting address of an initialized pool.
@return Number of bytes that can be allocated (assuming the pool is defragmented).
*/
size_t yalloc_count_free(void * pool);
/**
Returns the maximum continuous free area.
@param pool The starting address of an initialized pool.
@return Number of free bytes that exist continuously.
*/
size_t yalloc_count_continuous(void * pool_);
/**
Queries the usable size of an allocated block.
@param pool The starting address of the initialized pool the allocation comes from.
@param p An address that was returned from yalloc_alloc() of the same pool.
@return Size of the memory block. This is the size passed to @ref yalloc_alloc() rounded up to 4.
*/
size_t yalloc_block_size(void * pool, void * p);
/**
Finds the first (in address order) allocation of a pool.
@param pool The starting address of an initialized pool.
@return Address of the allocation the lowest address inside the pool (this is
what @ref yalloc_alloc() returned), or \c NULL if there is no used block.
*/
void * yalloc_first_used(void * pool);
/**
Given a pointer to an allocation finds the next (in address order) used block of a pool.
@param pool The starting address of the initialized pool the allocation comes from.
@param p Pointer to an allocation in that pool, typically comes from a previous
call to @ref yalloc_first_used()
*/
void * yalloc_next_used(void * pool, void * p);
/**
Starts defragmentation for a pool.
Allocations will stay where they are. But the pool is put in the "defragmenting"
state (see @ref yalloc_defrag_in_progress()).
The pool must not be in the "defragmenting" state when this function is called.
The pool is put into the "defragmenting" state by this function.
@param pool The starting address of an initialized pool.
*/
void yalloc_defrag_start(void * pool);
/**
Returns the address that an allocation will have after @ref yalloc_defrag_commit() is called.
The pool must be in the "defragmenting" state when this function is called.
@param pool The starting address of the initialized pool the allocation comes from.
@param p Pointer to an allocation in that pool.
@return The address the allocation will have after @ref yalloc_defrag_commit() is called.
*/
void * yalloc_defrag_address(void * pool, void * p);
/**
Finishes the defragmentation.
The content of all allocations in the pool will be moved to the address that
was reported by @ref yalloc_defrag_address(). The pool will then have only one
free block. This means that an <tt>yalloc_alloc(pool, yalloc_count_free(pool))</tt>
will succeed.
The pool must be in the "defragmenting" state when this function is called. The
pool is put back to normal state by this function.
@param pool The starting address of an initialized pool.
*/
void yalloc_defrag_commit(void * pool);
/**
Tells if the pool is in the "defragmenting" state (after a @ref yalloc_defrag_start() and before a @ref yalloc_defrag_commit()).
@param pool The starting address of an initialized pool.
@return Nonzero if the pool is currently in the "defragmenting" state.
*/
int yalloc_defrag_in_progress(void * pool);
/**
Helper function that dumps the state of the pool to stdout.
This function is only available if built with <tt>yalloc_dump.c</tt>. This
function only exists for debugging purposes and can be ignored by normal users
that are not interested in the internal structure of the implementation.
@param pool The starting address of an initialized pool.
@param name A string that is used as "Title" for the output.
*/
void yalloc_dump(void * pool, char * name);
#endif // YALLOC_H

View File

@ -1,39 +0,0 @@
#include "yalloc_internals.h"
#include <stdio.h>
/* Print one prev/next offset field of a Header in human-readable form.
 *
 * pool:   base address of the pool (offsets are reported relative to it)
 * name:   label to print in front of the value
 * offset: raw prev/next field taken from a Header
 *
 * Offsets are 32-bit (Header.prev/next are uint32_t and NIL is 0xFFFFFFFEu),
 * so the parameter must be uint32_t: taking it as uint16_t truncated the
 * value, meaning isNil() — which compares against 0xFFFFFFFF — could never
 * match, and HDR_PTR() computed the wrong address for offsets >= 0x10000. */
static void printOffset(void * pool, char * name, uint32_t offset)
{
    if (isNil(offset))
        printf(" %s: nil\n", name);
    else
        printf(" %s: %td\n", name, (char*)HDR_PTR(offset) - (char*)pool);
}
/* Walk the pool's block list and dump every header to stdout.
 * Debugging aid only; `name` is printed as a title for the dump. */
void yalloc_dump(void * pool, char * name)
{
    printf("---- %s ----\n", name);

    Header * blk = (Header*)pool;
    int last_block = 0;
    while (!last_block)
    {
        printf(isFree(blk) ? "%td: free @%p\n" : "%td: used @%p\n", (char*)blk - (char*)pool, blk);

        /* The very first header reuses its prev slot as the "first free" link. */
        printOffset(pool, blk == pool ? "first free" : "prev", blk->prev);
        printOffset(pool, "next", blk->next);

        if (isFree(blk))
        {
            /* Free blocks carry an extra header's worth of free-list links. */
            printOffset(pool, "prevFree", blk[1].prev);
            printOffset(pool, "nextFree", blk[1].next);
        }
        else
            printf(" payload includes padding: %i\n", isPadded(blk));

        if (isNil(blk->next))
            last_block = 1;
        else
        {
            printf(" %td bytes payload\n", (char*)HDR_PTR(blk->next) - (char*)blk - sizeof(Header));
            blk = HDR_PTR(blk->next);
        }
    }

    fflush(stdout);
}

View File

@ -1,63 +0,0 @@
#ifndef YALLOC_INTERNALS_H
#define YALLOC_INTERNALS_H
#include <stdint.h>
/* Block header that precedes every allocation in the pool.
 * prev/next are encoded offsets (see HDR_OFFSET below), not pointers. */
typedef struct
{
    uint32_t prev; // offset of previous block; low bit set if free
    uint32_t next; // offset of next block; for used blocks: low bit set if unused header at the end
    /* We need user data to be 32-byte aligned, so the header needs
     * to be 32 bytes in size (as user data follows the header) */
    uint8_t padding[32 - (sizeof(uint32_t) * 2)];
} Header;

// NOTE: Data is 32-bit aligned and prev/next store (byte offset >> 1), with the
// lowest bit reserved as a flag. HDR_PTR() masks off the flag and shifts left by
// 1 to recover the byte offset, so the 31 significant bits of an offset can
// address the whole pool. NIL (all ones except the flag bit) means "no block".
#define NIL 0xFFFFFFFEu

// return Header-address for a prev/next
#define HDR_PTR(offset) ((Header*)((char*)pool + (((offset) & NIL)<<1)))

// return a prev/next for a Header-address
#define HDR_OFFSET(blockPtr) ((uint32_t)(((char*)blockPtr - (char*)pool) >> 1))

/* Internal validation defaults to on for debug builds, off for NDEBUG builds. */
#ifndef YALLOC_INTERNAL_VALIDATE
# ifdef NDEBUG
# define YALLOC_INTERNAL_VALIDATE 0
# else
# define YALLOC_INTERNAL_VALIDATE 1
#endif
#endif

/*
internal_assert() is used in some places to check internal expectations.
Activate this if you modify the code to detect problems as early as possible.
In other cases this should be deactivated.
*/
#if 0
#define internal_assert assert
#else
#define internal_assert(condition)((void) 0)
#endif
/* Detects offsets that point nowhere: an offset is NIL regardless of the
 * state of its low flag bit, so the flag is forced on before comparing. */
static inline int isNil(uint32_t offset)
{
    const uint32_t with_flag_set = offset | 1;
    return with_flag_set == 0xFFFFFFFF;
}
/* A block is free when the low (flag) bit of its prev offset is set. */
static inline int isFree(Header * hdr)
{
    return hdr->prev & 1;
}
/* For used blocks: the low (flag) bit of next marks an unused header
 * at the end of the payload (padding). */
static inline int isPadded(Header * hdr)
{
    return hdr->next & 1;
}
#endif // YALLOC_INTERNALS_H

View File

@ -32,7 +32,7 @@ GLdc uses CMake for its build system, it currently ships with two "backends":
- kospvr - This is the hardware-accelerated Dreamcast backend
- software - This is a stub software rasterizer used for testing and debugging
To compile for Dreamcast, you'll want to do something like the following:
To compile a Dreamcast debug build, you'll want to do something like the following:
```
mkdir dcbuild
@ -41,6 +41,11 @@ cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" .
make
```
For a release build, replace the cmake line with the following:
```
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
```
You will need KallistiOS compiled and configured (e.g. the KOS_BASE environment
variable must be set)

View File

@ -12,36 +12,45 @@
#include "aligned_vector.h"
extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
extern inline void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count);
extern inline void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count);
extern inline void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count);
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count);
void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
vector->size = vector->capacity = 0;
vector->element_size = element_size;
void aligned_vector_init(AlignedVector* vector, uint32_t element_size) {
/* Now initialize the header*/
AlignedVectorHeader* const hdr = &vector->hdr;
hdr->size = 0;
hdr->capacity = ALIGNED_VECTOR_CHUNK_SIZE;
hdr->element_size = element_size;
vector->data = NULL;
/* Reserve some initial capacity */
aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
/* Reserve some initial capacity. This will do the allocation but not set up the header */
void* ptr = aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
assert(ptr);
(void) ptr;
}
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
if(vector->size == 0) {
AlignedVectorHeader* const hdr = &vector->hdr;
if(hdr->size == 0) {
uint32_t element_size = hdr->element_size;
free(vector->data);
/* Reallocate the header */
vector->data = NULL;
vector->capacity = 0;
hdr->size = hdr->capacity = 0;
hdr->element_size = element_size;
} else {
unsigned int new_byte_size = vector->size * vector->element_size;
unsigned char* original_data = vector->data;
uint32_t new_byte_size = (hdr->size * hdr->element_size);
uint8_t* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
if(original_data) {
FASTCPY(vector->data, original_data, new_byte_size);
free(original_data);
}
vector->capacity = vector->size;
hdr->capacity = hdr->size;
}
}

View File

@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
@ -12,6 +13,7 @@ extern "C" {
#if defined(__APPLE__) || defined(__WIN32__)
/* Linux + Kos define this, OSX does not, so just use malloc there */
static inline void* memalign(size_t alignment, size_t size) {
(void) alignment;
return malloc(size);
}
#else
@ -65,10 +67,14 @@ AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
#endif
typedef struct {
uint8_t* __attribute__((aligned(32))) data;
uint32_t size;
uint32_t capacity;
uint32_t element_size;
} __attribute__((aligned(32))) AlignedVectorHeader;
typedef struct {
AlignedVectorHeader hdr;
uint8_t* data;
} AlignedVector;
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
@ -78,90 +84,137 @@ typedef struct {
((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
void aligned_vector_init(AlignedVector* vector, uint32_t element_size);
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
if(element_count <= vector->capacity) {
return NULL;
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) {
const AlignedVectorHeader* hdr = &vector->hdr;
assert(index < hdr->size);
return vector->data + (index * hdr->element_size);
}
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) {
AlignedVectorHeader* hdr = &vector->hdr;
if(element_count < hdr->capacity) {
return aligned_vector_at(vector, element_count);
}
unsigned int original_byte_size = vector->size * vector->element_size;
uint32_t original_byte_size = (hdr->size * hdr->element_size);
/* We overallocate so that we don't make small allocations during push backs */
element_count = ROUND_TO_CHUNK_SIZE(element_count);
unsigned int new_byte_size = element_count * vector->element_size;
unsigned char* original_data = vector->data;
uint32_t new_byte_size = (element_count * hdr->element_size);
uint8_t* original_data = vector->data;
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
vector->data = (uint8_t*) memalign(0x20, new_byte_size);
assert(vector->data);
if(original_data) {
AV_MEMCPY4(vector->data, original_data, original_byte_size);
free(original_data);
}
vector->capacity = element_count;
AV_MEMCPY4(vector->data, original_data, original_byte_size);
free(original_data);
hdr->capacity = element_count;
return vector->data + original_byte_size;
}
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
assert(index < vector->size);
return &vector->data[index * vector->element_size];
AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) {
return (AlignedVectorHeader*) &vector->hdr;
}
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) {
const AlignedVectorHeader* hdr = &vector->hdr;
return hdr->size;
}
AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) {
const AlignedVectorHeader* hdr = &vector->hdr;
return hdr->capacity;
}
AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) {
return vector->data;
}
#define av_assert(x) \
do {\
if(!(x)) {\
fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
exit(1);\
}\
} while(0); \
/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) {
void* ret = NULL;
unsigned int previousCount = vector->size;
if(vector->capacity < element_count) {
AlignedVectorHeader* hdr = &vector->hdr;
uint32_t previous_count = hdr->size;
if(hdr->capacity <= element_count) {
/* If we didn't have capacity, increase capacity (slow) */
vector->size = element_count;
ret = aligned_vector_reserve(vector, element_count);
} else if(previousCount < element_count) {
aligned_vector_reserve(vector, element_count);
hdr->size = element_count;
ret = aligned_vector_at(vector, previous_count);
av_assert(hdr->size == element_count);
av_assert(hdr->size <= hdr->capacity);
} else if(previous_count < element_count) {
/* So we grew, but had the capacity, just get a pointer to
* where we were */
vector->size = element_count;
ret = aligned_vector_at(vector, previousCount);
} else {
vector->size = element_count;
hdr->size = element_count;
av_assert(hdr->size < hdr->capacity);
ret = aligned_vector_at(vector, previous_count);
} else if(hdr->size != element_count) {
hdr->size = element_count;
av_assert(hdr->size < hdr->capacity);
}
return ret;
}
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) {
/* Resize enough room */
AlignedVectorHeader* hdr = &vector->hdr;
assert(count);
assert(vector->element_size);
assert(hdr->element_size);
unsigned int initial_size = vector->size;
aligned_vector_resize(vector, vector->size + count);
#ifndef NDEBUG
uint32_t element_size = hdr->element_size;
uint32_t initial_size = hdr->size;
#endif
assert(vector->size == initial_size + count);
unsigned char* dest = vector->data + (vector->element_size * initial_size);
uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count);
assert(dest);
/* Copy the objects in */
AV_MEMCPY4(dest, objs, vector->element_size * count);
AV_MEMCPY4(dest, objs, hdr->element_size * count);
assert(hdr->element_size == element_size);
assert(hdr->size == initial_size + count);
return dest;
}
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
return aligned_vector_resize(vector, vector->size + additional_count);
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) {
AlignedVectorHeader* hdr = &vector->hdr;
void* ret = aligned_vector_resize(vector, hdr->size + additional_count);
assert(ret); // Should always return something
return ret;
}
AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
vector->size = 0;
AlignedVectorHeader* hdr = &vector->hdr;
hdr->size = 0;
}
void aligned_vector_shrink_to_fit(AlignedVector* vector);
void aligned_vector_cleanup(AlignedVector* vector);
static inline void* aligned_vector_back(AlignedVector* vector){
return aligned_vector_at(vector, vector->size - 1);
AV_FORCE_INLINE void* aligned_vector_back(AlignedVector* vector){
AlignedVectorHeader* hdr = &vector->hdr;
return aligned_vector_at(vector, hdr->size ? hdr->size - 1 : 0);
}
#ifdef __cplusplus

View File

@ -68,7 +68,6 @@ void* named_array_reserve(NamedArray* array, unsigned int id) {
void named_array_release(NamedArray* array, unsigned int new_id) {
unsigned int i = new_id / 8;
unsigned int j = new_id % 8;
array->used_markers[i] &= (unsigned char) ~(1 << j);
}

View File

@ -19,6 +19,10 @@ __BEGIN_DECLS
#include <math.h>
#if __STDCPP_FLOAT16_T__
#include <stdfloat>
#endif
/* Primitive Types taken from GL for compatability */
/* Not all types are implemented in Open GL DC V.1.0 */
#define GL_POINTS 0x0000
@ -305,12 +309,13 @@ __BEGIN_DECLS
#define GL_UNSIGNED_INT 0x1405
#define GL_FLOAT 0x1406
#define GL_DOUBLE 0x140A
#define GL_HALF_FLOAT 0x140B
#define GL_2_BYTES 0x1407
#define GL_3_BYTES 0x1408
#define GL_4_BYTES 0x1409
/* ErrorCode */
#define GL_NO_ERROR 0
#define GL_NO_ERROR ((GLenum) 0)
#define GL_INVALID_ENUM 0x0500
#define GL_INVALID_VALUE 0x0501
#define GL_INVALID_OPERATION 0x0502
@ -359,7 +364,7 @@ __BEGIN_DECLS
#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364
#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365
#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366
#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367
#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368
#define GL_COLOR_INDEX 0x1900
@ -371,6 +376,32 @@ __BEGIN_DECLS
#define GL_RGBA 0x1908
#define GL_LUMINANCE 0x1909
#define GL_LUMINANCE_ALPHA 0x190A
#define GL_R3_G3_B2 0x2A10
#define GL_ALPHA4 0x803B
#define GL_ALPHA8 0x803C
#define GL_ALPHA12 0x803D
#define GL_ALPHA16 0x803E
#define GL_LUMINANCE4 0x803F
#define GL_LUMINANCE8 0x8040
#define GL_LUMINANCE12 0x8041
#define GL_LUMINANCE16 0x8042
#define GL_LUMINANCE4_ALPHA4 0x8043
#define GL_LUMINANCE6_ALPHA2 0x8044
#define GL_LUMINANCE8_ALPHA8 0x8045
#define GL_LUMINANCE12_ALPHA4 0x8046
#define GL_LUMINANCE12_ALPHA12 0x8047
#define GL_LUMINANCE16_ALPHA16 0x8048
#define GL_INTENSITY4 0x804A
#define GL_INTENSITY8 0x804B
#define GL_INTENSITY12 0x804C
#define GL_INTENSITY16 0x804D
#define GL_BGR 0x80E0
#define GL_BGRA 0x80E1
#define GL_INTENSITY 0x8049
#define GL_RGB4 0x804F
@ -387,6 +418,14 @@ __BEGIN_DECLS
#define GL_RGBA12 0x805A
#define GL_RGBA16 0x805B
#define GL_R8 0x8229
#define GL_RG8 0x822B
#define GL_RG 0x8227
#define GL_R16 0x822A
#define GL_RG16 0x822C
#define GL_COMPRESSED_RED 0x8225
#define GL_COMPRESSED_RG 0x8226
/* Polygons */
#define GL_POINT 0x1B00
#define GL_LINE 0x1B01
@ -427,6 +466,12 @@ __BEGIN_DECLS
#define GL_FALSE 0
#define GL_TRUE 1
#if __STDCPP_FLOAT16_T__
#define GLhalf std::float16_t
#else
#define GLhalf unsigned short
#endif
/* Stubs for portability */
#define GL_LINE_SMOOTH 0x0B20
#define GL_ALPHA_TEST 0x0BC0

View File

@ -130,7 +130,7 @@ GLAPI void APIENTRY glGenFramebuffersEXT(GLsizei n, GLuint* framebuffers);
GLAPI void APIENTRY glDeleteFramebuffersEXT(GLsizei n, const GLuint* framebuffers);
GLAPI void APIENTRY glBindFramebufferEXT(GLenum target, GLuint framebuffer);
GLAPI void APIENTRY glFramebufferTexture2DEXT(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
GLAPI void APIENTRY glGenerateMipmapEXT(GLenum target);
GLAPI void APIENTRY glGenerateMipmap(GLenum target);
GLAPI GLenum APIENTRY glCheckFramebufferStatusEXT(GLenum target);
GLAPI GLboolean APIENTRY glIsFramebufferEXT(GLuint framebuffer);
@ -203,7 +203,7 @@ GLAPI void APIENTRY glCompressedTexImage2DARB(GLenum target,
#define glClientActiveTexture glClientActiveTextureARB
#define glMultiTexCoord2f glMultiTexCoord2fARB
#define glGenerateMipmap glGenerateMipmapEXT
#define glGenerateMipmapEXT glGenerateMipmap
#define glCompressedTexImage2D glCompressedTexImage2DARB
#ifndef GL_VERSION_1_4

View File

@ -35,8 +35,6 @@ extern const char* GLDC_VERSION;
#define GL_NEARZ_CLIPPING_KOS 0xEEFA
#define GL_UNSIGNED_BYTE_TWID_KOS 0xEEFB
/* Initialize the GL pipeline. GL will initialize the PVR. */
GLAPI void APIENTRY glKosInit();
@ -57,6 +55,13 @@ typedef struct {
GLuint initial_pt_capacity;
GLuint initial_immediate_capacity;
/* Default: True
*
* Whether glTexImage should automatically twiddle textures
* if the internal format is a generic format (e.g. GL_RGB).
* this is the same as calling glEnable(GL_TEXTURE_TWIDDLE_KOS)
* on boot */
GLboolean texture_twiddle;
} GLdcConfig;
@ -87,7 +92,7 @@ GLAPI void APIENTRY glKosInitConfig(GLdcConfig* config);
*/
GLAPI void APIENTRY glKosInitEx(GLdcConfig* config);
GLAPI void APIENTRY glKosSwapBuffers();
GLAPI void APIENTRY glKosShutdown();
/*
* CUSTOM EXTENSION multiple_shared_palette_KOS
@ -186,12 +191,28 @@ GLAPI void APIENTRY glKosSwapBuffers();
/* Memory allocation extension (GL_KOS_texture_memory_management) */
GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void);
/* glGet extensions */
#define GL_FREE_TEXTURE_MEMORY_KOS 0xEF3D
#define GL_USED_TEXTURE_MEMORY_KOS 0xEF3E
#define GL_FREE_CONTIGUOUS_TEXTURE_MEMORY_KOS 0xEF3F
//for palette internal format (glfcConfig)
#define GL_RGB565_KOS 0xEF40
#define GL_ARGB4444_KOS 0xEF41
#define GL_ARGB1555_KOS 0xEF42
#define GL_RGB565_TWID_KOS 0xEF43
#define GL_ARGB4444_TWID_KOS 0xEF44
#define GL_ARGB1555_TWID_KOS 0xEF45
#define GL_COLOR_INDEX8_TWID_KOS 0xEF46
#define GL_COLOR_INDEX4_TWID_KOS 0xEF47
#define GL_RGB_TWID_KOS 0xEF48
#define GL_RGBA_TWID_KOS 0xEF49
/* glGet extensions */
#define GL_TEXTURE_INTERNAL_FORMAT_KOS 0xEF50
/* If enabled, will twiddle texture uploads where possible */
#define GL_TEXTURE_TWIDDLE_KOS 0xEF51
__END_DECLS

446
samples/cubes/main.cpp Normal file
View File

@ -0,0 +1,446 @@
#include <cstdio>
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#ifdef __DREAMCAST__
#include <kos.h>
float avgfps = -1;
#endif
#include "GL/gl.h"
#include "GL/glkos.h"
#include "GL/glu.h"
#include "GL/glext.h"
#define PI 3.14159265358979323846264338327950288f
#define RAD_TO_DEG 57.295779513082320876798154814105f
#define MAX_CUBES 350
float timeElapsed = 0.0f;
const float dt = 1.0f / 60.0f;
float angle = 0;
const float invAngle360 = 1.0f / 360.0f;
const float cameraDistance = 3.0f;
bool isDrawingArrays = false;
bool isBlendingEnabled = true;
bool isRunning = true;
typedef struct
{
GLubyte r;
GLubyte g;
GLubyte b;
GLubyte a;
} Color;
Color colors[] =
{
{255, 0, 0, 128},
{0, 255, 0, 128},
{0, 0, 255, 128},
{255, 255, 0, 128},
{255, 0, 255, 128},
{0, 255, 255, 128}
};
Color faceColors[24];
float cubeVertices[] =
{
// Front face
-1.0f, -1.0f, +1.0f, // vertex 0
+1.0f, -1.0f, +1.0f, // vertex 1
+1.0f, +1.0f, +1.0f, // vertex 2
-1.0f, +1.0f, +1.0f, // vertex 3
// Back face
-1.0f, -1.0f, -1.0f, // vertex 4
+1.0f, -1.0f, -1.0f, // vertex 5
+1.0f, +1.0f, -1.0f, // vertex 6
-1.0f, +1.0f, -1.0f, // vertex 7
// Top face
-1.0f, +1.0f, +1.0f, // vertex 8
+1.0f, +1.0f, +1.0f, // vertex 9
+1.0f, +1.0f, -1.0f, // vertex 10
-1.0f, +1.0f, -1.0f, // vertex 11
// Bottom face
-1.0f, -1.0f, +1.0f, // vertex 12
+1.0f, -1.0f, +1.0f, // vertex 13
+1.0f, -1.0f, -1.0f, // vertex 14
-1.0f, -1.0f, -1.0f, // vertex 15
// Right face
+1.0f, -1.0f, +1.0f, // vertex 16
+1.0f, -1.0f, -1.0f, // vertex 17
+1.0f, +1.0f, -1.0f, // vertex 18
+1.0f, +1.0f, +1.0f, // vertex 19
// Left face
-1.0f, -1.0f, +1.0f, // vertex 20
-1.0f, -1.0f, -1.0f, // vertex 21
-1.0f, +1.0f, -1.0f, // vertex 22
-1.0f, +1.0f, +1.0f // vertex 23
};
// Set up indices array
unsigned int cubeIndices[] =
{
// Front face
0, 1, 2, 3,
// Back face
4, 5, 6, 7,
// Top face
8, 9, 10, 11,
// Bottom face
12, 13, 14, 15,
// Right face
16, 17, 18, 19,
// Left face
20, 21, 22, 23
};
typedef struct
{
float r;
float x, y, z;
float vx, vy, vz;
} Cube;
Cube cubes[MAX_CUBES];
int numCubes = 0;
// Create a 4x4 identity matrix
float cubeTransformationMatrix[16] = { 1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f };
/* Print a message to the platform log: the KOS kernel debug log on
 * Dreamcast, plain stdout everywhere else. */
void debugLog(const char* msg) {
#ifdef __DREAMCAST__
    dbglog(DBG_KDEBUG, "%s\n", msg);
#else
    printf("%s\n", msg);
#endif
}
/* Sample the current PVR frame rate and fold it into the running average
 * `avgfps` (simple smoothing: average of previous value and new sample;
 * -1 marks "no sample yet"). No-op off Dreamcast. */
void runningStats() {
#ifdef __DREAMCAST__
    pvr_stats_t stats;
    pvr_get_stats(&stats);

    if (avgfps != -1)
        avgfps = (avgfps + stats.frame_rate) * 0.5f;
    else
        avgfps = stats.frame_rate;
#endif
}
/* Log the running average frame rate accumulated by runningStats().
 * No-op off Dreamcast. */
void avgStats() {
#ifdef __DREAMCAST__
    dbglog(DBG_DEBUG, "Average frame rate: ~%f fps\n", avgfps);
#endif
}
/* Log instantaneous PVR statistics (vblank count, current frame rate)
 * followed by the running average. No-op off Dreamcast. */
void stats() {
#ifdef __DREAMCAST__
    pvr_stats_t stats;
    pvr_get_stats(&stats);
    dbglog(DBG_DEBUG, "3D Stats: %d VBLs, current frame rate ~%f fps\n", stats.vbl_count, stats.frame_rate);
    avgStats();
#endif
}
void addCube(float r, float x, float y, float z, float vx, float vy, float vz)
{
if (numCubes < MAX_CUBES) {
cubes[numCubes].r = r;
cubes[numCubes].x = x;
cubes[numCubes].y = y;
cubes[numCubes].z = z;
cubes[numCubes].vx = vx;
cubes[numCubes].vy = vy;
cubes[numCubes].vz = vz;
numCubes++;
}
}
/* Convenience wrapper: add a stationary cube whose radius is half the
 * given scale factor. */
void addCubeQuick(float x, float y, float z, float scale_factor)
{
    const float radius = 0.5f * scale_factor;
    addCube(radius, x, y, z, 0, 0, 0);
}
void updateCubes(float dt)
{
for (size_t i = 0; i < numCubes; i++)
{
Cube* cube = &cubes[i];
cube->x += cube->vx * dt;
cube->y += cube->vy * dt;
cube->z += cube->vz * dt;
if (cube->x < -3 || cube->x > +3) { cube->vx *= -1; }
if (cube->y < -3 || cube->y > +3) { cube->vy *= -1; }
if (cube->z < -3 || cube->z > +3) { cube->vz *= -1; }
}
}
/* Draw one unit cube from the global vertex/color arrays. Draws with
 * glDrawArrays or indexed glDrawElements depending on the global
 * isDrawingArrays toggle. Client states are enabled only for the
 * duration of the draw. */
void renderUnitCube()
{
    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_COLOR_ARRAY);

    glVertexPointer(3, GL_FLOAT, 0, cubeVertices);
    glColorPointer(4, GL_UNSIGNED_BYTE, 0, faceColors);

    if (isDrawingArrays) {
        glDrawArrays(GL_QUADS, 0, 24);
    }
    else {
        glDrawElements(GL_QUADS, 24, GL_UNSIGNED_INT, cubeIndices);
    }

    glDisableClientState(GL_COLOR_ARRAY);
    glDisableClientState(GL_VERTEX_ARRAY);
}
/* Draw every cube in the pool, rotated by `angle` around (1,1,1).
 * Each cube's scale grows with its index (0.05 .. 0.40) so the scene
 * contains a spread of sizes. Matrix state is saved/restored per cube. */
void renderCubes(float angle)
{
    for (size_t i = 0; i < numCubes; i++) {
        const float scale_factor = 0.05f + (i / (float)numCubes) * 0.35f;
        Cube* cube = &cubes[i];
        glPushMatrix(); // Save previous camera state
        glMatrixMode(GL_MODELVIEW);
        glTranslatef(cube->x, cube->y, cube->z);
        glRotatef(angle, 1, 1, 1); // Rotate camera / object
        glScalef(scale_factor, scale_factor, scale_factor); // Apply scale factor
        renderUnitCube();
        glPopMatrix(); // Restore previous camera state
    }
}
/* Uniform pseudo-random float in [Min, Max], driven by rand(). */
float rnd(float Min, float Max)
{
    // Same operation order as (Max - Min) * rand / RAND_MAX + Min so the
    // float rounding is unchanged.
    const float scaled = (Max - Min) * (float)rand();
    return scaled / (float)RAND_MAX + Min;
}
/* One-time setup: initialize GLdc, configure depth testing and blending,
 * set the viewport/projection, and build the per-face color array. */
void initialize()
{
    debugLog("Initialize video output");
    glKosInit();

    glClearDepth(1.0);
    glDepthFunc(GL_LEQUAL);
    glDepthMask(GL_TRUE);
    glEnable(GL_DEPTH_TEST);
    glShadeModel(GL_SMOOTH);

    // Honour the initial blending toggle (can be flipped at runtime).
    if (isBlendingEnabled)
    {
        glEnable(GL_BLEND);
    }
    else
    {
        glDisable(GL_BLEND);
    }

    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    glDisable(GL_CULL_FACE);

    glViewport(0, 0, 640, 480);
    glClearColor(0.0f, 0.0f, 0.3f, 1.0f);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();

    // Set up colors (each face has a different color): the four vertices
    // of face i all share colors[i].
    for (int i = 0; i < 6; i++)
    {
        faceColors[i * 4] = colors[i];
        faceColors[i * 4 + 1] = colors[i];
        faceColors[i * 4 + 2] = colors[i];
        faceColors[i * 4 + 3] = colors[i];
    }
}
/* Accumulate simulated time; every time more than 10 seconds have
 * elapsed, dump the frame statistics and restart the window. */
void updateTimer()
{
    timeElapsed += dt;

    if (timeElapsed <= 10.0f)
        return;

    stats();
    timeElapsed = 0.0f;
}
/* Per-frame update: advance the rotation angle, clear the buffers, set up
 * the camera, then step and draw all cubes. */
void updateLogic()
{
    updateTimer();

    // Wrap `angle` back into one revolution, then advance it (50 deg/s).
    const int fullRot = (int)(angle * invAngle360);
    angle -= fullRot * 360.0f;
    angle += 50.0f * dt;

    // Oscillating zoom offset driven by elapsed time.
    const float zoomVal = __builtin_sinf(timeElapsed) * 5.0f;

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    // Set up the camera position and orientation
    float cameraPos[] = { 0.0f, 0.0f, cameraDistance };
    float cameraTarget[] = { 0.0f, 0.0f, 0.0f };
    float cameraUp[] = { 0.0f, 1.0f, 0.0f };

    // Move the camera
    gluLookAt(cameraPos[0], cameraPos[1], cameraPos[2],
              cameraTarget[0], cameraTarget[1], cameraTarget[2],
              cameraUp[0], cameraUp[1], cameraUp[2]);

    glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal);

    // Apply cube transformation (identity matrix)
    // NOTE(review): this glLoadIdentity() discards the gluLookAt/glTranslatef
    // camera setup above — confirm whether that is intentional.
    glLoadIdentity();

    updateCubes(dt);
    renderCubes(angle);

    // Reset ModelView matrix to remove camera transformation
    float matrix[16];
    glGetFloatv(GL_MODELVIEW_MATRIX, matrix);
    matrix[12] = 0.0f;
    matrix[13] = 0.0f;
    matrix[14] = 0.0f;
    glMatrixMode(GL_MODELVIEW);
    glLoadMatrixf(matrix);
}
/* Poll the first Dreamcast controller and handle edge-triggered presses:
 *   START - stop the main loop
 *   A     - toggle glDrawArrays vs glDrawElements (clear color shows mode)
 *   B     - toggle alpha blending
 * No-op on non-Dreamcast builds. */
void updateInput()
{
#ifdef __DREAMCAST__
    /* NOTE(review): prevButtons is 8-bit; assumes the tracked CONT_* bits
     * fit in the low byte of state->buttons — confirm against kos headers. */
    static uint8_t prevButtons = 0;
    maple_device_t* cont;
    cont_state_t* state;

    cont = maple_enum_type(0, MAPLE_FUNC_CONTROLLER);

    if (cont)
    {
        state = (cont_state_t*)maple_dev_status(cont);

        /* maple_dev_status() can return NULL; the old code still read
         * state->buttons unconditionally at the end, dereferencing NULL.
         * Bail out early instead and keep the previous button state. */
        if (!state)
            return;

        if ((state->buttons & CONT_START) && !(prevButtons & CONT_START))
        {
            isRunning = false;
        }

        if ((state->buttons & CONT_A) && !(prevButtons & CONT_A))
        {
            isDrawingArrays = !isDrawingArrays;
            /* Change the clear color so the active draw path is visible. */
            if (isDrawingArrays)
            {
                glClearColor(0.3f, 0.0f, 0.3f, 1.0f);
            }
            else
            {
                glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
            }
        }

        if ((state->buttons & CONT_B) && !(prevButtons & CONT_B))
        {
            isBlendingEnabled = !isBlendingEnabled;
            if (isBlendingEnabled)
            {
                glEnable(GL_BLEND);
            }
            else
            {
                glDisable(GL_BLEND);
            }
        }

        prevButtons = state->buttons;
    }
#endif
}
/* Present the finished frame (GLdc buffer swap on Dreamcast; no-op
 * elsewhere). */
void swapBuffers()
{
#ifdef __DREAMCAST__
    glKosSwapBuffers();
#endif
}
/* Entry point: set up GL and the projection, seed the scene with random
 * cubes, then run the update/input/present loop until START is pressed. */
int main(int argc, char* argv[])
{
    initialize();

    // Setup camera frustum.
    const float aspectRatio = 640.0f / 480.0f;
    const float fieldOfView = 60;
    const float nearPlane = 0.1f;
    const float farPlane = 1000.0f;
    gluPerspective(fieldOfView, aspectRatio, nearPlane, farPlane);

    // Populate the scene with randomly placed, randomly moving cubes.
    // Each rnd() call is a separate statement so the rand() draw order
    // stays fixed.
    for (int i = 0; i < MAX_CUBES; ++i)
    {
        const float radius = rnd(0.1f, 0.5f);
        const float px = rnd(-3.0f, 3.0f);
        const float py = rnd(-3.0f, 3.0f);
        const float pz = rnd(-3.0f, 3.0f);
        const float sx = rnd(-2.0f, 2.0f);
        const float sy = rnd(-2.0f, 2.0f);
        const float sz = rnd(-2.0f, 2.0f);
        addCube(radius, px, py, pz, sx, sy, sz);
    }

    while (isRunning)
    {
        updateLogic();
        updateInput();
        swapBuffers();
        runningStats();
    }

    avgStats();
    return 0;
}

View File

@ -145,7 +145,7 @@ int check_start() {
void DrawCube(float x, float z) {
static float pos = 0.0f;
const static float radius = 30.0f;
static const float radius = 30.0f;
pos += 0.001f;

View File

@ -23,7 +23,11 @@ int ImageLoad(char *filename, Image *image) {
}
// seek through the bmp header, up to the width/height:
fseek(file, 18, SEEK_CUR);
fseek(file, 10, SEEK_CUR);
uint32_t offset;
fread(&offset, 4, 1, file);
fseek(file, 4, SEEK_CUR);
// read the width
if ((i = fread(&sizeX, 4, 1, file)) != 1) {
@ -65,7 +69,7 @@ int ImageLoad(char *filename, Image *image) {
}
// seek past the rest of the bitmap header.
fseek(file, 24, SEEK_CUR);
fseek(file, offset, SEEK_SET);
// read the data.
image->data = (char *) malloc(size);

View File

@ -9,7 +9,7 @@
/* A general OpenGL initialization function. Sets all of the initial parameters. */
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
{
glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black
glClearColor(0.0f, 0.0f, 1.0f, 0.0f); // This Will Clear The Background Color To Black
glClearDepth(1.0); // Enables Clearing Of The Depth Buffer
glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do
glEnable(GL_DEPTH_TEST); // Enables Depth Testing
@ -20,7 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
glMatrixMode(GL_MODELVIEW);
glMatrixMode(GL_MODELVIEW);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */

View File

@ -53,10 +53,10 @@ void LoadGLTextures() {
// 2d texture, level of detail 0 (normal), 3 components (red, green, blue), x size from image, y size from image,
// border 0 (normal), rgb color data, unsigned byte data, and finally the data itself.
glTexImage2D(GL_TEXTURE_2D, 0, 3, image1->sizeX, image1->sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1->data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, image1->sizeX, image1->sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1->data);
free(image1);
};
}
/* A general OpenGL initialization function. Sets all of the initial parameters. */
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
@ -74,7 +74,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
glMatrixMode(GL_MODELVIEW);
glMatrixMode(GL_MODELVIEW);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */

Binary file not shown.

Before

Width:  |  Height:  |  Size: 192 KiB

After

Width:  |  Height:  |  Size: 96 KiB

View File

@ -59,10 +59,10 @@ int ImageLoad(char *filename, Image *image) {
fread(&header, sizeof(header), 1, file);
GLboolean twiddled = (header.type & (1 << 25)) < 1;
GLboolean compressed = (header.type & (1 << 29)) > 0;
GLboolean mipmapped = (header.type & (1 << 30)) > 0;
GLboolean strided = (header.type & (1 << 24)) > 0;
GLboolean twiddled = (header.type & (1 << 26)) < 1;
GLboolean compressed = (header.type & (1 << 30)) > 0;
GLboolean mipmapped = (header.type & (1 << 31)) > 0;
GLboolean strided = (header.type & (1 << 25)) > 0;
GLuint format = (header.type >> 27) & 0b111;
image->data = (char *) malloc (header.size);

View File

@ -10,6 +10,8 @@
#ifdef __DREAMCAST__
#include <kos.h>
#else
#include <SDL.h>
#endif
#include <stdio.h>
@ -17,7 +19,9 @@
#include <GL/glu.h>
#include <GL/glkos.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include "../loadbmp.h"
@ -84,7 +88,16 @@ void SetupWorld()
int numtriangles;
FILE *filein;
char oneline[255];
#ifdef __DREAMCAST__
filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From
#else
filein = fopen("../samples/nehe10/romdisk/world.txt", "rt");
#endif
if(!filein) {
fprintf(stderr, "Failed to load world file\n");
exit(1);
}
readstr(filein,oneline);
sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles);
@ -228,6 +241,13 @@ void DrawGLScene(void) {
}
int ReadController(void) {
bool start = false;
bool up = false;
bool down = false;
bool left = false;
bool right = false;
#ifdef __DREAMCAST__
maple_device_t *cont;
cont_state_t *state;
@ -241,10 +261,27 @@ int ReadController(void) {
return 0;
}
if(state->buttons & CONT_START)
return 0;
start = (state->buttons & CONT_START);
up = (state->buttons & CONT_DPAD_UP);
down = (state->buttons & CONT_DPAD_DOWN);
left = (state->buttons & CONT_DPAD_LEFT);
right = (state->buttons & CONT_DPAD_RIGHT);
if(state->buttons & CONT_DPAD_UP) {
#else
int num_keys = 0;
uint8_t* state = SDL_GetKeyboardState(&num_keys);
start = state[SDL_SCANCODE_RETURN];
up = state[SDL_SCANCODE_UP];
down = state[SDL_SCANCODE_DOWN];
left = state[SDL_SCANCODE_LEFT];
right = state[SDL_SCANCODE_RIGHT];
#endif
if(start) {
return 0;
}
if(up) {
xpos -= (float)sin(heading*piover180) * 0.05f;
zpos -= (float)cos(heading*piover180) * 0.05f;
if (walkbiasangle >= 359.0f)
@ -258,8 +295,7 @@ int ReadController(void) {
walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
}
if(state->buttons & CONT_DPAD_DOWN) {
if(down) {
xpos += (float)sin(heading*piover180) * 0.05f;
zpos += (float)cos(heading*piover180) * 0.05f;
if (walkbiasangle <= 1.0f)
@ -273,18 +309,17 @@ int ReadController(void) {
walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
}
if(state->buttons & CONT_DPAD_LEFT) {
if(left) {
heading += 1.0f;
yrot = heading;
}
if(state->buttons & CONT_DPAD_RIGHT) {
if(right) {
heading -= 1.0f;
yrot = heading;
}
#endif
/* Switch to the blended polygon list if needed */
if(blend) {

View File

@ -157,4 +157,4 @@ NUMPOLLIES 36
2.0 0.0 -0.5 0.0 0.0
3.0 1.0 -0.5 1.0 1.0
2.0 1.0 -0.5 0.0 1.0
2.0 0.0 -0.5 0.0 0.0
2.0 0.0 -0.5 0.0 0.0

View File

@ -132,7 +132,7 @@ void LoadGLTextures() {
// 2d texture, level of detail 0 (normal), 3 components (red, green, blue), x size from image, y size from image,
// border 0 (normal), rgb color data, unsigned byte data, and finally the data itself.
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1->width, image1->height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE_TWID_KOS, image1->data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1->width, image1->height, 0, GL_COLOR_INDEX8_TWID_KOS, GL_UNSIGNED_BYTE, image1->data);
glGenerateMipmapEXT(GL_TEXTURE_2D);
free(image1);

View File

@ -254,6 +254,8 @@ int BMP_Infos(FILE *pFile, uint32_t *width, uint32_t *height)
*width = (uint32_t)BmpInfoHeader.Width;
*height = (uint32_t)BmpInfoHeader.Height;
fseek(pFile, BmpInfoHeader.Size + 14, SEEK_SET);
return 1;
}
@ -270,6 +272,7 @@ int BMP_GetPalette(FILE *pFile)
bitCount = BmpInfoHeader.ClrImportant * sizeof(RGB_QUAD);
if (fread(BmpRgbQuad, 1, bitCount, pFile) != bitCount){
fprintf(stderr, "Failed to read palette: %d\n", bitCount);
return 0;
}
@ -281,6 +284,8 @@ int BMP_GetPalette(FILE *pFile)
}
return 1;
}
fprintf(stderr, "BitCount: %d\n", BmpInfoHeader.BitCount);
return 0;
}
@ -346,7 +351,7 @@ int LoadPalettedBMP(const char* filename, Image* image)
}
if (!BMP_GetPalette(fp)) {
printf("Only 16c BMP are supported for this sample");
printf("Only 16c BMP are supported for this sample\n");
return 0;
}
@ -429,7 +434,7 @@ void LoadGLTextures() {
#ifndef USE_16C_PALETTE
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image1.data);
#endif
glBindTexture(GL_TEXTURE_2D, textures[1]); // 2d texture (x and y size)
@ -444,7 +449,7 @@ void LoadGLTextures() {
#ifndef USE_16C_PALETTE
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image1.data);
#endif
glBindTexture(GL_TEXTURE_2D, textures[2]);
@ -463,7 +468,7 @@ void LoadGLTextures() {
#ifndef USE_16C_PALETTE
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image2.data);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image2.data);
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image2.data);
#endif
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

After

Width:  |  Height:  |  Size: 16 KiB

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
#include <stddef.h>
#include <time.h>
#include <stdio.h>
#ifdef __DREAMCAST__
#include <kos.h>
#include "../profiler.h"
#endif
#include <GL/gl.h>
#include <GL/glkos.h>
#include "image.h"
#define PROFILE 0
/* Benchmark: repeatedly upload the same RGB image with glTexImage2D for
 * five seconds and report the call count and seconds-per-call.
 * width/height/header_data are supplied by the generated "image.h". */
int main(int argc, char* argv[]) {
    (void) argc;
    (void) argv;

    fprintf(stdout, "Initializing\n");
    glKosInit();

    /* One clear + swap so the driver is fully warmed up before timing */
    glClearColor(0.5f, 0.0f, 0.5f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glKosSwapBuffers();

    GLuint texture_id = 0;
    glGenTextures(1, &texture_id);
    glBindTexture(GL_TEXTURE_2D, texture_id);

    time_t start = time(NULL);
    time_t end = start;
    int counter = 0;  /* number of glTexImage2D calls completed */

    fprintf(stderr, "Starting test run...\n");

#ifdef __DREAMCAST__
#if PROFILE
    profiler_init("/pc/gmon.out");
    profiler_start();
#endif
#endif

    /* Hammer glTexImage2D until five wall-clock seconds have elapsed.
     * time() has 1-second resolution, which is fine at this scale. */
    while((end - start) < 5) {
        glTexImage2D(
            GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, header_data
        );
        ++counter;
        end = time(NULL);
    }

#ifdef __DREAMCAST__
#if PROFILE
    profiler_stop();
    profiler_clean_up();
#endif
#endif

    /* "%.4f per call" is seconds per call: elapsed time / call count */
    fprintf(stderr, "Called glTexImage2D %d times (%.4f per call)\n", counter, (float)(end - start) / (float)(counter));
    return 0;
}

View File

@ -68,21 +68,16 @@ int check_start() {
}
void setup() {
//PVR needs to warm up for a frame, or results will be low
glKosInit();
GLdcConfig cfg;
glKosInitConfig(&cfg);
cfg.initial_immediate_capacity = 14000;
glKosInitEx(&cfg);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glOrtho(0, 640, 0, 480, -100, 100);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glDisable(GL_NEARZ_CLIPPING_KOS);
#ifdef __DREAMCAST__
pvr_wait_ready();
pvr_scene_begin();
pvr_scene_finish();
#endif
}
void do_frame() {
@ -116,6 +111,8 @@ time_t begin;
void switch_tests(int ppf) {
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
ppf * 2, ppf * 2 * 60);
fflush(stdout);
avgfps = -1;
polycnt = ppf;
}
@ -128,7 +125,6 @@ void check_switch() {
if(now >= (begin + 5)) {
begin = time(NULL);
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
switch(phase) {
case PHASE_HALVE:
@ -169,19 +165,24 @@ void check_switch() {
case PHASE_FINAL:
break;
}
fflush(stdout);
}
}
#define PROFILE 0
int main(int argc, char **argv) {
#ifndef NDEBUG
#ifdef __DREAMCAST__
#if PROFILE
profiler_init("/pc/gmon.out");
profiler_start();
#endif
#endif
setup();
#if PROFILE
profiler_start();
#endif
/* Start off with something obscene */
switch_tests(200000 / 60);
begin = time(NULL);
@ -200,11 +201,9 @@ int main(int argc, char **argv) {
stats();
#ifdef __DREAMCAST__
#ifndef NDEBUG
#if PROFILE
profiler_stop();
profiler_clean_up();
#endif
#endif
return 0;

View File

@ -28,6 +28,8 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glEnable(GL_CULL_FACE);
}
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */
@ -86,12 +88,13 @@ void DrawGLScene()
rotation = (rotation > 360.0f) ? rotation - 360.0f : rotation;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer
glClearColor(0.5f, 0.5f, 0.5f, 0.5f);
glLoadIdentity(); // Reset The View
glDisable(GL_CULL_FACE);
glPushMatrix();
glTranslatef(0.0f, -1.0f, movement);
glTranslatef(0.0f, -1.0f, -movement);
glRotatef(rotation, 0.0f, 1.0f, 0.0f);
glBegin(GL_TRIANGLES);

26
tests/CMakeLists.txt Normal file
View File

@ -0,0 +1,26 @@
FILE(GLOB GL_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/test_*.h)
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR})
SET(TEST_GENERATOR_BIN ${CMAKE_SOURCE_DIR}/tools/test_generator.py)
SET(TEST_MAIN_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/main.cpp)
ADD_CUSTOM_COMMAND(
OUTPUT ${TEST_MAIN_FILENAME}
COMMAND ${TEST_GENERATOR_BIN} --output ${TEST_MAIN_FILENAME} ${TEST_FILES} ${GL_TESTS}
DEPENDS ${TEST_FILES} ${GL_TESTS} ${TEST_GENERATOR_BIN}
)
add_executable(gldc_tests ${TEST_FILES} ${TEST_SOURCES} ${TEST_MAIN_FILENAME})
target_link_libraries(gldc_tests GLdc)
if(NOT PLATFORM_DREAMCAST)
set_target_properties(
gldc_tests
PROPERTIES
COMPILE_OPTIONS "-m32"
LINK_OPTIONS "-m32"
)
endif()

189
tests/test_allocator.h Normal file
View File

@ -0,0 +1,189 @@
#include "tools/test.h"
#include <cstdint>
#include <cassert>
#include <malloc.h>
#include <utility>
#include <GL/gl.h>
#include <GL/glkos.h>
#include "GL/alloc/alloc.h"
/* Round `n` up to the nearest multiple of `multiple` (must be non-zero). */
static inline int round_up(int n, int multiple)
{
    assert(multiple);
    const int quotient = (n + multiple - 1) / multiple;
    return quotient * multiple;
}
#define POOL_SIZE (16 * 2048)
/* Unit tests for the block allocator in GL/alloc/alloc.h, which manages a
 * pool of texture memory in 2048-byte-aligned blocks. */
class AllocatorTests : public test::TestCase {
public:
    uint8_t* pool = NULL;  // 2048-aligned backing buffer of POOL_SIZE bytes
    std::vector<std::pair<void*, void*>> defrag_moves;  // (src, dst) pairs recorded by on_defrag

    void set_up() {
        pool = (uint8_t*) memalign(2048, POOL_SIZE);
        assert(((intptr_t) pool) % 2048 == 0);
    }

    void tear_down() {
        alloc_shutdown(pool);
        free(pool);
    }

    /* Callback handed to alloc_run_defrag; records every block move. */
    static void on_defrag(void* src, void* dst, void* user_data) {
        AllocatorTests* self = (AllocatorTests*) user_data;
        self->defrag_moves.push_back(std::make_pair(src, dst));
    }

    /* Freeing a middle allocation and defragging should slide the last
     * allocation down into the freed hole. */
    void test_defrag() {
        alloc_init(pool, POOL_SIZE);

        alloc_malloc(pool, 256);
        void* a2 = alloc_malloc(pool, 256);
        void* a3 = alloc_malloc(pool, 256);

        alloc_free(pool, a2);
        alloc_run_defrag(pool, &AllocatorTests::on_defrag, 5, this);

        assert_equal(defrag_moves.size(), 1u); // Moved a3 -> a2
        assert_equal(defrag_moves[0].first, a3);
        assert_equal(defrag_moves[0].second, a2);
        /* The slot a3 used to occupy is now the next free space */
        assert_equal(alloc_malloc(pool, 256), a3);
    }

    void test_poor_alloc_aligned() {
        /* If we try to allocate and there are no suitable aligned
         * slots available, we fallback to any available unaligned slots */
        alloc_init(pool, POOL_SIZE);

        // Leave only space for an unaligned block
        alloc_malloc(pool, (15 * 2048) - 256);

        // Should work, we have space (just) but it's not aligned
        void* a1 = alloc_malloc(pool, 2048 + 256);
        assert_is_not_null(a1);
        assert_equal(a1, pool + ((15 * 2048) - 256));
    }

    void test_poor_alloc_straddling() {
        /*
         * If we try to allocate a small block, it should not
         * cross a 2048 boundary unless there is no other option */
        alloc_init(pool, POOL_SIZE);
        alloc_malloc(pool, (15 * 2048) - 256);
        void* a1 = alloc_malloc(pool, 512);
        assert_true((uintptr_t(a1) % 2048) == 0); // Should've aligned to the last 2048 block

        /* Allocate the rest of the last block, this leaves a 256 block in the
         * penultimate block */
        alloc_malloc(pool, 1536);
        alloc_free(pool, a1);

        /* No choice but to straddle the boundary */
        a1 = alloc_malloc(pool, 768);
    }

    /* After init, the usable region starts at the first 2048 boundary
     * inside the pool, and the block count covers the remaining space. */
    void test_alloc_init() {
        alloc_init(pool, POOL_SIZE);

        void* expected_base_address = (void*) round_up((uintptr_t) pool, 2048);
        assert_equal(alloc_next_available(pool, 16), expected_base_address);
        assert_equal(alloc_base_address(pool), expected_base_address);

        size_t expected_blocks = (
            uintptr_t(pool + POOL_SIZE) -
            uintptr_t(expected_base_address)
        ) / 2048;

        assert_equal(alloc_block_count(pool), expected_blocks);
    }

    /* Regression: a sequence of allocs/frees that previously misbehaved
     * (see "Fix infinite loop" history). Passing == not crashing/looping. */
    void test_complex_case() {
        uint8_t* large_pool = (uint8_t*) malloc(8 * 1024 * 1024);

        alloc_init(large_pool, 8 * 1024 * 1024);
        alloc_malloc(large_pool, 262144);
        alloc_malloc(large_pool, 262144);
        void* a1 = alloc_malloc(large_pool, 524288);
        alloc_free(large_pool, a1);
        alloc_malloc(large_pool, 699056);
        alloc_malloc(large_pool, 128);
        alloc_shutdown(large_pool);

        free(large_pool);
    }

    /* Second regression sequence: repeated alloc/free of same-size blocks
     * interleaved with odd-sized allocations. */
    void test_complex_case2() {
        uint8_t* large_pool = (uint8_t*) malloc(8 * 1024 * 1024);
        alloc_init(large_pool, 8 * 1024 * 1024);

        void* a1 = alloc_malloc(large_pool, 131072);
        alloc_free(large_pool, a1);

        alloc_malloc(large_pool, 174768);
        void* a2 = alloc_malloc(large_pool, 131072);
        alloc_free(large_pool, a2);

        alloc_malloc(large_pool, 174768);
        void* a3 = alloc_malloc(large_pool, 128);
        alloc_free(large_pool, a3);

        alloc_shutdown(large_pool);
        free(large_pool);
    }

    /* Exercises the sub-block (< 2048 byte) placement rules. */
    void test_alloc_malloc() {
        alloc_init(pool, POOL_SIZE);

        uint8_t* base_address = (uint8_t*) alloc_base_address(pool);
        void* a1 = alloc_malloc(pool, 1024);

        /* First alloc should always be the base address */
        assert_equal(a1, base_address);

        /* An allocation of <= 2048 (well 1024) will not necessarily be at
         * a 2k boundary */
        void* expected_next_available = base_address + uintptr_t(1024);
        assert_equal(alloc_next_available(pool, 1024), expected_next_available);

        /* Requesting 2k though will force to a 2k boundary */
        expected_next_available = base_address + uintptr_t(2048);
        assert_equal(alloc_next_available(pool, 2048), expected_next_available);

        /* Now alloc 2048 bytes, this should be on the 2k boundary */
        void* a2 = alloc_malloc(pool, 2048);
        assert_equal(a2, expected_next_available);

        /* If we try to allocate 1k, this should go in the second half of the
         * first block */
        expected_next_available = base_address + uintptr_t(1024);
        void* a3 = alloc_malloc(pool, 1024);
        assert_equal(a3, expected_next_available);

        alloc_free(pool, a1);

        /* Next allocation would go in the just freed block */
        expected_next_available = base_address;
        assert_equal(alloc_next_available(pool, 64), expected_next_available);

        /* Now allocate 14 more 2048 size blocks, the following one should
         * return NULL */
        for(int i = 0; i < 14; ++i) {
            alloc_malloc(pool, 2048);
        }

        assert_is_null(alloc_malloc(pool, 2048));

        /* But we should still have room in the second block for this */
        assert_is_not_null(alloc_malloc(pool, 64));
    }
};

77
tests/test_glteximage2d.h Normal file
View File

@ -0,0 +1,77 @@
#include "tools/test.h"
#include <stdint.h>
#include <GL/gl.h>
#include <GL/glkos.h>
/* Tests that glTexImage2D converts client-side formats to the expected
 * GLdc internal formats, both with and without texture twiddling. */
class TexImage2DTests : public test::TestCase {
public:
    uint8_t image_data[8 * 8 * 4] = {0};  // 8x8 RGBA source texture

    void set_up() {
        GLdcConfig config;
        glKosInitConfig(&config);
        /* Twiddling off by default; individual tests enable it explicitly */
        config.texture_twiddle = false;
        glKosInitEx(&config);

        /* Init image data so each texel RGBA value matches the
         * position in the array */
        for(int i = 0; i < 8 * 8 * 4; i += 4) {
            image_data[i + 0] = i;
            image_data[i + 1] = i;
            image_data[i + 2] = i;
            image_data[i + 3] = i;
        }
    }

    void tear_down() {
        glKosShutdown();
    }

    /* GL_RGB uploads should land as (untwiddled) RGB565. */
    void test_rgb_to_rgb565() {
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE, image_data);
        assert_equal(glGetError(), GL_NO_ERROR);

        GLint internalFormat;
        glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);

        assert_equal(internalFormat, GL_RGB565_KOS);
    }

    /* With GL_TEXTURE_TWIDDLE_KOS enabled, RGB becomes twiddled RGB565. */
    void test_rgb_to_rgb565_twiddle() {
        glEnable(GL_TEXTURE_TWIDDLE_KOS);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE, image_data);
        glDisable(GL_TEXTURE_TWIDDLE_KOS);

        assert_equal(glGetError(), GL_NO_ERROR);

        GLint internalFormat;
        glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);

        assert_equal(internalFormat, GL_RGB565_TWID_KOS);
    }

    /* GL_RGBA uploads should land as (untwiddled) ARGB4444. */
    void test_rgba_to_argb4444() {
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 8, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, image_data);
        assert_equal(glGetError(), GL_NO_ERROR);

        GLint internalFormat;
        glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);

        assert_equal(internalFormat, GL_ARGB4444_KOS);
    }

    /* With twiddling enabled, RGBA becomes twiddled ARGB4444. */
    void test_rgba_to_argb4444_twiddle() {
        glEnable(GL_TEXTURE_TWIDDLE_KOS);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 8, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, image_data);
        glDisable(GL_TEXTURE_TWIDDLE_KOS);

        assert_equal(glGetError(), GL_NO_ERROR);

        GLint internalFormat;
        glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);

        assert_equal(internalFormat, GL_ARGB4444_TWID_KOS);
    }
};

637
tests/zclip/main.cpp Normal file
View File

@ -0,0 +1,637 @@
#include <cstdint>
#include <vector>
#include <cstdio>
#include <cmath>
#include <stdexcept>
#include <cassert>
#define SQ_BASE_ADDRESS 0
#define SPAN_SORT_CFG 0
#define PVR_SET(x, y) (void)(x); (void)(y)
/* One display-list element as submitted to the GPU: a command word followed
 * by position, texture coordinates, clip-space w, and packed colour. */
struct Vertex {
    uint32_t flags;   // GPU_CMD_* command word
    float xyz[3];     // position (clip space on input, screen space after divide)
    float uv[2];      // texture coordinates
    float w;          // clip-space w component
    uint8_t bgra[4];  // packed BGRA colour
};
/* Viewport transform constants: half-extents and centre offsets used by
 * _glPerspectiveDivideVertex to map NDC to screen coordinates.
 * Values correspond to a 640x480 render target. */
struct {
    float hwidth;          // half the viewport width
    float x_plus_hwidth;   // viewport x offset + half width
    float hheight;         // half the viewport height
    float y_plus_hheight;  // viewport y offset + half height
} VIEWPORT = {320, 320, 240, 240};
/* Minimal stand-in for the driver's video-mode query. */
struct VideoMode {
    float height;  // display height in pixels, used to flip the Y axis
};

static VideoMode* GetVideoMode() {
    /* NOTE(review): height is 320 here while VIEWPORT implies a 480-high
     * display — confirm the mismatch is intentional for these tests. */
    static VideoMode mode = {320.0f};
    return &mode;
}
/* PVR display-list command words stored in Vertex::flags. */
enum GPUCommand {
    GPU_CMD_POLYHDR = 0x80840000,     // polygon header
    GPU_CMD_VERTEX = 0xe0000000,      // strip vertex
    GPU_CMD_VERTEX_EOL = 0xf0000000,  // final vertex of a strip
    GPU_CMD_USERCLIP = 0x20000000,
    GPU_CMD_MODIFIER = 0x80000000,
    GPU_CMD_SPRITE = 0xA0000000
};
static std::vector<Vertex> sent;
/* Linearly blend two packed BGRA colours: *out = a * (1 - t) + b * t.
 * Processes two byte lanes at a time: MASK1 isolates bytes 0 and 2, MASK2
 * bytes 1 and 3, giving each channel headroom for the 8.8 fixed-point
 * multiply before the >> 8 renormalisation. f1 + f2 == 256, so the blend
 * is weight-preserving; t is expected in [0, 1].
 * NOTE(review): the (*a & MASK2) * f1 product can exceed 32 bits when the
 * top byte is large — confirm the wraparound is acceptable here. */
static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) {
    const static uint32_t MASK1 = 0x00FF00FF;
    const static uint32_t MASK2 = 0xFF00FF00;

    const uint32_t f2 = 256 * t;
    const uint32_t f1 = 256 - f2;

    *out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) |
           (((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2);
}
/* Intersect the edge v1->v2 with the near plane (z = -w) and write the
 * interpolated vertex (position, w, uv, colour) to vout.
 *
 * d0/d1 are the signed distances of the endpoints from the plane. The
 * interpolation factor is t = d0 / (d0 - d1): 1.f / sqrtf(n * n) computes
 * 1/|n| (the fast reciprocal-sqrt path on SH4) and `sign` restores the
 * direction. `epsilon` nudges t slightly towards the inside so the new
 * vertex never lands exactly on the plane. */
static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
    /* Clipping time! */

    const float d0 = v1->w + v1->xyz[2];
    const float d1 = v2->w + v2->xyz[2];

    const float sign = ((2.0f * (d1 < d0)) - 1.0f);
    const float epsilon = -0.00001f * sign;
    const float n = (d0 - d1);
    const float r = (1.f / sqrtf(n * n)) * sign;
    float t = fmaf(r, d0, epsilon);

    vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
    vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
    vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);

    vout->w = fmaf(v2->w - v1->w, t, v1->w);

    vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
    vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);

    interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra);
}
/* True for either vertex command: mid-strip or end-of-strip. */
bool glIsVertex(const uint32_t flags) {
    switch(flags) {
        case GPU_CMD_VERTEX:
        case GPU_CMD_VERTEX_EOL:
            return true;
        default:
            return false;
    }
}
/* Only the end-of-list vertex command terminates a strip. */
bool glIsLastVertex(const uint32_t flags) {
    const bool is_eol = (flags == GPU_CMD_VERTEX_EOL);
    return is_eol;
}
/* Test stub for the store-queue submission path: instead of writing to the
 * hardware, append the submitted header/vertex to the global `sent` vector
 * so the tests can inspect the emitted stream. The destination pointer is
 * ignored. */
void _glSubmitHeaderOrVertex(volatile uint32_t*, Vertex* vtx) {
    sent.push_back(*vtx);
}
/* Return 1 / |x|. sqrtf(x * x) == |x|, written this way so the SH4 build
 * can use the fast reciprocal-square-root instruction rather than a divide.
 * NOTE(review): x * x overflows to +inf for |x| > ~1.8e19, making the
 * result 0 — confirm callers never pass such magnitudes. */
float _glFastInvert(float x) {
    return (1.f / __builtin_sqrtf(x * x));
}
/* Apply the perspective divide to a clip-space vertex and map it to screen
 * coordinates using the global VIEWPORT. h is the display height in pixels,
 * used to flip Y (screen Y grows downwards). xyz[2] is replaced by a
 * depth value suitable for the hardware (see comment below). */
void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
    const float f = _glFastInvert(vertex->w);

    /* Convert to NDC and apply viewport */
    vertex->xyz[0] = __builtin_fmaf(
        VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
    );
    vertex->xyz[1] = h - __builtin_fmaf(
        VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
    );

    /* Orthographic projections need to use invZ otherwise we lose
       the depth information. As w == 1, and clip-space range is -w to +w
       we add 1.0 to the Z to bring it into range. We add a little extra to
       avoid a divide by zero.
    */
    vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
}
/* Copy one Vertex; plain assignment lets the compiler emit the best copy. */
void memcpy_vertex(Vertex* dst, Vertex* src) {
    dst[0] = src[0];
}
/* Z-clipping is so difficult to get right that this sample tests all the cases of clipping and makes sure that things work as expected */
#ifdef __DREAMCAST__
static volatile int *pvrdmacfg = (int*)0xA05F6888;
static volatile int *qacr = (int*)0xFF000038;
#else
static int pvrdmacfg[2];
static int qacr[2];
#endif
/* Submit a triangle-strip display list to the PVR, clipping each triangle
 * against the near plane (z >= -w) as it streams through.
 *
 * src points at a GPU_CMD_POLYHDR element followed by vertices, the final
 * one flagged GPU_CMD_VERTEX_EOL; n is the total element count. Vertices
 * flow through a small ring buffer so every strip triangle is examined
 * with all three corners available; visible_mask holds one visibility bit
 * per queued vertex (bit 0 = oldest) and selects one of the 8 clip cases. */
void SceneListSubmit(void* src, int n) {
    /* You need at least a header, and 3 vertices to render anything */
    if(n < 4) {
        return;
    }

    const float h = GetVideoMode()->height;

    PVR_SET(SPAN_SORT_CFG, 0x0);

    //Set PVR DMA registers
    pvrdmacfg[0] = 1;
    pvrdmacfg[1] = 1;

    //Set QACR registers
    qacr[1] = qacr[0] = 0x11;

    volatile uint32_t *d = SQ_BASE_ADDRESS;

    int8_t queue_head = 0;
    int8_t queue_tail = 0;

    /* Ring buffer of pending strip vertices; head == tail means empty.
     * NOTE(review): the original comment claimed clipping can put up to 5
     * vertices in flight, yet the buffer holds 4 — confirm that 4 slots
     * are sufficient for every clip path. */
    Vertex __attribute__((aligned(32))) queue[4];
    const int queue_capacity = sizeof(queue) / sizeof(Vertex);

    Vertex* vertex = (Vertex*) src;
    uint32_t visible_mask = 0;

#if CLIP_DEBUG
    for(int i = 0; i < n; ++i) {
        fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
    }

    fprintf(stderr, "----\n");
#endif

    while(n--) {
        bool last_vertex = false;
        memcpy_vertex(queue + queue_tail, vertex);
        ++vertex;
        switch(queue[queue_tail].flags) {
            case GPU_CMD_POLYHDR:
                /* Headers bypass clipping and go straight out */
                _glSubmitHeaderOrVertex(d, &queue[queue_tail]);
                break;
            case GPU_CMD_VERTEX_EOL:
                last_vertex = true; // fallthru
            case GPU_CMD_VERTEX:
                /* Shift in this vertex's in-front-of-plane bit as bit 2 */
                visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2;
                assert(visible_mask < 15);
                queue_tail = (queue_tail + 1) % queue_capacity;
            default:
                break;
        }

        int counter = (queue_tail - queue_head + queue_capacity) % queue_capacity;
        if(counter < 3) {
            /* Not enough queued vertices to form a triangle yet */
            continue;
        }

#if CLIP_DEBUG
        fprintf(stderr, "%d\n", visible_mask);
#endif
        /* Scratch vertices produced by edge clipping */
        Vertex __attribute__((aligned(32))) a, b;
        switch(visible_mask) {
            case 0:
                /* All three corners behind the plane: emit nothing */
                break;
            case 7:
                /* All visible, push the first vertex and move on */
                _glPerspectiveDivideVertex(&queue[queue_head], h);
                _glSubmitHeaderOrVertex(d, &queue[queue_head]);

                if(last_vertex) {
                    /* If this was the last vertex in the strip, we need to flush the queue and then
                       restart it again */

                    int v1 = (queue_head + 1) % queue_capacity;
                    int v2 = (queue_head + 2) % queue_capacity;
                    _glPerspectiveDivideVertex(&queue[v1], h);
                    _glSubmitHeaderOrVertex(d, &queue[v1]);
                    _glPerspectiveDivideVertex(&queue[v2], h);
                    _glSubmitHeaderOrVertex(d, &queue[v2]);
                }
                break;
            case 1:
                /* First vertex was visible */
                {
                    Vertex* v0 = &queue[queue_head];
                    Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
                    Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];

                    _glClipEdge(v0, v1, &a);
                    _glClipEdge(v2, v0, &b);
                    a.flags = GPU_CMD_VERTEX;

                    /* If v2 was the last in the strip, then b should be. If it wasn't
                       we'll create a degenerate triangle by adding b twice in a row so that the
                       strip processing will continue correctly after crossing the plane so it can
                       cross back */
                    b.flags = v2->flags;

                    _glPerspectiveDivideVertex(v0, h);
                    _glPerspectiveDivideVertex(&a, h);
                    _glPerspectiveDivideVertex(&b, h);

                    _glSubmitHeaderOrVertex(d, v0);
                    _glSubmitHeaderOrVertex(d, &a);
                    _glSubmitHeaderOrVertex(d, &b);
                    _glSubmitHeaderOrVertex(d, &b);
                }
                break;
            case 2:
                /* Second vertex was visible. In this case we need to create a triangle and produce
                   two new vertices: 1-2, and 2-3.
                   NOTE(review): this path divides and submits v0 (the corner
                   behind the plane) rather than the visible v1 — confirm
                   whether that is intentional. */
                {
                    Vertex* v0 = &queue[queue_head];
                    const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
                    const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];

                    _glClipEdge(v0, v1, &a);
                    _glClipEdge(v1, v2, &b);
                    a.flags = GPU_CMD_VERTEX;
                    b.flags = v2->flags;

                    _glPerspectiveDivideVertex(v0, h);
                    _glPerspectiveDivideVertex(&a, h);
                    _glPerspectiveDivideVertex(&b, h);

                    _glSubmitHeaderOrVertex(d, &a);
                    _glSubmitHeaderOrVertex(d, v0);
                    _glSubmitHeaderOrVertex(d, &b);
                }
                break;
            case 3: /* First and second vertex were visible */
                {
                    Vertex* v0 = &queue[queue_head];
                    Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity];
                    Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];

                    _glClipEdge(&v1, v2, &a);
                    _glClipEdge(v2, v0, &b);
                    a.flags = v2->flags;
                    b.flags = GPU_CMD_VERTEX;

                    _glPerspectiveDivideVertex(v0, h);
                    _glPerspectiveDivideVertex(&v1, h);
                    _glPerspectiveDivideVertex(&a, h);
                    _glPerspectiveDivideVertex(&b, h);

                    _glSubmitHeaderOrVertex(d, v0);
                    _glSubmitHeaderOrVertex(d, &v1);
                    _glSubmitHeaderOrVertex(d, &b);
                    _glSubmitHeaderOrVertex(d, &v1);
                    _glSubmitHeaderOrVertex(d, &a);
                }
                break;
            case 4:
                /* Third vertex was visible. */
                {
                    Vertex* v0 = &queue[queue_head];
                    Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
                    Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];

                    _glClipEdge(&v2, v0, &a);
                    _glClipEdge(v1, &v2, &b);
                    a.flags = GPU_CMD_VERTEX;
                    b.flags = GPU_CMD_VERTEX;

                    _glPerspectiveDivideVertex(&v2, h);
                    _glPerspectiveDivideVertex(&a, h);
                    _glPerspectiveDivideVertex(&b, h);

                    /* `a` is duplicated to keep strip winding consistent */
                    _glSubmitHeaderOrVertex(d, &a);
                    _glSubmitHeaderOrVertex(d, &a);
                    _glSubmitHeaderOrVertex(d, &b);
                    _glSubmitHeaderOrVertex(d, &v2);
                }
                break;
            case 5: /* First and third vertex were visible */
                {
                    Vertex* v0 = &queue[queue_head];
                    Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
                    Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];

                    _glClipEdge(v0, v1, &a);
                    _glClipEdge(v1, &v2, &b);
                    a.flags = GPU_CMD_VERTEX;
                    b.flags = GPU_CMD_VERTEX;

                    _glPerspectiveDivideVertex(v0, h);
                    _glPerspectiveDivideVertex(&v2, h);
                    _glPerspectiveDivideVertex(&a, h);
                    _glPerspectiveDivideVertex(&b, h);

                    _glSubmitHeaderOrVertex(d, v0);
                    _glSubmitHeaderOrVertex(d, &a);

                    /* v2 appears mid-strip first (as a plain vertex) and then
                       again with its original flags to close the triangle */
                    uint32_t v2_flags = v2.flags;
                    v2.flags = GPU_CMD_VERTEX;

                    _glSubmitHeaderOrVertex(d, &v2);

                    v2.flags = v2_flags;

                    _glSubmitHeaderOrVertex(d, &b);
                    _glSubmitHeaderOrVertex(d, &v2);
                }
                break;
            case 6: /* Second and third vertex were visible */
                {
                    Vertex* v0 = &queue[queue_head];
                    Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity];
                    Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];

                    _glClipEdge(v0, &v1, &a);
                    _glClipEdge(&v2, v0, &b);
                    a.flags = GPU_CMD_VERTEX;
                    b.flags = GPU_CMD_VERTEX;

                    _glPerspectiveDivideVertex(&v1, h);
                    _glPerspectiveDivideVertex(&v2, h);
                    _glPerspectiveDivideVertex(&a, h);
                    _glPerspectiveDivideVertex(&b, h);

                    _glSubmitHeaderOrVertex(d, &a);
                    _glSubmitHeaderOrVertex(d, &v1);
                    _glSubmitHeaderOrVertex(d, &b);
                    _glSubmitHeaderOrVertex(d, &v1);
                    _glSubmitHeaderOrVertex(d, &v2);
                }
                break;
            default:
                break;
        }

        if(last_vertex) {
            /* End of strip: reset the queue and visibility state */
            visible_mask = queue_head = queue_tail = 0;
        } else {
            queue_head = (queue_head + 1) % queue_capacity;
        }
    }
}
/* Minimal clip-space vertex description used to build test input strips. */
struct VertexTmpl {
    VertexTmpl(float x, float y, float z, float w):
        x(x), y(y), z(z), w(w) {}

    float x, y, z, w;
};
std::vector<Vertex> make_vertices(const std::vector<VertexTmpl>& verts) {
std::vector<Vertex> result;
Vertex r;
r.flags = GPU_CMD_POLYHDR;
result.push_back(r);
for(auto& v: verts) {
r.flags = GPU_CMD_VERTEX;
r.xyz[0] = v.x;
r.xyz[1] = v.y;
r.xyz[2] = v.z;
r.uv[0] = 0.0f;
r.uv[1] = 0.0f;
r.w = v.w;
result.push_back(r);
}
result.back().flags = GPU_CMD_VERTEX_EOL;
return result;
}
/* Fail the running test by throwing when the two values compare unequal
 * (via operator!=). */
template<typename T, typename U>
void check_equal(const T& lhs, const U& rhs) {
    if(!(lhs != rhs)) {
        return;
    }
    throw std::runtime_error("Assertion failed");
}
/* Vertices compare equal when position and w match exactly; uv, colour and
 * flags are deliberately ignored. */
template<>
void check_equal(const Vertex& lhs, const Vertex& rhs) {
    const bool same_position =
        lhs.xyz[0] == rhs.xyz[0] &&
        lhs.xyz[1] == rhs.xyz[1] &&
        lhs.xyz[2] == rhs.xyz[2];

    if(!same_position || lhs.w != rhs.w) {
        throw std::runtime_error("Assertion failed");
    }
}
bool test_clip_case_001() {
/* The first vertex is visible only */
sent.clear();
auto data = make_vertices({
{0.000000, -2.414213, 3.080808, 5.000000},
{-4.526650, -2.414213, -7.121212, -5.000000},
{4.526650, -2.414213, -7.121212, -5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 5);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
// Because we're sending a single triangle, we end up sending a
// degenerate final vert. But if we were sending more than one triangle
// this would be GPU_CMD_VERTEX twice
check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[3], sent[4]);
return true;
}
bool test_clip_case_010() {
/* The third vertex is visible only */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, -7.121212, -5.000000},
{0.000000, -2.414213, 3.080808, 5.000000},
{4.526650, -2.414213, -7.121212, -5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 4);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
return true;
}
bool test_clip_case_100() {
/* The third vertex is visible only */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, -7.121212, -5.000000},
{4.526650, -2.414213, -7.121212, -5.000000},
{0.000000, -2.414213, 3.080808, 5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 5);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
// Because we're sending a single triangle, we end up sending a
// degenerate final vert. But if we were sending more than one triangle
// this would be GPU_CMD_VERTEX twice
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[1], sent[2]);
return true;
}
bool test_clip_case_110() {
/* 2nd and 3rd visible */
sent.clear();
auto data = make_vertices({
{0.0, -2.414213, -7.121212, -5.000000},
{-4.526650, -2.414213, 3.080808, 5.000000},
{4.526650, -2.414213, 3.080808, 5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 6);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX);
check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[2], sent[4]);
return true;
}
bool test_clip_case_011() {
/* 1st and 2nd visible */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, 3.080808, 5.000000},
{4.526650, -2.414213, 3.080808, 5.000000},
{0.0, -2.414213, -7.121212, -5.000000}
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 6);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX);
check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[2], sent[4]);
return true;
}
bool test_clip_case_101() {
/* 1st and 3rd visible */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, 3.080808, 5.000000},
{0.0, -2.414213, -7.121212, -5.000000},
{4.526650, -2.414213, 3.080808, 5.000000},
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 6);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX);
check_equal(sent[4].flags, GPU_CMD_VERTEX);
check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
check_equal(sent[3], sent[5]);
return true;
}
bool test_clip_case_111() {
/* 1st and 3rd visible */
sent.clear();
auto data = make_vertices({
{-4.526650, -2.414213, 3.080808, 5.000000},
{0.0, -2.414213, -7.121212, 8.000000},
{4.526650, -2.414213, 3.080808, 5.000000},
});
SceneListSubmit(&data[0], data.size());
check_equal(sent.size(), 4);
check_equal(sent[0].flags, GPU_CMD_POLYHDR);
check_equal(sent[1].flags, GPU_CMD_VERTEX);
check_equal(sent[2].flags, GPU_CMD_VERTEX);
check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
return true;
}
bool test_start_behind() {
/* Triangle behind the plane, but the strip continues in front */
sent.clear();
auto data = make_vertices({
{-3.021717, -2.414213, -10.155344, -9.935254},
{5.915236, -2.414213, -9.354721, -9.136231},
{-5.915236, -2.414213, -0.264096, -0.063767},
{3.021717, -2.414213, 0.536527, 0.735255},
{-7.361995, -2.414213, 4.681529, 4.871976},
{1.574958, -2.414213, 5.482152, 5.670999},
});
SceneListSubmit(&data[0], data.size());
return true;
}
bool test_longer_strip() {
sent.clear();
auto data = make_vertices({
{-4.384623, -2.414213, -5.699644, -5.488456},
{4.667572, -2.414213, -5.621354, -5.410322},
{-4.667572, -2.414213, 4.319152, 4.510323},
{4.384623, -2.414213, 4.397442, 4.588456},
{-4.809045, -2.414213, 9.328549, 9.509711},
{4.243149, -2.414213, 9.406840, 9.587846},
});
SceneListSubmit(&data[0], data.size());
return true;
}
/* Entry point: run every z-clipping regression case in order. Each case
 * throws std::runtime_error on failure, so reaching the end means all
 * tests passed. (A case 000 — all vertices hidden — is not implemented.) */
int main(int argc, char* argv[]) {
    typedef bool (*TestFn)();

    const TestFn tests[] = {
        test_clip_case_001,
        test_clip_case_010,
        test_clip_case_100,
        test_clip_case_110,
        test_clip_case_011,
        test_clip_case_101,
        test_clip_case_111,
        test_start_behind,
        test_longer_strip,
    };

    for(const TestFn& test : tests) {
        test();
    }

    return 0;
}

451
tools/test.h Normal file
View File

@ -0,0 +1,451 @@
/* * Copyright (c) 2011-2017 Luke Benstead https://simulant-engine.appspot.com
*
* This file is part of Simulant.
*
* Simulant is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Simulant is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Simulant. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <vector>
#include <functional>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <fstream>
#include <memory>
/* Assertion entry points for use inside TestCase methods. Each macro
 * forwards to the matching _assert_* member function below, passing
 * __FILE__/__LINE__ so failures report the call site. */
#define assert_equal(expected, actual) _assert_equal((expected), (actual), __FILE__, __LINE__)
#define assert_not_equal(expected, actual) _assert_not_equal((expected), (actual), __FILE__, __LINE__)
#define assert_false(actual) _assert_false((actual), __FILE__, __LINE__)
#define assert_true(actual) _assert_true((actual), __FILE__, __LINE__)
#define assert_close(expected, actual, difference) _assert_close((expected), (actual), (difference), __FILE__, __LINE__)
#define assert_is_null(actual) _assert_is_null((actual), __FILE__, __LINE__)
#define assert_is_not_null(actual) _assert_is_not_null((actual), __FILE__, __LINE__)
#define assert_raises(exception, func) _assert_raises<exception>((func), __FILE__, __LINE__)
/* Note: the arguments are deliberately passed as (actual, expected), i.e.
 * swapped, into _assert_items_equal's (lhs, rhs) parameters. */
#define assert_items_equal(expected, actual) _assert_items_equal((actual), (expected), __FILE__, __LINE__)
#define not_implemented() _not_implemented(__FILE__, __LINE__)
namespace test {
/* Minimal "{0}", "{1}", ... placeholder formatter used to build failure
 * messages. Each call to format() substitutes one value per argument,
 * replacing only the first occurrence of the matching placeholder. */
class StringFormatter {
public:
    StringFormatter(const std::string& templ):
        templ_(templ) { }

    /* Tracks which "{N}" placeholder the variadic expansion is up to. */
    struct Counter {
        Counter(uint32_t c): c(c) {}
        uint32_t c;
    };

    template<typename T>
    std::string format(T value) {
        return _do_format(0, stringify(value));
    }

    template<typename T>
    std::string format(Counter count, T value) {
        return _do_format(count.c, stringify(value));
    }

    template<typename T, typename... Args>
    std::string format(T value, const Args&... args) {
        return StringFormatter(_do_format(0, stringify(value))).format(Counter(1), args...);
    }

    template<typename T, typename... Args>
    std::string format(Counter count, T value, const Args&... args) {
        return StringFormatter(_do_format(count.c, stringify(value))).format(Counter(count.c + 1), args...);
    }

    /* Replace the first occurrence of "{counter}" in the template with
     * `value`; returns the template unchanged if the placeholder is absent. */
    std::string _do_format(uint32_t counter, const std::string& value) {
        std::stringstream num; // Can't use to_string on all platforms
        num << counter;
        const std::string placeholder = "{" + num.str() + "}";

        std::string result = templ_;
        const size_t pos = result.find(placeholder);
        if(pos != std::string::npos) {
            result.replace(pos, placeholder.length(), value);
        }
        return result;
    }

private:
    /* Render any streamable value to its string form. */
    template<typename T>
    static std::string stringify(const T& value) {
        std::stringstream ss;
        ss << value;
        return ss.str();
    }

    std::string templ_;
};
/* Splits a string on newline characters, discarding empty segments. */
class StringSplitter {
public:
    StringSplitter(const std::string& str):
        str_(str) {
    }

    /* Return the non-empty '\n'-delimited pieces of the stored string. */
    std::vector<std::string> split() {
        std::vector<std::string> parts;
        std::string current;

        for(std::size_t i = 0; i < str_.size(); ++i) {
            const char ch = str_[i];
            if(ch != '\n') {
                current.push_back(ch);
                continue;
            }
            if(!current.empty()) {
                parts.push_back(current);
                current.clear();
            }
        }

        /* Trailing segment without a final newline */
        if(!current.empty()) {
            parts.push_back(current);
        }
        return parts;
    }

private:
    std::string str_;
};
typedef StringFormatter _Format; /* Shorthand used throughout the assert helpers to build messages */
/* Thrown by the TestCase::_assert_* helpers on failure. Carries the source
 * location captured by the assert_* macros so the runner can print the
 * failing file/line and excerpt the offending source line. */
class AssertionError : public std::logic_error {
public:
    /* Failure without a known source location (file empty, line -1). */
    AssertionError(const std::string& what):
        std::logic_error(what),
        file(""),
        line(-1) {
    }

    /* Failure tagged with the (file, line) pair of the failing assertion. */
    AssertionError(const std::pair<std::string, int> file_and_line, const std::string& what):
        std::logic_error(what),
        file(file_and_line.first),
        line(file_and_line.second) {
    }

    ~AssertionError() noexcept (true) {
    }

    /* Source location of the failed assertion ("" / -1 when unknown). */
    std::string file;
    int line;
};
/* Thrown by the not_implemented() macro; reported by the runner as SKIPPED. */
class NotImplementedError: public std::logic_error {
public:
    NotImplementedError(const std::string& file, int line):
        std::logic_error(make_message(file, line)) {}

private:
    /* Builds "Not implemented at <file>:<line>". */
    static std::string make_message(const std::string& file, int line) {
        std::stringstream ss;
        ss << "Not implemented at " << file << ":" << line;
        return ss.str();
    }
};
/* Thrown by TestCase::skip_if() to mark a test as skipped rather than
 * failed; the runner catches it and reports SKIPPED. */
class SkippedTestError: public std::logic_error {
public:
    SkippedTestError(const std::string& reason):
        std::logic_error(reason) {
    }
};
/* Base class for user test cases. Provides set_up()/tear_down() lifecycle
 * hooks and the _assert_* helpers that the assert_* macros above expand to.
 * Every helper throws test::AssertionError carrying the caller's file/line. */
class TestCase {
public:
    virtual ~TestCase() {}

    /* Called by the runner before/after each test method; override as needed. */
    virtual void set_up() {}
    virtual void tear_down() {}

    /* Abort the current test as SKIPPED when `flag` is true. */
    void skip_if(const bool& flag, const std::string& reason) {
        if(flag) { throw test::SkippedTestError(reason); }
    }

    /* Fails unless expected and actual compare equal (via operator!=). */
    template<typename T, typename U>
    void _assert_equal(T expected, U actual, std::string file, int line) {
        if(expected != actual) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} does not match {1}").format(actual, expected));
        }
    }

    /* Fails when lhs equals rhs. Note: rhs is explicitly cast to T first. */
    template<typename T, typename U>
    void _assert_not_equal(T lhs, U rhs, std::string file, int line) {
        if(lhs == (T) rhs) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} should not match {1}").format(lhs, rhs));
        }
    }

    /* Fails unless bool(actual) is true. */
    template<typename T>
    void _assert_true(T actual, std::string file, int line) {
        if(!bool(actual)) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} is not true").format(bool(actual) ? "true" : "false"));
        }
    }

    /* Fails unless bool(actual) is false. */
    template<typename T>
    void _assert_false(T actual, std::string file, int line) {
        if(bool(actual)) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} is not false").format(bool(actual) ? "true" : "false"));
        }
    }

    /* Fails unless actual lies within [expected - difference, expected + difference]. */
    template<typename T, typename U, typename V>
    void _assert_close(T expected, U actual, V difference, std::string file, int line) {
        if(actual < expected - difference ||
           actual > expected + difference) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} is not close enough to {1}").format(actual, expected));
        }
    }

    /* Fails unless the pointer is NULL. */
    template<typename T>
    void _assert_is_null(T* thing, std::string file, int line) {
        if(thing != nullptr) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, "Pointer was not NULL");
        }
    }

    /* Fails when the pointer is NULL. */
    template<typename T>
    void _assert_is_not_null(T* thing, std::string file, int line) {
        if(thing == nullptr) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, "Pointer was unexpectedly NULL");
        }
    }

    /* Fails unless func() throws an exception of type T; the thrown
     * exception itself is swallowed.
     * NOTE(review): the failure AssertionError is thrown from inside the
     * try block, so it would itself be caught if T is AssertionError (or a
     * base of it) — confirm this is never used that way. */
    template<typename T, typename Func>
    void _assert_raises(Func func, std::string file, int line) {
        try {
            func();
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("Expected exception ({0}) was not thrown").format(typeid(T).name()));
        } catch(T& e) {}
    }

    /* Fails unless both containers have the same size and every element of
     * lhs appears somewhere in rhs (order-insensitive membership check). */
    template<typename T, typename U>
    void _assert_items_equal(const T& lhs, const U& rhs, std::string file, int line) {
        auto file_and_line = std::make_pair(file, line);

        if(lhs.size() != rhs.size()) {
            throw test::AssertionError(file_and_line, "Containers are not the same length");
        }

        for(auto item: lhs) {
            if(std::find(rhs.begin(), rhs.end(), item) == rhs.end()) {
                throw test::AssertionError(file_and_line, test::_Format("Container does not contain {0}").format(item));
            }
        }
    }

    /* Marks the test as not implemented (reported as SKIPPED by the runner). */
    void _not_implemented(std::string file, int line) {
        throw test::NotImplementedError(file, line);
    }
};
/* Collects registered test cases and runs them, printing colourised results
 * to stdout and optionally writing a junit-style XML report. */
class TestRunner {
public:
    /* Instantiate test-case class T and queue one runnable entry per member
     * pointer in `methods`; `names` must hold the matching "Class::method"
     * strings in the same order (tests_ and names_ stay in lockstep). */
    template<typename T, typename U>
    void register_case(std::vector<U> methods, std::vector<std::string> names) {
        std::shared_ptr<TestCase> instance = std::make_shared<T>();
        instances_.push_back(instance); //Hold on to it

        for(std::string name: names) {
            names_.push_back(name);
        }

        for(U& method: methods) {
            std::function<void()> func = std::bind(method, dynamic_cast<T*>(instance.get()));
            /* Wrap each test so tear_down() always runs, even when the test
             * body throws. */
            tests_.push_back([=]() {
                instance->set_up();
                try {
                    func();
                } catch(...) {
                    instance->tear_down();
                    throw;
                }
                instance->tear_down();
            });
        }
    }

    /* Run every registered test whose name starts with `test_case` (all
     * tests when empty), print a summary, and write junit XML to
     * `junit_output` when non-empty. Returns failed + crashed (0 == pass). */
    int32_t run(const std::string& test_case, const std::string& junit_output="") {
        int failed = 0;
        int skipped = 0;
        int ran = 0;
        int crashed = 0;

        auto new_tests = tests_;
        auto new_names = names_;

        if(!test_case.empty()) {
            /* Filter by prefix match against the "Class::method" names */
            new_tests.clear();
            new_names.clear();
            for(uint32_t i = 0; i < names_.size(); ++i) {
                if(names_[i].find(test_case) == 0) {
                    new_tests.push_back(tests_[i]);
                    new_names.push_back(names_[i]);
                }
            }
        }

        std::cout << std::endl << "Running " << new_tests.size() << " tests" << std::endl << std::endl;

        std::vector<std::string> junit_lines;
        junit_lines.push_back("<testsuites>\n");

        std::string klass = "";
        for(std::function<void ()> test: new_tests) {
            std::string name = new_names[ran];
            /* Everything before the first ':' is the class name */
            std::string this_klass(name.begin(), name.begin() + name.find_first_of(":"));
            bool close_klass = ran == (int) new_tests.size() - 1;

            /* Open a new <testsuite> whenever the class changes */
            if(this_klass != klass) {
                if(!klass.empty()) {
                    junit_lines.push_back(" </testsuite>\n");
                }
                klass = this_klass;
                junit_lines.push_back(" <testsuite name=\"" + this_klass + "\">\n");
            }

            try {
                junit_lines.push_back(" <testcase name=\"" + new_names[ran] + "\">\n");

                /* Pad so the OK/FAILED column lines up at column 76 */
                std::string output = " " + new_names[ran];
                for(int i = output.length(); i < 76; ++i) {
                    output += " ";
                }
                std::cout << output;

                test();
                std::cout << "\033[32m" << " OK " << "\033[0m" << std::endl;
                junit_lines.push_back(" </testcase>\n");
            } catch(test::NotImplementedError& e) {
                std::cout << "\033[34m" << " SKIPPED" << "\033[0m" << std::endl;
                ++skipped;
                junit_lines.push_back(" </testcase>\n");
            } catch(test::SkippedTestError& e) {
                std::cout << "\033[34m" << " SKIPPED" << "\033[0m" << std::endl;
                ++skipped;
                junit_lines.push_back(" </testcase>\n");
            } catch(test::AssertionError& e) {
                std::cout << "\033[33m" << " FAILED " << "\033[0m" << std::endl;
                std::cout << " " << e.what() << std::endl;
                if(!e.file.empty()) {
                    std::cout << " " << e.file << ":" << e.line << std::endl;
                    /* Echo the failing source line when the file is readable */
                    std::ifstream ifs(e.file);
                    if(ifs.good()) {
                        std::string buffer;
                        std::vector<std::string> lines;
                        while(std::getline(ifs, buffer)) {
                            lines.push_back(buffer);
                        }
                        int line_count = lines.size();
                        if(line_count && e.line <= line_count) {
                            std::cout << lines.at(e.line - 1) << std::endl << std::endl;
                        }
                    }
                }
                ++failed;
                junit_lines.push_back(" <failure message=\"" + std::string(e.what()) + "\"/>\n");
                junit_lines.push_back(" </testcase>\n");
            } catch(std::exception& e) {
                std::cout << "\033[31m" << " EXCEPT " << std::endl;
                std::cout << " " << e.what() << "\033[0m" << std::endl;
                ++crashed;
                junit_lines.push_back(" <failure message=\"" + std::string(e.what()) + "\"/>\n");
                junit_lines.push_back(" </testcase>\n");
            }
            std::cout << "\033[0m";
            ++ran;

            if(close_klass) {
                junit_lines.push_back(" </testsuite>\n");
            }
        }

        junit_lines.push_back("</testsuites>\n");

        if(!junit_output.empty()) {
            FILE* f = fopen(junit_output.c_str(), "wt");
            if(f) {
                for(auto& line: junit_lines) {
                    fwrite(line.c_str(), sizeof(char), line.length(), f);
                }
                /* FIX: fclose() must only run when fopen() succeeded.
                 * Previously it was outside this if-block, so a failed
                 * fopen() passed NULL to fclose() — undefined behaviour. */
                fclose(f);
            }
        }

        std::cout << "-----------------------" << std::endl;
        if(!failed && !crashed && !skipped) {
            std::cout << "All tests passed" << std::endl << std::endl;
        } else {
            if(skipped) {
                std::cout << skipped << " tests skipped";
            }
            if(failed) {
                if(skipped) {
                    std::cout << ", ";
                }
                std::cout << failed << " tests failed";
            }
            if(crashed) {
                if(failed) {
                    std::cout << ", ";
                }
                std::cout << crashed << " tests crashed";
            }
            std::cout << std::endl << std::endl;
        }

        return failed + crashed;
    }

private:
    std::vector<std::shared_ptr<TestCase>> instances_;
    std::vector<std::function<void()> > tests_;
    std::vector<std::string> names_;
};
} // test

212
tools/test_generator.py Executable file
View File

@ -0,0 +1,212 @@
#!/usr/bin/env python3
import argparse
import re
import sys
# Command-line interface: --output is the generated C++ source file,
# positional arguments are the C++ test files to scan.
parser = argparse.ArgumentParser(description="Generate C++ unit tests")
parser.add_argument("--output", type=str, nargs=1, help="The output source file for the generated test main()", required=True)
parser.add_argument("test_files", type=str, nargs="+", help="The list of C++ files containing your tests")
parser.add_argument("--verbose", help="Verbose logging", action="store_true", default=False)

# Matches a C++ class declaration header such as
# "class Foo : public Bar, private Baz"; group 1 is the class name and the
# optional base-class list follows the ':'.
CLASS_REGEX = r"\s*class\s+(\w+)\s*([\:|,]\s*(?:public|private|protected)\s+[\w|::]+\s*)*"

# Matches a zero-argument test method: "void test_xxx()" / "void test_xxx(void)".
TEST_FUNC_REGEX = r"void\s+(?P<func_name>test_\S[^\(]+)\(\s*(void)?\s*\)"

# One generated #include per source file that contains at least one test case.
INCLUDE_TEMPLATE = "#include \"%(file_path)s\""

# Registers one test-case class with the runner (filled per class in main()).
REGISTER_TEMPLATE = """
runner->register_case<%(class_name)s>(
std::vector<void (%(class_name)s::*)()>({%(members)s}),
{%(names)s}
);"""

# Skeleton of the generated C++ entry point. %(includes)s and
# %(registrations)s are substituted in main(); parse_args() implements a
# tiny --key=value / --key value / positional argument parser.
MAIN_TEMPLATE = """
#include <functional>
#include <memory>
#include <map>
#include "tools/test.h"
%(includes)s
std::map<std::string, std::string> parse_args(int argc, char* argv[]) {
std::map<std::string, std::string> ret;
for(int i = 1; i < argc; ++i) {
std::string arg = argv[i];
auto eq = arg.find('=');
if(eq != std::string::npos && arg[0] == '-' && arg[1] == '-') {
auto key = std::string(arg.begin(), arg.begin() + eq);
auto value = std::string(arg.begin() + eq + 1, arg.end());
ret[key] = value;
} else if(arg[0] == '-' && arg[1] == '-') {
auto key = arg;
if(i < (argc - 1)) {
auto value = argv[++i];
ret[key] = value;
} else {
ret[key] = "";
}
} else {
ret[arg] = ""; // Positional, not key=value
}
}
return ret;
}
int main(int argc, char* argv[]) {
auto runner = std::make_shared<test::TestRunner>();
auto args = parse_args(argc, argv);
std::string junit_xml;
auto junit_xml_it = args.find("--junit-xml");
if(junit_xml_it != args.end()) {
junit_xml = junit_xml_it->second;
std::cout << " Outputting junit XML to: " << junit_xml << std::endl;
args.erase(junit_xml_it);
}
std::string test_case;
if(args.size()) {
test_case = args.begin()->first;
}
%(registrations)s
return runner->run(test_case, junit_xml);
}
"""

# Set from --verbose in main(); read by log_verbose().
VERBOSE = False
def log_verbose(message):
    """Print *message* to stdout, but only when verbose mode is enabled."""
    if not VERBOSE:
        return
    print(message)
def find_tests(files):
    """Scan C++ source files for TestCase subclasses and their test methods.

    Returns a list of (path, class_name, parent_names, test_func_names)
    tuples, one per class that (transitively) derives from TestCase or
    SimulantTestCase.
    """
    subclasses = []

    # First pass, find all class definitions
    for path in files:
        with open(path, "rt") as f:
            # Flatten the file onto one line so the regexes can match
            # declarations wrapped across several lines.
            source_file_data = f.read().replace("\r\n", "").replace("\n", "")

        while True:
            match = re.search(CLASS_REGEX, source_file_data)
            if not match:
                break

            class_name = match.group().split(":")[0].replace("class", "").strip()

            try:
                parents = match.group().split(":", 1)[1]
            except IndexError:
                # No base-class list: cannot be a TestCase subclass, so the
                # class is not recorded at all.
                pass
            else:
                # Strip access specifiers, leaving bare base-class names.
                parents = [ x.strip() for x in parents.split(",") ]
                parents = [
                    x.replace("public", "").replace("private", "").replace("protected", "").strip()
                    for x in parents
                ]

                subclasses.append((path, class_name, parents, []))
                log_verbose("Found: %s" % str(subclasses[-1]))

            start = match.end()

            # Find the next opening brace
            while source_file_data[start] in (' ', '\t'):
                start += 1
            start -= 1
            end = start

            if source_file_data[start+1] == '{':
                # Capture the class body by counting braces until the
                # opening one is balanced again.
                class_data = []
                brace_counter = 1
                for i in range(start+2, len(source_file_data)):
                    class_data.append(source_file_data[i])
                    if class_data[-1] == '{': brace_counter += 1
                    if class_data[-1] == '}': brace_counter -= 1
                    if not brace_counter:
                        end = i
                        break
                class_data = "".join(class_data)

                # Collect every "void test_*()" method inside the body.
                # NOTE(review): methods found in a class with no base-class
                # list are appended to the most recently recorded class —
                # confirm this is intended.
                while True:
                    match = re.search(TEST_FUNC_REGEX, class_data)
                    if not match:
                        break
                    subclasses[-1][-1].append(match.group('func_name'))
                    class_data = class_data[match.end():]

            # Continue scanning after this class.
            source_file_data = source_file_data[end:]

    # Now, simplify the list by finding all potential superclasses, and then keeping any classes
    # that subclass them.
    test_case_subclasses = []

    i = 0
    while i < len(subclasses):
        subclass_names = [x.rsplit("::")[-1] for x in subclasses[i][2]]

        # If this subclasses TestCase, or it subclasses any of the already found testcase subclasses
        # then add it to the list
        if "TestCase" in subclass_names or "SimulantTestCase" in subclass_names or any(x[1] in subclasses[i][2] for x in test_case_subclasses):
            if subclasses[i] not in test_case_subclasses:
                test_case_subclasses.append(subclasses[i])
                i = 0  # Go back to the start, as we may have just found another parent class
                continue
        i += 1

    log_verbose("\n".join([str(x) for x in test_case_subclasses]))
    return test_case_subclasses
def main():
    """Generate the C++ test-runner main() from the given test files.

    Parses the command line, scans the test sources with find_tests(),
    renders the registration boilerplate into MAIN_TEMPLATE and writes the
    result to --output. Returns 0 (used as the process exit status).
    """
    global VERBOSE

    args = parser.parse_args()
    VERBOSE = args.verbose

    testcases = find_tests(args.test_files)

    # One #include per unique source file that contains at least one test case.
    includes = "\n".join([ INCLUDE_TEMPLATE % { 'file_path' : x } for x in set([y[0] for y in testcases]) ])

    # Loop-invariant: hoisted out of the per-class loop below.
    BIND_TEMPLATE = "&%(class_name)s::%(func)s"

    registrations = []
    for path, class_name, superclasses, funcs in testcases:
        members = ", ".join([ BIND_TEMPLATE % { 'class_name' : class_name, 'func' : x } for x in funcs ])
        names = ", ".join([ '"%s::%s"' % (class_name, x) for x in funcs ])
        registrations.append(REGISTER_TEMPLATE % { 'class_name' : class_name, 'members' : members, 'names' : names })

    registrations = "\n".join(registrations)

    final = MAIN_TEMPLATE % {
        'registrations' : registrations,
        'includes' : includes
    }

    # FIX: use a context manager so the output file is flushed and closed
    # deterministically (the original left the handle open to be collected).
    with open(args.output[0], "w") as out:
        out.write(final)

    return 0
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())