Compare commits
82 Commits
new-submis
...
master
Author | SHA1 | Date | |
---|---|---|---|
2601afb5f3 | |||
|
0efe4c6cef | ||
|
744dfb32f7 | ||
|
79172452f2 | ||
|
420e2d75f2 | ||
|
202f546848 | ||
|
d054dde785 | ||
|
00b4468928 | ||
|
f0d799d14f | ||
|
1bf8554926 | ||
|
9bc6da9fba | ||
|
a1536cba44 | ||
|
3eee140add | ||
|
43d64a4957 | ||
|
951ece6d19 | ||
|
61e5a7a2a6 | ||
|
3308a57e59 | ||
|
db9e1cd424 | ||
|
6eb079228e | ||
|
7ce01ad93f | ||
|
12bd6f474f | ||
|
e5a4f4f716 | ||
|
4d39e19ed5 | ||
|
49a0e103cb | ||
|
9cedc81850 | ||
|
0e31aa3d27 | ||
|
5e7b33797d | ||
|
b19b9d498a | ||
|
36de063756 | ||
|
246cb997da | ||
|
cfbaea4a46 | ||
|
4b47f6878f | ||
|
3248499d5a | ||
|
fd9a9d1c25 | ||
|
f49a98ab54 | ||
|
f278777c0e | ||
|
34173d926c | ||
|
77531ca347 | ||
|
a05e1b01fa | ||
|
3dcbbdbde6 | ||
|
92ee4f616d | ||
|
e7574bca1d | ||
|
026bdeff09 | ||
|
f6713bc778 | ||
|
5865d57384 | ||
|
1e3896e699 | ||
|
bd47f333d6 | ||
|
e57b503355 | ||
|
d81472ef57 | ||
|
462eb40d7a | ||
|
c4c0bf4239 | ||
|
9037d157d5 | ||
|
52a0215ed8 | ||
|
a5891056db | ||
|
9cffe14ad6 | ||
|
e683b8becb | ||
|
cba2fb7ceb | ||
|
c754c5c338 | ||
|
452cda5a3b | ||
|
9e1b1bc40a | ||
|
0f65eab86a | ||
|
1a678d2c8d | ||
|
0923b5c601 | ||
|
2ec7055547 | ||
|
9cc52a01fe | ||
|
095ebf2790 | ||
|
baa275b41b | ||
|
72c375f87c | ||
|
e54494e995 | ||
|
c5ce81a38d | ||
|
34448939a4 | ||
|
b6249e9ca4 | ||
|
1a181f702c | ||
|
3b53691e4b | ||
|
25d215dad3 | ||
|
307d371c55 | ||
|
4ad58bea89 | ||
|
8e60b18f29 | ||
|
190b4ecfb7 | ||
|
a4b778063a | ||
|
df9a12bbd6 | ||
|
6ee9a823c1 |
|
@ -1,5 +1,6 @@
|
|||
stages:
|
||||
- build
|
||||
- test
|
||||
|
||||
build:sh4-gcc:
|
||||
stage: build
|
||||
|
@ -17,11 +18,28 @@ build:sh4-gcc:
|
|||
|
||||
build:x86-gcc:
|
||||
stage: build
|
||||
image: fedora:34
|
||||
image: fedora:38
|
||||
before_script:
|
||||
- sudo dnf install -y cmake gcc gcc-c++ SDL2-devel glibc-devel pkgconf-pkg-config glibc-devel.i686 SDL2-devel.i686
|
||||
- sudo dnf install -y cmake gcc gcc-c++ SDL2.i686 SDL2-devel.x86_64 glibc-devel glibc-devel.i686 SDL2-devel.i686 pkgconf-pkg-config.i686 pkgconf-pkg-config.x86_64
|
||||
script:
|
||||
- mkdir builddir
|
||||
- cd builddir
|
||||
- cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
- make
|
||||
artifacts:
|
||||
paths:
|
||||
- builddir/tests/gldc_tests
|
||||
|
||||
test:x86-gcc:
|
||||
stage: test
|
||||
image: fedora:38
|
||||
dependencies:
|
||||
- build:x86-gcc
|
||||
before_script:
|
||||
- sudo dnf install -y cmake gcc gcc-c++ SDL2.i686 SDL2-devel glibc-devel pkgconf-pkg-config glibc-devel.i686 SDL2-devel.i686 pkgconf-pkg-config.i686
|
||||
script:
|
||||
- cd builddir/tests/
|
||||
- SDL_VIDEODRIVER=dummy ./gldc_tests --junit-xml=report.xml
|
||||
artifacts:
|
||||
reports:
|
||||
junit: builddir/tests/report.xml
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
cmake_minimum_required(VERSION 3.0)
|
||||
cmake_minimum_required(VERSION 3.9)
|
||||
project(GLdc)
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
# set the default backend
|
||||
if(PLATFORM_DREAMCAST)
|
||||
set(BACKEND "kospvr" CACHE STRING "Backend to use")
|
||||
|
@ -8,6 +10,9 @@ else()
|
|||
set(BACKEND "software" CACHE STRING "Backend to use")
|
||||
endif()
|
||||
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(RESULT FLTO_SUPPORTED OUTPUT FLTO_ERROR)
|
||||
|
||||
# List of possible backends
|
||||
set_property(CACHE BACKEND PROPERTY STRINGS kospvr software)
|
||||
|
||||
|
@ -17,6 +22,7 @@ string(TOUPPER ${BACKEND} BACKEND_UPPER)
|
|||
add_definitions(-DBACKEND_${BACKEND_UPPER})
|
||||
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
include_directories(include)
|
||||
|
||||
|
@ -29,15 +35,33 @@ else()
|
|||
check_c_compiler_flag("-mfsca" COMPILER_HAS_FSCA)
|
||||
if(COMPILER_HAS_FSRRA)
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsrra")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfsrra")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -mfsrra")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -mfsrra")
|
||||
endif()
|
||||
if(COMPILER_HAS_FSCA)
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfsca")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfsca")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -mfsca")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -mfsca")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -ffp-contract=fast -ffast-math")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -ffp-contract=fast -ffast-math")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ffast-math")
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++14 -O3 -g0 -s -fomit-frame-pointer -fstrict-aliasing")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 -fexpensive-optimizations -fomit-frame-pointer -finline-functions")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -std=c++14 -O3 -fomit-frame-pointer -fstrict-aliasing")
|
||||
|
||||
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -Wall -Wextra")
|
||||
|
||||
set(
|
||||
SOURCES
|
||||
|
@ -56,7 +80,7 @@ set(
|
|||
GL/state.c
|
||||
GL/texture.c
|
||||
GL/util.c
|
||||
GL/yalloc/yalloc.c
|
||||
GL/alloc/alloc.c
|
||||
${CMAKE_CURRENT_BINARY_DIR}/version.c
|
||||
)
|
||||
|
||||
|
@ -87,6 +111,10 @@ endif()
|
|||
|
||||
add_library(GLdc STATIC ${SOURCES})
|
||||
|
||||
if(FLTO_SUPPORTED)
|
||||
set_property(TARGET GLdc PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
endif()
|
||||
|
||||
if(NOT PLATFORM_DREAMCAST)
|
||||
set_target_properties(GLdc PROPERTIES
|
||||
COMPILE_OPTIONS "-m32"
|
||||
|
@ -110,6 +138,13 @@ function(gen_sample sample)
|
|||
|
||||
add_executable(${sample} ${SAMPLE_SRCS})
|
||||
|
||||
if(FLTO_SUPPORTED)
|
||||
# FIXME: Cubes + LTO causes an ICE
|
||||
if(NOT ${sample} MATCHES "cubes")
|
||||
set_property(TARGET ${sample} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(PLATFORM_DREAMCAST)
|
||||
if(EXISTS "${CMAKE_SOURCE_DIR}/samples/${sample}/romdisk")
|
||||
message("Generating romdisk for sample: ${sample}")
|
||||
|
@ -140,6 +175,8 @@ function(gen_sample sample)
|
|||
endif()
|
||||
endfunction()
|
||||
|
||||
add_subdirectory(tests)
|
||||
|
||||
gen_sample(blend_test samples/blend_test/main.c)
|
||||
gen_sample(depth_funcs samples/depth_funcs/main.c)
|
||||
gen_sample(depth_funcs_alpha_testing samples/depth_funcs_alpha_testing/main.c samples/depth_funcs_alpha_testing/gl_png.c)
|
||||
|
@ -170,11 +207,14 @@ gen_sample(zclip_triangle samples/zclip_triangle/main.c)
|
|||
gen_sample(zclip_trianglestrip samples/zclip_trianglestrip/main.c)
|
||||
gen_sample(scissor samples/scissor/main.c)
|
||||
gen_sample(polymark samples/polymark/main.c)
|
||||
|
||||
gen_sample(cubes samples/cubes/main.cpp)
|
||||
gen_sample(zclip_test tests/zclip/main.cpp)
|
||||
|
||||
if(PLATFORM_DREAMCAST)
|
||||
gen_sample(trimark samples/trimark/main.c)
|
||||
gen_sample(quadmark samples/quadmark/main.c samples/profiler.c)
|
||||
gen_sample(prof_texture_upload samples/prof_texture_upload/main.c samples/profiler.c)
|
||||
else()
|
||||
gen_sample(quadmark samples/quadmark/main.c)
|
||||
gen_sample(prof_texture_upload samples/prof_texture_upload/main.c)
|
||||
endif()
|
||||
|
|
534
GL/alloc/alloc.c
Normal file
534
GL/alloc/alloc.c
Normal file
|
@ -0,0 +1,534 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "alloc.h"
|
||||
|
||||
|
||||
/* This allocator is designed so that ideally all allocations larger
|
||||
* than 2k, fall on a 2k boundary. Smaller allocations will
|
||||
* never cross a 2k boundary.
|
||||
*
|
||||
* House keeping is stored in RAM to avoid reading back from the
|
||||
* VRAM to check for usage. Headers can't be easily stored in the
|
||||
* blocks anyway as they have to be 2k aligned (so you'd need to
|
||||
* store them in reverse or something)
|
||||
*
|
||||
* Defragmenting the pool will move larger allocations first, then
|
||||
* smaller ones, recursively until you tell it to stop, or until things
|
||||
* stop moving.
|
||||
*
|
||||
* The maximum pool size is 8M, made up of:
|
||||
*
|
||||
* - 4096 blocks of 2k
|
||||
* - each with 8 sub-blocks of 256 bytes
|
||||
*
|
||||
* Why?
|
||||
*
|
||||
* The PVR performs better if textures don't cross 2K memory
|
||||
* addresses, so we try to avoid that. Obviously we can't
|
||||
* if the allocation is > 2k, but in that case we can at least
|
||||
* align with 2k and the VQ codebook (which is usually 2k) will
|
||||
* be in its own page.
|
||||
*
|
||||
* The smallest PVR texture allowed is 8x8 at 16 bit (so 128 bytes)
|
||||
* but we're unlikely to use too many of those, so having a min sub-block
|
||||
* size of 256 should be OK (a 16x16 image is 512, so two sub-blocks).
|
||||
*
|
||||
* We could go down to 128 bytes if wastage is an issue, but then we have
|
||||
* to store double the number of usage markers.
|
||||
*
|
||||
* FIXME:
|
||||
*
|
||||
* - Only operates on one pool (ignores what you pass)
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Pool geometry: the allocator manages at most 8M, split into 2k blocks
 * (see the header comment: 4096 blocks of 2k, each with 8 x 256-byte
 * subblocks). */
#define EIGHT_MEG (8 * 1024 * 1024)
#define TWO_KILOBYTES (2 * 1024)
#define BLOCK_COUNT (EIGHT_MEG / TWO_KILOBYTES)

/* Set to 1 to trace allocator operations on stderr. */
#define ALLOC_DEBUG 0
#if ALLOC_DEBUG
#define DBG_MSG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
#else
/* No-op that still swallows the arguments syntactically. */
#define DBG_MSG(fmt, ...) do {} while (0)
#endif
|
||||
|
||||
|
||||
/* Round `n` up to the next multiple of `multiple` (returns `n` when it
 * is already a multiple).
 *
 * FIX: the original evaluated `n % multiple` BEFORE `assert(multiple)`,
 * so a zero multiple hit division-by-zero (undefined behavior) before
 * the assert could fire. The guard now comes first. */
static inline intptr_t round_up(intptr_t n, int multiple)
{
    assert(multiple);

    if((n % multiple) == 0) {
        return n;
    }

    return ((n + multiple - 1) / multiple) * multiple;
}
|
||||
|
||||
/* One tracked allocation: its pool address, the size requested by the
 * caller, and the next entry in a size-descending singly-linked list. */
struct AllocEntry {
    void* pointer;
    size_t size;
    struct AllocEntry* next;
};


typedef struct {
    /* This is a usage bitmask for each block. A block
     * is divided into 8 x 256 byte subblocks. If a block
     * is entirely used, it's value will be 255, if
     * it's entirely free then it will be 0.
     */
    uint8_t block_usage[BLOCK_COUNT];
    uint8_t* pool; // Pointer to the memory pool
    size_t pool_size; // Size of the memory pool
    uint8_t* base_address; // First 2k aligned address in the pool
    size_t block_count; // Number of 2k blocks in the pool

    /* It's frustrating that we need to do this dynamically
     * but we need to know the size allocated when we free()...
     * we could store it statically but it would take 64k if we had
     * an array of block_index -> block size where there would be 2 ** 32
     * entries of 16 bit block sizes. The drawback (aside the memory usage)
     * would be that we won't be able to order by size, so defragging will
     * take much more time.*/
    struct AllocEntry* allocations;
} PoolHeader;


/* Single global header: all housekeeping lives in RAM, and (per the
 * FIXME above) the `pool` argument of the public API is ignored — only
 * one pool is supported. */
static PoolHeader pool_header = {
    {0}, NULL, 0, NULL, 0, NULL
};
|
||||
|
||||
/* First 2048-aligned address inside the pool (set by alloc_init).
 * `pool` is ignored — only the single global pool is supported. */
void* alloc_base_address(void* pool) {
    (void) pool;
    return pool_header.base_address;
}
|
||||
|
||||
/* Number of whole 2k blocks managed by the allocator (set by
 * alloc_init). `pool` is ignored — single global pool only. */
size_t alloc_block_count(void* pool) {
    (void) pool;
    return pool_header.block_count;
}
|
||||
|
||||
static inline void* calc_address(
|
||||
uint8_t* block_usage_iterator,
|
||||
int bit_offset,
|
||||
size_t required_subblocks,
|
||||
size_t* start_subblock_out
|
||||
) {
|
||||
uintptr_t offset = (block_usage_iterator - pool_header.block_usage) * 8;
|
||||
offset += (bit_offset + 1);
|
||||
offset -= required_subblocks;
|
||||
|
||||
if(start_subblock_out) {
|
||||
*start_subblock_out = offset;
|
||||
}
|
||||
|
||||
return pool_header.base_address + (offset * 256);
|
||||
}
|
||||
|
||||
void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_subblock, size_t* required_subblocks);
|
||||
|
||||
/* Return the address the next allocation of `required_size` would be
 * placed at, WITHOUT reserving anything (NULL when nothing fits). */
void* alloc_next_available(void* pool, size_t required_size) {
    return alloc_next_available_ex(pool, required_size, NULL, NULL);
}
|
||||
|
||||
/* Search the usage bitmask for a free run of subblocks big enough for
 * `required_size`, without marking anything as used.
 *
 * Placement rules (see the file header): allocations >= 2048 bytes must
 * start on a 2048 boundary; smaller allocations must not straddle one.
 * A run violating those rules is still remembered as a "poor" fallback
 * and returned only if no well-placed run exists. Returns NULL when the
 * pool has no run of the required length at all. Optionally reports the
 * run's first subblock index and the subblock count via the out params. */
void* alloc_next_available_ex(void* pool, size_t required_size, size_t* start_subblock_out, size_t* required_subblocks_out) {
    (void) pool;

    uint8_t* it = pool_header.block_usage;
    /* Round the byte size up to whole 256-byte subblocks. */
    uint32_t required_subblocks = (required_size / 256);
    if(required_size % 256) required_subblocks += 1;

    /* Anything gte to 2048 must be aligned to a 2048 boundary */
    bool requires_alignment = required_size >= 2048;

    if(required_subblocks_out) {
        *required_subblocks_out = required_subblocks;
    }

    /* This is a fallback option. If while we're searching we find a possible slot
     * but it's not aligned, or it's straddling a 2k boundary, then we store
     * it here and if we reach the end of the search and find nothing better
     * we use this instead */
    uint8_t* poor_option = NULL;
    size_t poor_start_subblock = 0;

    /* Current run length under the strict placement rules, and under
     * the relaxed ("poor") rules, respectively. */
    uint32_t found_subblocks = 0;
    uint32_t found_poor_subblocks = 0;

    /* One usage byte per 2k block; bit 7 (MSB) is the block's first
     * 256-byte subblock. */
    for(size_t j = 0; j < pool_header.block_count; ++j, ++it) {
        /* We just need to find enough consecutive blocks */
        if(found_subblocks < required_subblocks) {
            uint8_t t = *it;

            /* Optimisation only. Skip over full blocks */
            if(t == 255) {
                found_subblocks = 0;
                found_poor_subblocks = 0;
            } else {
                /* Now let's see how many consecutive blocks we can find */
                for(int i = 0; i < 8; ++i) {
                    if((t & 0x80) == 0) {
                        /* A small allocation's run would cross into a new
                         * 2k block here (i == 0 with a run in progress). */
                        bool block_overflow = (
                            required_size < 2048 && found_subblocks > 0 && i == 0
                        );

                        bool reset_subblocks = (
                            (requires_alignment && found_subblocks == 0 && i != 0) ||
                            block_overflow
                        );

                        if(reset_subblocks) {
                            // Ignore this subblock, because we want the first subblock to be aligned
                            // at a 2048 boundary and this one isn't (i != 0)
                            found_subblocks = 0;
                        } else {
                            found_subblocks++;
                        }

                        /* If we reset the subblocks due to an overflow, we still
                         * want to count this free subblock in our count */
                        if(block_overflow) {
                            found_subblocks++;
                        }

                        found_poor_subblocks++;

                        if(found_subblocks >= required_subblocks) {
                            /* We found space! Now calculate the address */
                            return calc_address(it, i, required_subblocks, start_subblock_out);
                        }

                        /* Remember the FIRST relaxed-rules run only. */
                        if(!poor_option && (found_poor_subblocks >= required_subblocks)) {
                            poor_option = calc_address(it, i, required_subblocks, &poor_start_subblock);
                        }

                    } else {
                        /* Used subblock: both runs are broken. */
                        found_subblocks = 0;
                        found_poor_subblocks = 0;
                    }

                    t <<= 1;
                }
            }
        }
    }

    /* No well-placed run found; fall back to the poor option if any. */
    if(poor_option) {
        if(start_subblock_out) {
            *start_subblock_out = poor_start_subblock;
        }

        return poor_option;
    } else {
        return NULL;
    }
}
|
||||
|
||||
/* Bind the allocator to `pool` (`size` bytes, at most 8M).
 *
 * Returns 0 on success, -1 if the allocator is already initialised or
 * the size exceeds the maximum. The usable region starts at the first
 * 2048-aligned address within the pool; only whole 2k blocks after that
 * point are managed. */
int alloc_init(void* pool, size_t size) {
    (void) pool;

    /* Already initialised — only one pool is supported. */
    if(pool_header.pool) {
        return -1;
    }

    if(size > EIGHT_MEG) { // FIXME: >= ?
        return -1;
    }

    uint8_t* p = (uint8_t*) pool;

    /* Housekeeping lives in RAM, not inside the pool. */
    memset(pool_header.block_usage, 0, BLOCK_COUNT);
    pool_header.pool = pool;
    pool_header.pool_size = size;

    /* Round the base up to the next 2048 boundary... */
    intptr_t base_address = (intptr_t) pool_header.pool;
    base_address = round_up(base_address, 2048);

    pool_header.base_address = (uint8_t*) base_address;
    /* ...and count the whole 2k blocks remaining after alignment. */
    pool_header.block_count = ((p + size) - pool_header.base_address) / 2048;
    pool_header.allocations = NULL;

    assert(((uintptr_t) pool_header.base_address) % 2048 == 0);

    return 0;
}
|
||||
|
||||
void alloc_shutdown(void* pool) {
|
||||
(void) pool;
|
||||
|
||||
if(!pool_header.pool) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct AllocEntry* it = pool_header.allocations;
|
||||
while(it) {
|
||||
struct AllocEntry* next = it->next;
|
||||
free(it);
|
||||
it = next;
|
||||
}
|
||||
|
||||
memset(&pool_header, 0, sizeof(pool_header));
|
||||
pool_header.pool = NULL;
|
||||
}
|
||||
|
||||
/* Number of 256-byte subblocks needed to hold `size` bytes
 * (i.e. the byte count rounded up to whole subblocks). */
static inline uint32_t size_to_subblock_count(size_t size) {
    return (uint32_t) ((size / 256) + ((size % 256) ? 1 : 0));
}
|
||||
|
||||
static inline uint32_t subblock_from_pointer(void* p) {
|
||||
uint8_t* ptr = (uint8_t*) p;
|
||||
return (ptr - pool_header.base_address) / 256;
|
||||
}
|
||||
|
||||
/* Split a global subblock index into its 2k-block index (`b`) and the
 * bit offset of the subblock within that block's usage byte (`off`).
 * There are 8 subblocks per block, so this is a divide/modulo by 8,
 * written here as shift/mask. */
static inline void block_and_offset_from_subblock(size_t sb, size_t* b, uint8_t* off) {
    *b = sb >> 3;
    *off = (uint8_t) (sb & 7);
}
|
||||
|
||||
/* Allocate `size` bytes from the pool.
 *
 * Finds a slot with alloc_next_available_ex(), marks the covered
 * subblocks as used in block_usage (a partial leading byte, any full
 * middle bytes, then a partial trailing byte), and records the
 * allocation in the RAM-side list, ordered by size descending so defrag
 * can move large allocations first. Returns NULL when nothing fits.
 *
 * NOTE(review): the malloc() of the AllocEntry is not NULL-checked.
 * NOTE(review): in the leading-byte loop, `c` is capped at 8 but not at
 * (8 - offset); if a "poor" fallback slot starts mid-byte with a run
 * longer than (8 - offset), (7 - (offset + i)) goes negative — an
 * undefined shift. Verify the search can never hand back such a slot. */
void* alloc_malloc(void* pool, size_t size) {
    DBG_MSG("Allocating: %d\n", size);

    size_t start_subblock, required_subblocks;
    void* ret = alloc_next_available_ex(pool, size, &start_subblock, &required_subblocks);

    if(ret) {
        size_t block;
        uint8_t offset;

        block_and_offset_from_subblock(start_subblock, &block, &offset);

        uint8_t mask = 0;

        DBG_MSG("Alloc: size: %d, rs: %d, sb: %d, b: %d, off: %d\n", size, required_subblocks, start_subblock, start_subblock / 8, start_subblock % 8);

        /* Toggle any bits for the first block */
        int c = (required_subblocks < 8) ? required_subblocks : 8;
        for(int i = 0; i < c; ++i) {
            mask |= (1 << (7 - (offset + i)));
            required_subblocks--;
        }

        if(mask) {
            pool_header.block_usage[block++] |= mask;
        }

        /* Fill any full blocks in the middle of the allocation */
        while(required_subblocks > 8) {
            pool_header.block_usage[block++] = 255;
            required_subblocks -= 8;
        }

        /* Fill out any trailing subblocks */
        mask = 0;
        for(size_t i = 0; i < required_subblocks; ++i) {
            mask |= (1 << (7 - i));
        }

        if(mask) {
            pool_header.block_usage[block++] |= mask;
        }

        /* Insert allocations in the list by size descending so that when we
         * defrag we can move the larger blocks before the smaller ones without
         * much effort */
        struct AllocEntry* new_entry = (struct AllocEntry*) malloc(sizeof(struct AllocEntry));
        new_entry->pointer = ret;
        new_entry->size = size;
        new_entry->next = NULL;

        struct AllocEntry* it = pool_header.allocations;
        struct AllocEntry* last = NULL;

        if(!it) {
            /* First allocation ever: becomes the list head. */
            pool_header.allocations = new_entry;
        } else {
            while(it) {
                if(it->size < size) {
                    /* Insert before the first strictly-smaller entry. */
                    if(last) {
                        last->next = new_entry;
                    } else {
                        pool_header.allocations = new_entry;
                    }

                    new_entry->next = it;
                    break;
                } else if(!it->next) {
                    /* Smallest so far: append at the tail. */
                    it->next = new_entry;
                    new_entry->next = NULL;
                    break;
                }

                last = it;
                it = it->next;
            }
        }
    }

    DBG_MSG("Alloc done\n");

    return ret;
}
|
||||
|
||||
/* Clear the usage bits covered by allocation `it` (mirror image of the
 * marking logic in alloc_malloc: partial leading byte, full middle
 * bytes, partial trailing byte).
 *
 * Deliberately does NOT touch the allocation list — alloc_free()
 * unlinks the entry itself, and alloc_run_defrag() keeps the entry
 * alive while releasing the old location's bits. */
static void alloc_release_blocks(struct AllocEntry* it) {
    size_t used_subblocks = size_to_subblock_count(it->size);
    size_t subblock = subblock_from_pointer(it->pointer);
    size_t block;
    uint8_t offset;
    block_and_offset_from_subblock(subblock, &block, &offset);

    uint8_t mask = 0;

    DBG_MSG("Free: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);

    /* Wipe out any leading subblocks */
    int c = (used_subblocks < 8) ? used_subblocks : 8;
    for(int i = 0; i < c; ++i) {
        mask |= (1 << (7 - (offset + i)));
        used_subblocks--;
    }

    if(mask) {
        pool_header.block_usage[block++] &= ~mask;
    }

    /* Clear any full blocks in the middle of the allocation */
    while(used_subblocks > 8) {
        pool_header.block_usage[block++] = 0;
        used_subblocks -= 8;
    }

    /* Wipe out any trailing subblocks */
    mask = 0;
    for(size_t i = 0; i < used_subblocks; ++i) {
        mask |= (1 << (7 - i));
    }

    if(mask) {
        pool_header.block_usage[block++] &= ~mask;
    }
}
|
||||
|
||||
void alloc_free(void* pool, void* p) {
|
||||
(void) pool;
|
||||
|
||||
struct AllocEntry* it = pool_header.allocations;
|
||||
struct AllocEntry* last = NULL;
|
||||
while(it) {
|
||||
if(it->pointer == p) {
|
||||
alloc_release_blocks(it);
|
||||
|
||||
if(last) {
|
||||
last->next = it->next;
|
||||
} else {
|
||||
assert(it == pool_header.allocations);
|
||||
pool_header.allocations = it->next;
|
||||
}
|
||||
|
||||
DBG_MSG("Freed: size: %d, us: %d, sb: %d, off: %d\n", it->size, used_subblocks, block, offset);
|
||||
free(it);
|
||||
break;
|
||||
}
|
||||
|
||||
last = it;
|
||||
it = it->next;
|
||||
}
|
||||
|
||||
DBG_MSG("Free done\n");
|
||||
}
|
||||
|
||||
/* Compact the pool by moving allocations toward lower addresses.
 *
 * Runs up to `max_iterations` passes. In each pass every allocation is
 * offered the earliest slot that fits it and is moved (memcpy) when
 * that slot is at a strictly lower address; `callback(old, new,
 * user_data)` reports each move. Stops early when a pass moves nothing
 * or the pool is empty.
 *
 * NOTE(review): alloc_next_available()/alloc_malloc() return NULL when
 * nothing fits; this code relies on `NULL < it->pointer` being false
 * and on the memcpy destination being non-NULL — verify.
 * NOTE(review): alloc_malloc() inserts a NEW AllocEntry for the
 * destination while `it` is also retargeted to it, leaving two list
 * entries for the same address — looks like a leak/double-tracking. */
void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data) {

    for(int i = 0; i < max_iterations; ++i) {
        bool move_occurred = false;

        struct AllocEntry* it = pool_header.allocations;

        if(!it) {
            /* Nothing allocated; nothing to compact. */
            return;
        }

        while(it) {
            void* potential_dest = alloc_next_available(pool, it->size);
            if(potential_dest < it->pointer) {
                /* Reserve the lower slot and copy the payload across. */
                potential_dest = alloc_malloc(pool, it->size);
                memcpy(potential_dest, it->pointer, it->size);

                /* Mark this block as now free, but don't fiddle with the
                 * allocation list */
                alloc_release_blocks(it);

                callback(it->pointer, potential_dest, user_data);

                it->pointer = potential_dest;
                move_occurred = true;
            }

            it = it->next;
        }

        /* Fixed point reached: a full pass moved nothing. */
        if(!move_occurred) {
            return;
        }
    }
}
|
||||
|
||||
/* Population count of one byte, via a 4-bit lookup table applied to
 * each nibble. */
static inline uint8_t count_ones(uint8_t byte) {
    static const uint8_t NIBBLE_LOOKUP [16] = {
        0, 1, 1, 2, 1, 2, 2, 3,
        1, 2, 2, 3, 2, 3, 3, 4
    };
    uint8_t low_bits  = NIBBLE_LOOKUP[byte & 0x0F];
    uint8_t high_bits = NIBBLE_LOOKUP[(byte >> 4) & 0x0F];
    return (uint8_t) (low_bits + high_bits);
}
|
||||
|
||||
size_t alloc_count_free(void* pool) {
|
||||
(void) pool;
|
||||
|
||||
uint8_t* it = pool_header.block_usage;
|
||||
uint8_t* end = it + pool_header.block_count;
|
||||
|
||||
size_t total_free = 0;
|
||||
|
||||
while(it < end) {
|
||||
total_free += count_ones(*it) * 256;
|
||||
++it;
|
||||
}
|
||||
|
||||
return total_free;
|
||||
}
|
||||
|
||||
/* Size (in bytes) of the largest contiguous free region in the pool.
 *
 * NOTE(review): the per-bit accounting looks suspect — on a USED bit it
 * adds (7 - i) * 256 to the running total instead of ending the run,
 * and the run is only reset when it beats the current maximum. Verify
 * the returned value against the allocator tests before relying on it. */
size_t alloc_count_continuous(void* pool) {
    (void) pool;

    size_t largest_block = 0;

    uint8_t* it = pool_header.block_usage;
    uint8_t* end = it + pool_header.block_count;

    size_t current_block = 0;
    while(it < end) {
        uint8_t t = *it++;
        if(!t) {
            /* Whole 2k block free: extend the current run. */
            current_block += 2048;
        } else {
            /* Mixed block: walk bits MSB-first (bit 7 = first subblock). */
            for(int i = 7; i >= 0; --i) {
                bool bitset = (t & (1 << i));
                if(bitset) {
                    current_block += (7 - i) * 256;
                    if(largest_block < current_block) {
                        largest_block = current_block;
                        current_block = 0;
                    }
                }
            }
        }
    }

    return largest_block;
}
|
29
GL/alloc/alloc.h
Normal file
29
GL/alloc/alloc.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#pragma once

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Simple pool allocator for PVR VRAM: 2k blocks of 8 x 256-byte
 * subblocks; allocations >= 2k are 2k-aligned. Housekeeping lives in
 * RAM. Currently only ONE pool is supported — the `pool` argument is
 * ignored by the implementation. */

/* Initialise the allocator over `pool` (at most 8M).
 * Returns 0 on success, -1 if already initialised or too large. */
int alloc_init(void* pool, size_t size);
/* Free all housekeeping and reset the allocator. */
void alloc_shutdown(void* pool);

void *alloc_malloc(void* pool, size_t size);
void alloc_free(void* pool, void* p);

/* Defrag move notification: (old_address, new_address, user_data). */
typedef void (defrag_address_move)(void*, void*, void*);
/* Compact the pool for up to `max_iterations` passes, or until nothing
 * moves; `callback` is invoked for every relocated allocation. */
void alloc_run_defrag(void* pool, defrag_address_move callback, int max_iterations, void* user_data);

/* Introspection (values in bytes). */
size_t alloc_count_free(void* pool);
size_t alloc_count_continuous(void* pool);

/* Where the next allocation of `required_size` would land (no reserve). */
void* alloc_next_available(void* pool, size_t required_size);
/* First 2048-aligned address in the pool. */
void* alloc_base_address(void* pool);
/* Number of managed 2k blocks. */
size_t alloc_block_count(void* pool);

#ifdef __cplusplus
}
#endif
|
163
GL/draw.c
163
GL/draw.c
|
@ -3,10 +3,36 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "private.h"
|
||||
#include "platform.h"
|
||||
|
||||
/* Convert a 32-bit float to IEEE 754 half-float (binary16) bits.
 * Standard bit-twiddling conversion: rebias the exponent by 112
 * (127 - 15) and shift the mantissa down 13 bits, with a rounding bias
 * of (1 << 12).
 * - values too small for a normal half flush to (signed) zero
 * - overflow clamps to infinity (0x7c00)
 * - NaN maps to 0x7e00 (a quiet NaN)
 * NOTE(review): half denormals are not produced (flushed to zero). */
GLushort _quantize( GLfloat v ) {
    /* Type-pun the float to its bit pattern through a union. */
    union { GLfloat f; GLuint ui; } u = {v};
    GLuint ui = u.ui;

    int s = (ui >> 16) & 0x8000;   /* sign bit moved to the half position */
    int em = ui & 0x7fffffff;      /* exponent + mantissa */

    /* Rebias exponent, apply rounding bias, drop 13 mantissa bits. */
    int h = (em - (112 << 23) + (1 << 12)) >> 13;
    h = (em < (113 << 23)) ? 0 : h;        /* underflow -> zero */
    h = (em >= (143 << 23)) ? 0x7c00 : h;  /* overflow -> infinity */
    h = (em > (255 << 23)) ? 0x7e00 : h;   /* NaN -> quiet NaN */

    return (GLushort)(s | h);
}
|
||||
/* Convert IEEE 754 half-float (binary16) bits back to a 32-bit float.
 * Inverse of _quantize: rebias the exponent by 112 (127 - 15) and widen
 * the mantissa by 13 bits.
 * - half denormals (exponent field 0) flush to zero
 * - infinity/NaN (exponent field 31) get the float exponent re-widened */
GLfloat _dequantize( GLushort h ) {
    GLuint s = (GLuint) (h & 0x8000) << 16;    /* sign back to bit 31 */
    int em = h & 0x7fff;                       /* exponent + mantissa */
    int r = (em + (112 << 10)) << 13;          /* rebias and widen */
    r = (em < (1 << 10)) ? 0 : r;              /* denormal/zero -> 0 */
    r += (em >= (31 << 10)) ? (112 << 23) : 0; /* Inf/NaN -> exponent 255 */

    /* Reassemble the bit pattern and pun back to float. */
    union { GLfloat f; GLuint ui; } u;
    u.ui = s | r;
    return u.f;
}
|
||||
|
||||
AttribPointerList ATTRIB_POINTERS;
|
||||
GLuint ENABLED_VERTEX_ATTRIBUTES = 0;
|
||||
|
@ -62,6 +88,7 @@ GL_FORCE_INLINE GLsizei byte_size(GLenum type) {
|
|||
case GL_INT: return sizeof(GLint);
|
||||
case GL_UNSIGNED_INT: return sizeof(GLuint);
|
||||
case GL_DOUBLE: return sizeof(GLdouble);
|
||||
case GL_HALF_FLOAT: return sizeof(GLhalf);
|
||||
case GL_UNSIGNED_INT_2_10_10_10_REV: return sizeof(GLuint);
|
||||
case GL_FLOAT:
|
||||
default: return sizeof(GLfloat);
|
||||
|
@ -78,7 +105,7 @@ static void _readVertexData3f3f(const GLubyte* __restrict__ in, GLubyte* __restr
|
|||
|
||||
// 10:10:10:2REV format
|
||||
static void _readVertexData1i3f(const GLubyte* in, GLubyte* out) {
|
||||
const static float MULTIPLIER = 1.0f / 1023.0f;
|
||||
static const float MULTIPLIER = 1.0f / 1023.0f;
|
||||
|
||||
GLfloat* output = (GLfloat*) out;
|
||||
|
||||
|
@ -108,6 +135,15 @@ static void _readVertexData3us3f(const GLubyte* in, GLubyte* out) {
|
|||
output[2] = input[2];
|
||||
}
|
||||
|
||||
/* Read 3 x 16-bit components, treating each as quantized half-float
 * bits, and decode to 3 floats via _dequantize. */
static void _readVertexData3usq3f(const GLubyte* in, GLubyte* out) {
    const GLushort* input = (const GLushort*) in;
    float* output = (float*) out;

    output[0] = _dequantize(input[0]);
    output[1] = _dequantize(input[1]);
    output[2] = _dequantize(input[2]);
}
|
||||
|
||||
static void _readVertexData3ui3f(const GLubyte* in, GLubyte* out) {
|
||||
const GLuint* input = (const GLuint*) in;
|
||||
float* output = (float*) out;
|
||||
|
@ -126,6 +162,15 @@ static void _readVertexData3ub3f(const GLubyte* input, GLubyte* out) {
|
|||
output[2] = input[2] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
}
|
||||
|
||||
/* Read 3 x GL_HALF_FLOAT components into 3 floats by direct assignment.
 * NOTE(review): if GLhalf is an integer typedef this stores the RAW bit
 * pattern, not the decoded value — compare _readVertexData3usq3f, which
 * goes through _dequantize. Confirm GLhalf's definition. */
static void _readVertexData3f16_3f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;

    output[0] = input[0];
    output[1] = input[1];
    output[2] = input[2];
}
|
||||
|
||||
static void _readVertexData2f2f(const GLubyte* in, GLubyte* out) {
|
||||
vec2cpy(out, in);
|
||||
}
|
||||
|
@ -159,8 +204,25 @@ static void _readVertexData2us2f(const GLubyte* in, GLubyte* out) {
|
|||
const GLushort* input = (const GLushort*) in;
|
||||
float* output = (float*) out;
|
||||
|
||||
output[0] = input[0];
|
||||
output[1] = input[1];
|
||||
output[0] = (float)input[0] / SHRT_MAX;
|
||||
output[1] = (float)input[1] / SHRT_MAX;
|
||||
}
|
||||
|
||||
/* Read 2 x 16-bit quantized half-float components into a 3-float
 * output, zero-filling the third component. */
static void _readVertexData2usq3f(const GLubyte* in, GLubyte* out) {
    const GLushort* input = (const GLushort*) in;
    float* output = (float*) out;

    output[0] = _dequantize(input[0]);
    output[1] = _dequantize(input[1]);
    output[2] = 0.0f;
}
|
||||
|
||||
/* Read 2 x 16-bit quantized half-float components into 2 floats. */
static void _readVertexData2usq2f(const GLubyte* in, GLubyte* out) {
    const GLushort* input = (const GLushort*) in;
    float* output = (float*) out;

    output[0] = _dequantize(input[0]);
    output[1] = _dequantize(input[1]);
}
|
||||
|
||||
static void _readVertexData2ui2f(const GLubyte* in, GLubyte* out) {
|
||||
|
@ -178,6 +240,14 @@ static void _readVertexData2ub2f(const GLubyte* input, GLubyte* out) {
|
|||
output[1] = input[1] * ONE_OVER_TWO_FIVE_FIVE;
|
||||
}
|
||||
|
||||
/* Read 2 x GL_HALF_FLOAT components into 2 floats by direct assignment.
 * NOTE(review): if GLhalf is an integer typedef this stores the RAW bit
 * pattern rather than calling _dequantize — confirm GLhalf's definition. */
static void _readVertexData2f16_2f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;

    output[0] = input[0];
    output[1] = input[1];
}
|
||||
|
||||
static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) {
|
||||
const GLuint* input = (const GLuint*) in;
|
||||
float* output = (float*) out;
|
||||
|
@ -187,6 +257,15 @@ static void _readVertexData2ui3f(const GLubyte* in, GLubyte* out) {
|
|||
output[2] = 0.0f;
|
||||
}
|
||||
|
||||
/* Read 2 x GL_HALF_FLOAT components into a 3-float output, zero-filling
 * the third component.
 * NOTE(review): direct GLhalf assignment — if GLhalf is an integer
 * typedef this stores raw bits, not decoded values; confirm. */
static void _readVertexData2f16_3f(const GLubyte* in, GLubyte* out) {
    const GLhalf* input = (const GLhalf*) in;
    float* output = (float*) out;

    output[0] = input[0];
    output[1] = input[1];
    output[2] = 0.0f;
}
|
||||
|
||||
static void _readVertexData4ubARGB(const GLubyte* input, GLubyte* output) {
|
||||
output[R8IDX] = input[0];
|
||||
output[G8IDX] = input[1];
|
||||
|
@ -239,7 +318,7 @@ static void _fillWithNegZVE(const GLubyte* __restrict__ input, GLubyte* __restri
|
|||
float x, y, z;
|
||||
} V;
|
||||
|
||||
const static V NegZ = {0.0f, 0.0f, -1.0f};
|
||||
static const V NegZ = {0.0f, 0.0f, -1.0f};
|
||||
|
||||
*((V*) out) = NegZ;
|
||||
}
|
||||
|
@ -391,12 +470,12 @@ GL_FORCE_INLINE void transformNormalToEyeSpace(GLfloat* normal) {
|
|||
}
|
||||
|
||||
GL_FORCE_INLINE PolyHeader *_glSubmissionTargetHeader(SubmissionTarget* target) {
|
||||
gl_assert(target->header_offset < target->output->vector.size);
|
||||
gl_assert(target->header_offset < aligned_vector_size(&target->output->vector));
|
||||
return aligned_vector_at(&target->output->vector, target->header_offset);
|
||||
}
|
||||
|
||||
GL_INLINE_DEBUG Vertex* _glSubmissionTargetStart(SubmissionTarget* target) {
|
||||
gl_assert(target->start_offset < target->output->vector.size);
|
||||
gl_assert(target->start_offset < aligned_vector_size(&target->output->vector));
|
||||
return aligned_vector_at(&target->output->vector, target->start_offset);
|
||||
}
|
||||
|
||||
|
@ -492,14 +571,17 @@ ReadPositionFunc calcReadPositionFunc() {
|
|||
case GL_FLOAT:
|
||||
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f3f:
|
||||
_readVertexData2f3f;
|
||||
case GL_HALF_FLOAT:
|
||||
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3f16_3f:
|
||||
_readVertexData2f16_3f;
|
||||
case GL_BYTE:
|
||||
case GL_UNSIGNED_BYTE:
|
||||
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ub3f:
|
||||
_readVertexData2ub3f;
|
||||
case GL_SHORT:
|
||||
case GL_UNSIGNED_SHORT:
|
||||
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3us3f:
|
||||
_readVertexData2us3f;
|
||||
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3usq3f:
|
||||
_readVertexData2usq3f;
|
||||
case GL_INT:
|
||||
case GL_UNSIGNED_INT:
|
||||
return (ATTRIB_POINTERS.vertex.size == 3) ? _readVertexData3ui3f:
|
||||
|
@ -517,12 +599,14 @@ ReadUVFunc calcReadUVFunc() {
|
|||
case GL_DOUBLE:
|
||||
case GL_FLOAT:
|
||||
return _readVertexData2f2f;
|
||||
case GL_HALF_FLOAT:
|
||||
return _readVertexData2f16_2f;
|
||||
case GL_BYTE:
|
||||
case GL_UNSIGNED_BYTE:
|
||||
return _readVertexData2ub2f;
|
||||
case GL_SHORT:
|
||||
case GL_UNSIGNED_SHORT:
|
||||
return _readVertexData2us2f;
|
||||
return _readVertexData2usq2f;
|
||||
case GL_INT:
|
||||
case GL_UNSIGNED_INT:
|
||||
return _readVertexData2ui2f;
|
||||
|
@ -539,12 +623,14 @@ ReadUVFunc calcReadSTFunc() {
|
|||
case GL_DOUBLE:
|
||||
case GL_FLOAT:
|
||||
return _readVertexData2f2f;
|
||||
case GL_HALF_FLOAT:
|
||||
return _readVertexData2f16_2f;
|
||||
case GL_BYTE:
|
||||
case GL_UNSIGNED_BYTE:
|
||||
return _readVertexData2ub2f;
|
||||
case GL_SHORT:
|
||||
case GL_UNSIGNED_SHORT:
|
||||
return _readVertexData2us2f;
|
||||
return _readVertexData2usq2f;
|
||||
case GL_INT:
|
||||
case GL_UNSIGNED_INT:
|
||||
return _readVertexData2ui2f;
|
||||
|
@ -561,6 +647,8 @@ ReadNormalFunc calcReadNormalFunc() {
|
|||
case GL_DOUBLE:
|
||||
case GL_FLOAT:
|
||||
return _readVertexData3f3f;
|
||||
case GL_HALF_FLOAT:
|
||||
return _readVertexData3f16_3f;
|
||||
break;
|
||||
case GL_BYTE:
|
||||
case GL_UNSIGNED_BYTE:
|
||||
|
@ -568,7 +656,7 @@ ReadNormalFunc calcReadNormalFunc() {
|
|||
break;
|
||||
case GL_SHORT:
|
||||
case GL_UNSIGNED_SHORT:
|
||||
return _readVertexData3us3f;
|
||||
return _readVertexData3usq3f;
|
||||
break;
|
||||
case GL_INT:
|
||||
case GL_UNSIGNED_INT:
|
||||
|
@ -585,7 +673,6 @@ static void _readPositionData(ReadDiffuseFunc func, const GLuint first, const GL
|
|||
const GLubyte* vptr = ((GLubyte*) ATTRIB_POINTERS.vertex.ptr + (first * vstride));
|
||||
|
||||
float pos[3];
|
||||
float w = 0.0f;
|
||||
|
||||
ITERATE(count) {
|
||||
PREFETCH(vptr + vstride);
|
||||
|
@ -726,9 +813,7 @@ typedef struct {
|
|||
} Float2;
|
||||
|
||||
static const Float3 F3Z = {0.0f, 0.0f, 1.0f};
|
||||
static const Float3 F3ZERO = {0.0f, 0.0f, 0.0f};
|
||||
static const Float2 F2ZERO = {0.0f, 0.0f};
|
||||
static const uint32_t U4ONE = ~0;
|
||||
|
||||
static void generateElementsFastPath(
|
||||
SubmissionTarget* target, const GLsizei first, const GLuint count,
|
||||
|
@ -815,17 +900,15 @@ static void generateElementsFastPath(
|
|||
|
||||
#define POLYMODE QUADS
|
||||
#define PROCESS_VERTEX_FLAGS(it, i) { \
|
||||
if((i + 1) % 4 == 0) { \
|
||||
Vertex* prev = ((it) - 1); \
|
||||
Vertex t = (*prev); \
|
||||
*(prev) = *((it)); \
|
||||
*((it)) = t; \
|
||||
prev->flags = GPU_CMD_VERTEX; \
|
||||
it->flags = GPU_CMD_VERTEX; \
|
||||
if(((i + 1) % 4) == 0) { \
|
||||
Vertex t = *it; \
|
||||
*it = *(it - 1); \
|
||||
*(it - 1) = t; \
|
||||
it->flags = GPU_CMD_VERTEX_EOL; \
|
||||
} else { \
|
||||
it->flags = GPU_CMD_VERTEX; \
|
||||
} \
|
||||
}
|
||||
|
||||
#include "draw_fastpath.inc"
|
||||
#undef PROCESS_VERTEX_FLAGS
|
||||
#undef POLYMODE
|
||||
|
@ -912,24 +995,6 @@ static void transform(SubmissionTarget* target) {
|
|||
TransformVertices(vertex, target->count);
|
||||
}
|
||||
|
||||
static void mat_transform3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
|
||||
const uint8_t* dataIn = (const uint8_t*) xyz;
|
||||
uint8_t* dataOut = (uint8_t*) xyzOut;
|
||||
|
||||
ITERATE(count) {
|
||||
const float* in = (const float*) dataIn;
|
||||
float* out = (float*) dataOut;
|
||||
|
||||
TransformVec3NoMod(
|
||||
in,
|
||||
out
|
||||
);
|
||||
|
||||
dataIn += inStride;
|
||||
dataOut += outStride;
|
||||
}
|
||||
}
|
||||
|
||||
static void mat_transform_normal3(const float* xyz, const float* xyzOut, const uint32_t count, const uint32_t inStride, const uint32_t outStride) {
|
||||
const uint8_t* dataIn = (const uint8_t*) xyz;
|
||||
uint8_t* dataOut = (uint8_t*) xyzOut;
|
||||
|
@ -1170,6 +1235,7 @@ void _glInitSubmissionTarget() {
|
|||
|
||||
|
||||
GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GLenum type, const GLvoid* indices) {
|
||||
|
||||
SubmissionTarget* const target = &SUBMISSION_TARGET;
|
||||
AlignedVector* const extras = target->extras;
|
||||
|
||||
|
@ -1210,17 +1276,22 @@ GL_FORCE_INLINE void submitVertices(GLenum mode, GLsizei first, GLuint count, GL
|
|||
return;
|
||||
}
|
||||
|
||||
GLboolean header_required = (target->output->vector.size == 0) || _glGPUStateIsDirty();
|
||||
|
||||
|
||||
// We don't handle this any further, so just make sure we never pass it down */
|
||||
gl_assert(mode != GL_POLYGON);
|
||||
|
||||
target->output = _glActivePolyList();
|
||||
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
|
||||
target->header_offset = target->output->vector.size;
|
||||
target->start_offset = target->header_offset + (header_required);
|
||||
gl_assert(target->output);
|
||||
gl_assert(extras);
|
||||
|
||||
uint32_t vector_size = aligned_vector_size(&target->output->vector);
|
||||
|
||||
GLboolean header_required = (vector_size == 0) || _glGPUStateIsDirty();
|
||||
|
||||
target->count = (mode == GL_TRIANGLE_FAN) ? ((count - 2) * 3) : count;
|
||||
target->header_offset = vector_size;
|
||||
target->start_offset = target->header_offset + (header_required ? 1 : 0);
|
||||
|
||||
gl_assert(target->start_offset >= target->header_offset);
|
||||
gl_assert(target->count);
|
||||
|
||||
/* Make sure we have enough room for all the "extra" data */
|
||||
|
|
|
@ -5,75 +5,123 @@
|
|||
|
||||
MAKE_FUNC(POLYMODE)
|
||||
{
|
||||
const Vertex* const start = _glSubmissionTargetStart(target);
|
||||
const VertexExtra* const ve_start = aligned_vector_at(target->extras, 0);
|
||||
|
||||
const GLuint vstride = ATTRIB_POINTERS.vertex.stride;
|
||||
GLuint uvstride = ATTRIB_POINTERS.uv.stride;
|
||||
GLuint ststride = ATTRIB_POINTERS.st.stride;
|
||||
GLuint dstride = ATTRIB_POINTERS.colour.stride;
|
||||
GLuint nstride = ATTRIB_POINTERS.normal.stride;
|
||||
|
||||
const GLubyte* pos = (ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG) ? ATTRIB_POINTERS.vertex.ptr + (first * vstride) : NULL;
|
||||
const GLubyte* uv = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + (first * uvstride) : NULL;
|
||||
const GLubyte* col = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (first * dstride) : NULL;
|
||||
const GLubyte* st = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (first * ststride) : NULL;
|
||||
const GLubyte* n = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (first * nstride) : NULL;
|
||||
|
||||
const float w = 1.0f;
|
||||
|
||||
if(!pos) {
|
||||
static const float w = 1.0f;
|
||||
if(!(ENABLED_VERTEX_ATTRIBUTES & VERTEX_ENABLED_FLAG)) {
|
||||
/* If we don't have vertices, do nothing */
|
||||
return;
|
||||
}
|
||||
|
||||
if(!col) {
|
||||
col = (GLubyte*) &U4ONE;
|
||||
dstride = 0;
|
||||
}
|
||||
/* This is the best value we have. PROCESS_VERTEX_FLAGS needs to operate on quads and tris and so
|
||||
this need to be divisible by 4 and 3. Even though we should be able to go much higher than this
|
||||
and still be cache-local, trial and error says otherwise... */
|
||||
|
||||
if(!uv) {
|
||||
uv = (GLubyte*) &F2ZERO;
|
||||
uvstride = 0;
|
||||
}
|
||||
#define BATCH_SIZE 60
|
||||
|
||||
if(!st) {
|
||||
st = (GLubyte*) &F2ZERO;
|
||||
ststride = 0;
|
||||
}
|
||||
GLuint min = 0;
|
||||
GLuint stride;
|
||||
const GLubyte* ptr;
|
||||
Vertex* it;
|
||||
VertexExtra* ve;
|
||||
|
||||
if(!n) {
|
||||
n = (GLubyte*) &F3Z;
|
||||
nstride = 0;
|
||||
}
|
||||
|
||||
VertexExtra* ve = (VertexExtra*) ve_start;
|
||||
Vertex* it = (Vertex*) start;
|
||||
for(min = 0; min < count; min += BATCH_SIZE) {
|
||||
const Vertex* start = ((Vertex*) _glSubmissionTargetStart(target)) + min;
|
||||
const int_fast32_t loop = ((min + BATCH_SIZE) > count) ? count - min : BATCH_SIZE;
|
||||
const int offset = (first + min);
|
||||
|
||||
for(int_fast32_t i = 0; i < count; ++i) {
|
||||
TransformVertex((const float*) pos, &w, it->xyz, &it->w);
|
||||
pos += vstride;
|
||||
PREFETCH(pos);
|
||||
stride = ATTRIB_POINTERS.uv.stride;
|
||||
ptr = (ENABLED_VERTEX_ATTRIBUTES & UV_ENABLED_FLAG) ? ATTRIB_POINTERS.uv.ptr + ((first + min) * stride) : NULL;
|
||||
it = (Vertex*) start;
|
||||
|
||||
*((Float2*) it->uv) = *((Float2*) uv);
|
||||
uv += uvstride;
|
||||
PREFETCH(uv);
|
||||
if(ptr) {
|
||||
PREFETCH(ptr);
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||
PREFETCH(ptr + stride);
|
||||
it->uv[0] = ((float*) ptr)[0];
|
||||
it->uv[1] = ((float*) ptr)[1];
|
||||
ptr += stride;
|
||||
}
|
||||
} else {
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||
it->uv[0] = 0;
|
||||
it->uv[1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
*((uint32_t*) it->bgra) = *((uint32_t*) col);
|
||||
col += dstride;
|
||||
PREFETCH(col);
|
||||
stride = ATTRIB_POINTERS.colour.stride;
|
||||
ptr = (ENABLED_VERTEX_ATTRIBUTES & DIFFUSE_ENABLED_FLAG) ? ATTRIB_POINTERS.colour.ptr + (offset * stride) : NULL;
|
||||
it = (Vertex*) start;
|
||||
|
||||
*((Float2*) ve->st) = *((Float2*) st);
|
||||
st += ststride;
|
||||
PREFETCH(st);
|
||||
if(ptr) {
|
||||
PREFETCH(ptr);
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||
PREFETCH(ptr + stride);
|
||||
it->bgra[0] = ptr[0];
|
||||
it->bgra[1] = ptr[1];
|
||||
it->bgra[2] = ptr[2];
|
||||
it->bgra[3] = ptr[3];
|
||||
ptr += stride;
|
||||
}
|
||||
} else {
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||
*((uint32_t*) it->bgra) = ~0;
|
||||
}
|
||||
}
|
||||
|
||||
*((Float3*) ve->nxyz) = *((Float3*) n);
|
||||
n += nstride;
|
||||
PREFETCH(n);
|
||||
stride = ATTRIB_POINTERS.vertex.stride;
|
||||
ptr = ATTRIB_POINTERS.vertex.ptr + (offset * stride);
|
||||
it = (Vertex*) start;
|
||||
|
||||
PROCESS_VERTEX_FLAGS(it, i);
|
||||
PREFETCH(ptr);
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++it) {
|
||||
PREFETCH(ptr + stride);
|
||||
TransformVertex((const float*) ptr, &w, it->xyz, &it->w);
|
||||
PROCESS_VERTEX_FLAGS(it, min + i);
|
||||
ptr += stride;
|
||||
}
|
||||
|
||||
++it;
|
||||
++ve;
|
||||
start = aligned_vector_at(target->extras, min);
|
||||
|
||||
stride = ATTRIB_POINTERS.st.stride;
|
||||
ptr = (ENABLED_VERTEX_ATTRIBUTES & ST_ENABLED_FLAG) ? ATTRIB_POINTERS.st.ptr + (offset * stride) : NULL;
|
||||
ve = (VertexExtra*) start;
|
||||
|
||||
if(ptr) {
|
||||
PREFETCH(ptr);
|
||||
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
|
||||
PREFETCH(ptr + stride);
|
||||
ve->st[0] = ((float*) ptr)[0];
|
||||
ve->st[1] = ((float*) ptr)[1];
|
||||
ptr += stride;
|
||||
}
|
||||
} else {
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
|
||||
ve->st[0] = 0;
|
||||
ve->st[1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
stride = ATTRIB_POINTERS.normal.stride;
|
||||
ptr = (ENABLED_VERTEX_ATTRIBUTES & NORMAL_ENABLED_FLAG) ? ATTRIB_POINTERS.normal.ptr + (offset * stride) : NULL;
|
||||
ve = (VertexExtra*) start;
|
||||
|
||||
if(ptr) {
|
||||
PREFETCH(ptr);
|
||||
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
|
||||
PREFETCH(ptr + stride);
|
||||
ve->nxyz[0] = ((float*) ptr)[0];
|
||||
ve->nxyz[1] = ((float*) ptr)[1];
|
||||
ve->nxyz[2] = ((float*) ptr)[2];
|
||||
ptr += stride;
|
||||
}
|
||||
} else {
|
||||
for(int_fast32_t i = 0; i < loop; ++i, ++ve) {
|
||||
ve->nxyz[0] = 0;
|
||||
ve->nxyz[1] = 0;
|
||||
ve->nxyz[2] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
38
GL/flush.c
38
GL/flush.c
|
@ -46,10 +46,22 @@ void APIENTRY glKosInitConfig(GLdcConfig* config) {
|
|||
config->initial_pt_capacity = 512 * 3;
|
||||
config->initial_tr_capacity = 1024 * 3;
|
||||
config->initial_immediate_capacity = 1024 * 3;
|
||||
config->internal_palette_format = GL_RGBA8;
|
||||
|
||||
// RGBA4444 is the fastest general format - 8888 will cause a perf issue
|
||||
config->internal_palette_format = GL_RGBA4;
|
||||
|
||||
config->texture_twiddle = GL_TRUE;
|
||||
}
|
||||
|
||||
static bool _initialized = false;
|
||||
|
||||
void APIENTRY glKosInitEx(GLdcConfig* config) {
|
||||
if(_initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
_initialized = true;
|
||||
|
||||
TRACE();
|
||||
|
||||
printf("\nWelcome to GLdc! Git revision: %s\n\n", GLDC_VERSION);
|
||||
|
@ -70,6 +82,10 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
|
|||
|
||||
_glInitTextures();
|
||||
|
||||
if(config->texture_twiddle) {
|
||||
glEnable(GL_TEXTURE_TWIDDLE_KOS);
|
||||
}
|
||||
|
||||
OP_LIST.list_type = GPU_LIST_OP_POLY;
|
||||
PT_LIST.list_type = GPU_LIST_PT_POLY;
|
||||
TR_LIST.list_type = GPU_LIST_TR_POLY;
|
||||
|
@ -83,6 +99,12 @@ void APIENTRY glKosInitEx(GLdcConfig* config) {
|
|||
aligned_vector_reserve(&TR_LIST.vector, config->initial_tr_capacity);
|
||||
}
|
||||
|
||||
void APIENTRY glKosShutdown() {
|
||||
aligned_vector_clear(&OP_LIST.vector);
|
||||
aligned_vector_clear(&PT_LIST.vector);
|
||||
aligned_vector_clear(&TR_LIST.vector);
|
||||
}
|
||||
|
||||
void APIENTRY glKosInit() {
|
||||
GLdcConfig config;
|
||||
glKosInitConfig(&config);
|
||||
|
@ -93,21 +115,21 @@ void APIENTRY glKosSwapBuffers() {
|
|||
TRACE();
|
||||
|
||||
SceneBegin();
|
||||
if(OP_LIST.vector.size > 2) {
|
||||
if(aligned_vector_header(&OP_LIST.vector)->size > 2) {
|
||||
SceneListBegin(GPU_LIST_OP_POLY);
|
||||
SceneListSubmit(OP_LIST.vector.data, OP_LIST.vector.size);
|
||||
SceneListSubmit((Vertex*) aligned_vector_front(&OP_LIST.vector), aligned_vector_size(&OP_LIST.vector));
|
||||
SceneListFinish();
|
||||
}
|
||||
|
||||
if(PT_LIST.vector.size > 2) {
|
||||
if(aligned_vector_header(&PT_LIST.vector)->size > 2) {
|
||||
SceneListBegin(GPU_LIST_PT_POLY);
|
||||
SceneListSubmit(PT_LIST.vector.data, PT_LIST.vector.size);
|
||||
SceneListSubmit((Vertex*) aligned_vector_front(&PT_LIST.vector), aligned_vector_size(&PT_LIST.vector));
|
||||
SceneListFinish();
|
||||
}
|
||||
|
||||
if(TR_LIST.vector.size > 2) {
|
||||
if(aligned_vector_header(&TR_LIST.vector)->size > 2) {
|
||||
SceneListBegin(GPU_LIST_TR_POLY);
|
||||
SceneListSubmit(TR_LIST.vector.data, TR_LIST.vector.size);
|
||||
SceneListSubmit((Vertex*) aligned_vector_front(&TR_LIST.vector), aligned_vector_size(&TR_LIST.vector));
|
||||
SceneListFinish();
|
||||
}
|
||||
SceneFinish();
|
||||
|
@ -117,4 +139,4 @@ void APIENTRY glKosSwapBuffers() {
|
|||
aligned_vector_clear(&TR_LIST.vector);
|
||||
|
||||
_glApplyScissor(true);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -254,7 +254,7 @@ GLboolean _glGenerateMipmapTwiddled(const GLuint pvrFormat, const GLubyte* prevD
|
|||
return GL_TRUE;
|
||||
}
|
||||
|
||||
void APIENTRY glGenerateMipmapEXT(GLenum target) {
|
||||
void APIENTRY glGenerateMipmap(GLenum target) {
|
||||
if(target != GL_TEXTURE_2D) {
|
||||
_glKosThrowError(GL_INVALID_OPERATION, __func__);
|
||||
return;
|
||||
|
@ -334,7 +334,7 @@ GLAPI GLvoid APIENTRY gluBuild2DMipmaps(GLenum target, GLint internalFormat,
|
|||
unsigned byte data, and finally the data itself. */
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, 3, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data);
|
||||
|
||||
glGenerateMipmapEXT(GL_TEXTURE_2D);
|
||||
glGenerateMipmap(GL_TEXTURE_2D);
|
||||
}
|
||||
|
||||
GLenum APIENTRY glCheckFramebufferStatusEXT(GLenum target) {
|
||||
|
|
|
@ -17,10 +17,10 @@ extern inline GLuint _glRecalcFastPath();
|
|||
GLboolean IMMEDIATE_MODE_ACTIVE = GL_FALSE;
|
||||
static GLenum ACTIVE_POLYGON_MODE = GL_TRIANGLES;
|
||||
|
||||
static GLfloat NORMAL[3] = {0.0f, 0.0f, 1.0f};
|
||||
static GLubyte COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
|
||||
static GLfloat UV_COORD[2] = {0.0f, 0.0f};
|
||||
static GLfloat ST_COORD[2] = {0.0f, 0.0f};
|
||||
static GLfloat __attribute__((aligned(32))) NORMAL[3] = {0.0f, 0.0f, 1.0f};
|
||||
static GLubyte __attribute__((aligned(32))) COLOR[4] = {255, 255, 255, 255}; /* ARGB order for speed */
|
||||
static GLfloat __attribute__((aligned(32))) UV_COORD[2] = {0.0f, 0.0f};
|
||||
static GLfloat __attribute__((aligned(32))) ST_COORD[2] = {0.0f, 0.0f};
|
||||
|
||||
static AlignedVector VERTICES;
|
||||
static AttribPointerList IM_ATTRIBS;
|
||||
|
@ -30,7 +30,7 @@ static AttribPointerList IM_ATTRIBS;
|
|||
can be applied faster */
|
||||
static GLuint IM_ENABLED_VERTEX_ATTRIBUTES = 0;
|
||||
|
||||
typedef struct {
|
||||
typedef struct __attribute__((aligned(32))) {
|
||||
GLfloat x;
|
||||
GLfloat y;
|
||||
GLfloat z;
|
||||
|
@ -50,7 +50,7 @@ void _glInitImmediateMode(GLuint initial_size) {
|
|||
aligned_vector_init(&VERTICES, sizeof(IMVertex));
|
||||
aligned_vector_reserve(&VERTICES, initial_size);
|
||||
|
||||
IM_ATTRIBS.vertex.ptr = VERTICES.data;
|
||||
IM_ATTRIBS.vertex.ptr = aligned_vector_front(&VERTICES);
|
||||
IM_ATTRIBS.vertex.size = 3;
|
||||
IM_ATTRIBS.vertex.type = GL_FLOAT;
|
||||
IM_ATTRIBS.vertex.stride = sizeof(IMVertex);
|
||||
|
@ -161,31 +161,27 @@ void APIENTRY glColor3fv(const GLfloat* v) {
|
|||
void APIENTRY glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
|
||||
IM_ENABLED_VERTEX_ATTRIBUTES |= VERTEX_ENABLED_FLAG;
|
||||
|
||||
unsigned int cap = VERTICES.capacity;
|
||||
IMVertex* vert = aligned_vector_extend(&VERTICES, 1);
|
||||
|
||||
if(cap != VERTICES.capacity) {
|
||||
/* Resizing could've invalidated the pointers */
|
||||
IM_ATTRIBS.vertex.ptr = VERTICES.data;
|
||||
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 3);
|
||||
IM_ATTRIBS.st.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 5);
|
||||
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7);
|
||||
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.vertex.ptr + (sizeof(GLfloat) * 7) + sizeof(uint32_t);
|
||||
}
|
||||
/* Resizing could've invalidated the pointers */
|
||||
IM_ATTRIBS.vertex.ptr = VERTICES.data;
|
||||
IM_ATTRIBS.uv.ptr = IM_ATTRIBS.vertex.ptr + 12;
|
||||
IM_ATTRIBS.st.ptr = IM_ATTRIBS.uv.ptr + 8;
|
||||
IM_ATTRIBS.colour.ptr = IM_ATTRIBS.st.ptr + 8;
|
||||
IM_ATTRIBS.normal.ptr = IM_ATTRIBS.colour.ptr + 4;
|
||||
|
||||
vert->x = x;
|
||||
vert->y = y;
|
||||
vert->z = z;
|
||||
vert->u = UV_COORD[0];
|
||||
vert->v = UV_COORD[1];
|
||||
vert->s = ST_COORD[0];
|
||||
vert->t = ST_COORD[1];
|
||||
|
||||
*((uint32_t*) vert->bgra) = *((uint32_t*) COLOR);
|
||||
|
||||
vert->nx = NORMAL[0];
|
||||
vert->ny = NORMAL[1];
|
||||
vert->nz = NORMAL[2];
|
||||
uint32_t* dest = (uint32_t*) &vert->x;
|
||||
*(dest++) = *((uint32_t*) &x);
|
||||
*(dest++) = *((uint32_t*) &y);
|
||||
*(dest++) = *((uint32_t*) &z);
|
||||
*(dest++) = *((uint32_t*) &UV_COORD[0]);
|
||||
*(dest++) = *((uint32_t*) &UV_COORD[1]);
|
||||
*(dest++) = *((uint32_t*) &ST_COORD[0]);
|
||||
*(dest++) = *((uint32_t*) &ST_COORD[1]);
|
||||
*(dest++) = *((uint32_t*) COLOR);
|
||||
*(dest++) = *((uint32_t*) &NORMAL[0]);
|
||||
*(dest++) = *((uint32_t*) &NORMAL[1]);
|
||||
*(dest++) = *((uint32_t*) &NORMAL[2]);
|
||||
}
|
||||
|
||||
void APIENTRY glVertex3fv(const GLfloat* v) {
|
||||
|
@ -281,7 +277,7 @@ void APIENTRY glEnd() {
|
|||
FAST_PATH_ENABLED = GL_TRUE;
|
||||
#endif
|
||||
|
||||
glDrawArrays(ACTIVE_POLYGON_MODE, 0, VERTICES.size);
|
||||
glDrawArrays(ACTIVE_POLYGON_MODE, 0, aligned_vector_header(&VERTICES)->size);
|
||||
|
||||
ATTRIB_POINTERS = stashed_attrib_pointers;
|
||||
|
||||
|
|
|
@ -124,8 +124,10 @@ void APIENTRY glLightModeli(GLenum pname, const GLint param) {
|
|||
void APIENTRY glLightModelfv(GLenum pname, const GLfloat *params) {
|
||||
switch(pname) {
|
||||
case GL_LIGHT_MODEL_AMBIENT: {
|
||||
_glSetLightModelSceneAmbient(params);
|
||||
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
|
||||
if(memcmp(_glGetLightModelSceneAmbient(), params, sizeof(float) * 4) != 0) {
|
||||
_glSetLightModelSceneAmbient(params);
|
||||
_glPrecalcLightingValues(SCENE_AMBIENT_MASK);
|
||||
}
|
||||
} break;
|
||||
case GL_LIGHT_MODEL_LOCAL_VIEWER:
|
||||
_glSetLightModelViewerInEyeCoordinates((*params) ? GL_TRUE : GL_FALSE);
|
||||
|
@ -164,18 +166,28 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
|
|||
|
||||
LightSource* l = _glLightAt(idx);
|
||||
|
||||
GLboolean rebuild = GL_FALSE;
|
||||
|
||||
switch(pname) {
|
||||
case GL_AMBIENT:
|
||||
memcpy(l->ambient, params, sizeof(GLfloat) * 4);
|
||||
rebuild = memcmp(l->ambient, params, sizeof(GLfloat) * 4) != 0;
|
||||
if(rebuild) {
|
||||
memcpy(l->ambient, params, sizeof(GLfloat) * 4);
|
||||
}
|
||||
break;
|
||||
case GL_DIFFUSE:
|
||||
memcpy(l->diffuse, params, sizeof(GLfloat) * 4);
|
||||
rebuild = memcmp(l->diffuse, params, sizeof(GLfloat) * 4) != 0;
|
||||
if(rebuild) {
|
||||
memcpy(l->diffuse, params, sizeof(GLfloat) * 4);
|
||||
}
|
||||
break;
|
||||
case GL_SPECULAR:
|
||||
memcpy(l->specular, params, sizeof(GLfloat) * 4);
|
||||
rebuild = memcmp(l->specular, params, sizeof(GLfloat) * 4) != 0;
|
||||
if(rebuild) {
|
||||
memcpy(l->specular, params, sizeof(GLfloat) * 4);
|
||||
}
|
||||
break;
|
||||
case GL_POSITION: {
|
||||
_glMatrixLoadModelView();
|
||||
memcpy(l->position, params, sizeof(GLfloat) * 4);
|
||||
|
||||
l->isDirectional = params[3] == 0.0f;
|
||||
|
@ -183,6 +195,7 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
|
|||
if(l->isDirectional) {
|
||||
//FIXME: Do we need to rotate directional lights?
|
||||
} else {
|
||||
_glMatrixLoadModelView();
|
||||
TransformVec3(l->position);
|
||||
}
|
||||
}
|
||||
|
@ -204,7 +217,10 @@ void APIENTRY glLightfv(GLenum light, GLenum pname, const GLfloat *params) {
|
|||
return;
|
||||
}
|
||||
|
||||
_glPrecalcLightingValues(mask);
|
||||
if(rebuild) {
|
||||
_glPrecalcLightingValues(mask);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void APIENTRY glLightf(GLenum light, GLenum pname, GLfloat param) {
|
||||
|
@ -258,25 +274,47 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
|
|||
|
||||
Material* material = _glActiveMaterial();
|
||||
|
||||
GLboolean rebuild = GL_FALSE;
|
||||
|
||||
switch(pname) {
|
||||
case GL_SHININESS:
|
||||
glMaterialf(face, pname, *params);
|
||||
rebuild = GL_TRUE;
|
||||
break;
|
||||
case GL_AMBIENT:
|
||||
vec4cpy(material->ambient, params);
|
||||
break;
|
||||
case GL_AMBIENT: {
|
||||
if(memcmp(material->ambient, params, sizeof(float) * 4) != 0) {
|
||||
vec4cpy(material->ambient, params);
|
||||
rebuild = GL_TRUE;
|
||||
}
|
||||
} break;
|
||||
case GL_DIFFUSE:
|
||||
vec4cpy(material->diffuse, params);
|
||||
if(memcmp(material->diffuse, params, sizeof(float) * 4) != 0) {
|
||||
vec4cpy(material->diffuse, params);
|
||||
rebuild = GL_TRUE;
|
||||
}
|
||||
break;
|
||||
case GL_SPECULAR:
|
||||
vec4cpy(material->specular, params);
|
||||
if(memcmp(material->specular, params, sizeof(float) * 4) != 0) {
|
||||
vec4cpy(material->specular, params);
|
||||
rebuild = GL_TRUE;
|
||||
}
|
||||
break;
|
||||
case GL_EMISSION:
|
||||
vec4cpy(material->emissive, params);
|
||||
if(memcmp(material->emissive, params, sizeof(float) * 4) != 0) {
|
||||
vec4cpy(material->emissive, params);
|
||||
rebuild = GL_TRUE;
|
||||
}
|
||||
break;
|
||||
case GL_AMBIENT_AND_DIFFUSE: {
|
||||
vec4cpy(material->ambient, params);
|
||||
vec4cpy(material->diffuse, params);
|
||||
rebuild = (
|
||||
memcmp(material->ambient, params, sizeof(float) * 4) != 0 ||
|
||||
memcmp(material->diffuse, params, sizeof(float) * 4) != 0
|
||||
);
|
||||
|
||||
if(rebuild) {
|
||||
vec4cpy(material->ambient, params);
|
||||
vec4cpy(material->diffuse, params);
|
||||
}
|
||||
} break;
|
||||
case GL_COLOR_INDEXES:
|
||||
default: {
|
||||
|
@ -285,13 +323,15 @@ void APIENTRY glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) {
|
|||
}
|
||||
}
|
||||
|
||||
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
|
||||
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
|
||||
(pname == GL_SPECULAR) ? SPECULAR_MASK:
|
||||
(pname == GL_EMISSION) ? EMISSION_MASK:
|
||||
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
|
||||
if(rebuild) {
|
||||
GLuint updateMask = (pname == GL_AMBIENT) ? AMBIENT_MASK:
|
||||
(pname == GL_DIFFUSE) ? DIFFUSE_MASK:
|
||||
(pname == GL_SPECULAR) ? SPECULAR_MASK:
|
||||
(pname == GL_EMISSION) ? EMISSION_MASK:
|
||||
(pname == GL_AMBIENT_AND_DIFFUSE) ? AMBIENT_MASK | DIFFUSE_MASK : 0;
|
||||
|
||||
_glPrecalcLightingValues(updateMask);
|
||||
_glPrecalcLightingValues(updateMask);
|
||||
}
|
||||
}
|
||||
|
||||
void APIENTRY glColorMaterial(GLenum face, GLenum mode) {
|
||||
|
|
59
GL/matrix.c
59
GL/matrix.c
|
@ -13,8 +13,8 @@
|
|||
GLfloat DEPTH_RANGE_MULTIPLIER_L = (1 - 0) / 2;
|
||||
GLfloat DEPTH_RANGE_MULTIPLIER_H = (0 + 1) / 2;
|
||||
|
||||
static Stack MATRIX_STACKS[3]; // modelview, projection, texture
|
||||
static Matrix4x4 NORMAL_MATRIX __attribute__((aligned(32)));
|
||||
static Stack __attribute__((aligned(32))) MATRIX_STACKS[4]; // modelview, projection, texture
|
||||
static Matrix4x4 __attribute__((aligned(32))) NORMAL_MATRIX;
|
||||
|
||||
Viewport VIEWPORT = {
|
||||
0, 0, 640, 480, 320.0f, 240.0f, 320.0f, 240.0f
|
||||
|
@ -23,7 +23,7 @@ Viewport VIEWPORT = {
|
|||
static GLenum MATRIX_MODE = GL_MODELVIEW;
|
||||
static GLubyte MATRIX_IDX = 0;
|
||||
|
||||
static const Matrix4x4 IDENTITY = {
|
||||
static const Matrix4x4 __attribute__((aligned(32))) IDENTITY = {
|
||||
1.0f, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f, 0.0f,
|
||||
|
@ -106,7 +106,11 @@ void APIENTRY glMatrixMode(GLenum mode) {
|
|||
}
|
||||
|
||||
void APIENTRY glPushMatrix() {
|
||||
stack_push(MATRIX_STACKS + MATRIX_IDX, stack_top(MATRIX_STACKS + MATRIX_IDX));
|
||||
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
|
||||
assert(top);
|
||||
void* ret = stack_push(MATRIX_STACKS + MATRIX_IDX, top);
|
||||
(void) ret;
|
||||
assert(ret);
|
||||
}
|
||||
|
||||
void APIENTRY glPopMatrix() {
|
||||
|
@ -127,10 +131,16 @@ void APIENTRY glTranslatef(GLfloat x, GLfloat y, GLfloat z) {
|
|||
0.0f, 0.0f, 1.0f, 0.0f,
|
||||
x, y, z, 1.0f
|
||||
};
|
||||
void* top = stack_top(MATRIX_STACKS + MATRIX_IDX);
|
||||
assert(top);
|
||||
|
||||
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
|
||||
UploadMatrix4x4(top);
|
||||
MultiplyMatrix4x4(&trn);
|
||||
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
|
||||
|
||||
top = stack_top(MATRIX_STACKS + MATRIX_IDX);
|
||||
assert(top);
|
||||
|
||||
DownloadMatrix4x4(top);
|
||||
|
||||
if(MATRIX_MODE == GL_MODELVIEW) {
|
||||
recalculateNormalMatrix();
|
||||
|
@ -200,28 +210,9 @@ void APIENTRY glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) {
|
|||
|
||||
/* Load an arbitrary matrix */
|
||||
void APIENTRY glLoadMatrixf(const GLfloat *m) {
|
||||
static Matrix4x4 TEMP;
|
||||
|
||||
TEMP[M0] = m[0];
|
||||
TEMP[M1] = m[1];
|
||||
TEMP[M2] = m[2];
|
||||
TEMP[M3] = m[3];
|
||||
|
||||
TEMP[M4] = m[4];
|
||||
TEMP[M5] = m[5];
|
||||
TEMP[M6] = m[6];
|
||||
TEMP[M7] = m[7];
|
||||
|
||||
TEMP[M8] = m[8];
|
||||
TEMP[M9] = m[9];
|
||||
TEMP[M10] = m[10];
|
||||
TEMP[M11] = m[11];
|
||||
|
||||
TEMP[M12] = m[12];
|
||||
TEMP[M13] = m[13];
|
||||
TEMP[M14] = m[14];
|
||||
TEMP[M15] = m[15];
|
||||
static Matrix4x4 __attribute__((aligned(32))) TEMP;
|
||||
|
||||
memcpy(TEMP, m, sizeof(float) * 16);
|
||||
stack_replace(MATRIX_STACKS + MATRIX_IDX, TEMP);
|
||||
|
||||
if(MATRIX_MODE == GL_MODELVIEW) {
|
||||
|
@ -289,18 +280,10 @@ void APIENTRY glFrustum(GLfloat left, GLfloat right,
|
|||
/* Multiply the current matrix by an arbitrary matrix */
|
||||
void glMultMatrixf(const GLfloat *m) {
|
||||
Matrix4x4 TEMP __attribute__((aligned(32)));
|
||||
const Matrix4x4 *pMatrix;
|
||||
|
||||
if (((GLint)m)&0xf){ /* Unaligned matrix */
|
||||
pMatrix = &TEMP;
|
||||
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
|
||||
}
|
||||
else{
|
||||
pMatrix = (const Matrix4x4*) m;
|
||||
}
|
||||
MEMCPY4(TEMP, m, sizeof(Matrix4x4));
|
||||
|
||||
UploadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
|
||||
MultiplyMatrix4x4(pMatrix);
|
||||
MultiplyMatrix4x4(&TEMP);
|
||||
DownloadMatrix4x4(stack_top(MATRIX_STACKS + MATRIX_IDX));
|
||||
|
||||
if(MATRIX_MODE == GL_MODELVIEW) {
|
||||
|
@ -426,7 +409,7 @@ GL_FORCE_INLINE void vec3f_normalize_sh4(float *v){
|
|||
void gluLookAt(GLfloat eyex, GLfloat eyey, GLfloat eyez, GLfloat centerx,
|
||||
GLfloat centery, GLfloat centerz, GLfloat upx, GLfloat upy,
|
||||
GLfloat upz) {
|
||||
GLfloat m [16];
|
||||
GLfloat m [16] __attribute__((aligned(32)));
|
||||
GLfloat f [3];
|
||||
GLfloat u [3];
|
||||
GLfloat s [3];
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <stdbool.h>
|
||||
|
||||
#include "gl_assert.h"
|
||||
#include "types.h"
|
||||
|
||||
#define MEMSET(dst, v, size) memset((dst), (v), (size))
|
||||
|
||||
|
@ -260,7 +261,7 @@ typedef float Matrix4x4[16];
|
|||
void SceneBegin();
|
||||
|
||||
void SceneListBegin(GPUList list);
|
||||
void SceneListSubmit(void* src, int n);
|
||||
void SceneListSubmit(Vertex* v2, int n);
|
||||
void SceneListFinish();
|
||||
|
||||
void SceneFinish();
|
||||
|
|
|
@ -9,9 +9,7 @@
|
|||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
#define SQ_BASE_ADDRESS 0xe0000000
|
||||
|
||||
static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
|
||||
#define SQ_BASE_ADDRESS (void*) 0xe0000000
|
||||
|
||||
|
||||
GL_FORCE_INLINE bool glIsVertex(const float flags) {
|
||||
|
@ -33,14 +31,23 @@ void InitGPU(_Bool autosort, _Bool fsaa) {
|
|||
};
|
||||
|
||||
pvr_init(¶ms);
|
||||
|
||||
/* If we're PAL and we're NOT VGA, then use 50hz by default. This is the safest
|
||||
thing to do. If someone wants to force 60hz then they can call vid_set_mode later and hopefully
|
||||
that'll work... */
|
||||
|
||||
int cable = vid_check_cable();
|
||||
int region = flashrom_get_region();
|
||||
|
||||
if(region == FLASHROM_REGION_EUROPE && cable != CT_VGA) {
|
||||
printf("PAL region without VGA - enabling 50hz");
|
||||
vid_set_mode(DM_640x480_PAL_IL, PM_RGB565);
|
||||
}
|
||||
}
|
||||
|
||||
void SceneBegin() {
|
||||
pvr_wait_ready();
|
||||
pvr_scene_begin();
|
||||
|
||||
QACR0 = 0x11; /* Enable the direct texture path by setting the higher two bits */
|
||||
QACR1 = 0x11;
|
||||
}
|
||||
|
||||
void SceneListBegin(GPUList list) {
|
||||
|
@ -52,380 +59,399 @@ GL_FORCE_INLINE float _glFastInvert(float x) {
|
|||
}
|
||||
|
||||
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
|
||||
TRACE();
|
||||
|
||||
const float f = _glFastInvert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = __builtin_fmaf(
|
||||
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||
);
|
||||
|
||||
vertex->xyz[1] = h - __builtin_fmaf(
|
||||
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||
);
|
||||
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
|
||||
vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240;
|
||||
|
||||
/* Orthographic projections need to use invZ otherwise we lose
|
||||
the depth information. As w == 1, and clip-space range is -w to +w
|
||||
we add 1.0 to the Z to bring it into range. We add a little extra to
|
||||
avoid a divide by zero.
|
||||
*/
|
||||
|
||||
vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
|
||||
if(vertex->w == 1.0f) {
|
||||
vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]);
|
||||
} else {
|
||||
vertex->xyz[2] = f;
|
||||
}
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(uint32_t* d, const Vertex* v) {
|
||||
#ifndef NDEBUG
|
||||
gl_assert(!isnan(v->xyz[2]));
|
||||
gl_assert(!isnan(v->w));
|
||||
#endif
|
||||
|
||||
#if CLIP_DEBUG
|
||||
printf("Submitting: %x (%x)\n", v, v->flags);
|
||||
#endif
|
||||
volatile uint32_t *sq = SQ_BASE_ADDRESS;
|
||||
|
||||
uint32_t *s = (uint32_t*) v;
|
||||
__asm__("pref @%0" : : "r"(s + 8)); /* prefetch 32 bytes for next loop */
|
||||
d[0] = *(s++);
|
||||
d[1] = *(s++);
|
||||
d[2] = *(s++);
|
||||
d[3] = *(s++);
|
||||
d[4] = *(s++);
|
||||
d[5] = *(s++);
|
||||
d[6] = *(s++);
|
||||
d[7] = *(s++);
|
||||
__asm__("pref @%0" : : "r"(d));
|
||||
d += 8;
|
||||
static inline void _glFlushBuffer() {
|
||||
TRACE();
|
||||
|
||||
/* Wait for both store queues to complete */
|
||||
sq = (uint32_t*) 0xe0000000;
|
||||
sq[0] = sq[8] = 0;
|
||||
}
|
||||
|
||||
static struct __attribute__((aligned(32))) {
|
||||
Vertex* v;
|
||||
int visible;
|
||||
} triangle[3];
|
||||
static inline void _glPushHeaderOrVertex(Vertex* v) {
|
||||
TRACE();
|
||||
|
||||
static int tri_count = 0;
|
||||
static int strip_count = 0;
|
||||
|
||||
GL_FORCE_INLINE void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) {
|
||||
const static uint32_t MASK1 = 0x00FF00FF;
|
||||
const static uint32_t MASK2 = 0xFF00FF00;
|
||||
|
||||
const uint32_t f2 = 256 * t;
|
||||
const uint32_t f1 = 256 - f2;
|
||||
|
||||
*out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) |
|
||||
(((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2);
|
||||
uint32_t* s = (uint32_t*) v;
|
||||
sq[0] = *(s++);
|
||||
sq[1] = *(s++);
|
||||
sq[2] = *(s++);
|
||||
sq[3] = *(s++);
|
||||
sq[4] = *(s++);
|
||||
sq[5] = *(s++);
|
||||
sq[6] = *(s++);
|
||||
sq[7] = *(s++);
|
||||
__asm__("pref @%0" : : "r"(sq));
|
||||
sq += 8;
|
||||
}
|
||||
|
||||
static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
|
||||
/* Clipping time! */
|
||||
static inline void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) {
|
||||
const static float o = 0.003921569f; // 1 / 255
|
||||
const float d0 = v1->w + v1->xyz[2];
|
||||
const float d1 = v2->w + v2->xyz[2];
|
||||
const float sign = ((2.0f * (d1 < d0)) - 1.0f);
|
||||
const float epsilon = -0.00001f * sign;
|
||||
const float n = (d0 - d1);
|
||||
const float r = (1.f / sqrtf(n * n)) * sign;
|
||||
float t = fmaf(r, d0, epsilon);
|
||||
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
|
||||
const float invt = 1.0f - t;
|
||||
|
||||
vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
|
||||
vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
|
||||
vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
|
||||
vout->w = fmaf(v2->w - v1->w, t, v1->w);
|
||||
vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
|
||||
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
|
||||
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
|
||||
|
||||
vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
|
||||
vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
|
||||
vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
|
||||
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
|
||||
|
||||
interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra);
|
||||
}
|
||||
vout->w = invt * v1->w + t * v2->w;
|
||||
|
||||
GL_FORCE_INLINE void ClearTriangle() {
|
||||
tri_count = 0;
|
||||
}
|
||||
const float m = 255 * t;
|
||||
const float n = 255 - m;
|
||||
|
||||
GL_FORCE_INLINE void ShiftTriangle() {
|
||||
if(!tri_count) {
|
||||
return;
|
||||
}
|
||||
|
||||
tri_count--;
|
||||
triangle[0] = triangle[1];
|
||||
triangle[1] = triangle[2];
|
||||
|
||||
#ifndef NDEBUG
|
||||
triangle[2].v = NULL;
|
||||
triangle[2].visible = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
GL_FORCE_INLINE void ShiftRotateTriangle() {
|
||||
if(!tri_count) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(triangle[0].v < triangle[1].v) {
|
||||
triangle[0] = triangle[2];
|
||||
} else {
|
||||
triangle[1] = triangle[2];
|
||||
}
|
||||
|
||||
tri_count--;
|
||||
vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
|
||||
vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
|
||||
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
|
||||
vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
|
||||
}
|
||||
|
||||
#define SPAN_SORT_CFG 0x005F8030
|
||||
static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
|
||||
static volatile uint32_t *PVR_LMMODE1 = (uint32_t*) 0xA05F6888;
|
||||
static volatile uint32_t *QACR = (uint32_t*) 0xFF000038;
|
||||
|
||||
void SceneListSubmit(Vertex* v2, int n) {
|
||||
TRACE();
|
||||
|
||||
/* You need at least a header, and 3 vertices to render anything */
|
||||
if(n < 4) {
|
||||
return;
|
||||
}
|
||||
|
||||
void SceneListSubmit(void* src, int n) {
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
PVR_SET(SPAN_SORT_CFG, 0x0);
|
||||
|
||||
uint32_t *d = (uint32_t*) SQ_BASE_ADDRESS;
|
||||
*PVR_LMMODE0 = 0x0; /* Enable 64bit mode */
|
||||
//Set PVR DMA registers
|
||||
*PVR_LMMODE0 = 0;
|
||||
*PVR_LMMODE1 = 0;
|
||||
|
||||
Vertex __attribute__((aligned(32))) tmp;
|
||||
|
||||
/* Perform perspective divide on each vertex */
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
|
||||
if(!_glNearZClippingEnabled()) {
|
||||
/* Prep store queues */
|
||||
|
||||
for(int i = 0; i < n; ++i, ++vertex) {
|
||||
PREFETCH(vertex + 1);
|
||||
if(glIsVertex(vertex->flags)) {
|
||||
_glPerspectiveDivideVertex(vertex, h);
|
||||
}
|
||||
_glSubmitHeaderOrVertex(d, vertex);
|
||||
}
|
||||
|
||||
/* Wait for both store queues to complete */
|
||||
d = (uint32_t *) SQ_BASE_ADDRESS;
|
||||
d[0] = d[8] = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
//Set QACR registers
|
||||
QACR[1] = QACR[0] = 0x11;
|
||||
|
||||
#if CLIP_DEBUG
|
||||
printf("----\n");
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
for(int i = 0; i < n; ++i) {
|
||||
fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
|
||||
}
|
||||
|
||||
fprintf(stderr, "----\n");
|
||||
#endif
|
||||
uint8_t visible_mask = 0;
|
||||
uint8_t counter = 0;
|
||||
|
||||
for(int i = 0; i < n; ++i, ++vertex) {
|
||||
PREFETCH(vertex + 12);
|
||||
sq = SQ_BASE_ADDRESS;
|
||||
|
||||
/* Wait until we fill the triangle */
|
||||
if(tri_count < 3) {
|
||||
if(glIsVertex(vertex->flags)) {
|
||||
++strip_count;
|
||||
triangle[tri_count].v = vertex;
|
||||
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
|
||||
if(++tri_count < 3) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
/* We hit a header */
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
_glSubmitHeaderOrVertex(d, vertex);
|
||||
for(int i = 0; i < n; ++i, ++v2) {
|
||||
PREFETCH(v2 + 1);
|
||||
switch(v2->flags) {
|
||||
case GPU_CMD_VERTEX_EOL:
|
||||
if(counter < 2) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
counter = 0;
|
||||
break;
|
||||
case GPU_CMD_VERTEX:
|
||||
++counter;
|
||||
if(counter < 3) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
_glPushHeaderOrVertex(v2);
|
||||
counter = 0;
|
||||
continue;
|
||||
};
|
||||
|
||||
#if CLIP_DEBUG
|
||||
printf("SC: %d\n", strip_count);
|
||||
#endif
|
||||
Vertex* const v0 = v2 - 2;
|
||||
Vertex* const v1 = v2 - 1;
|
||||
|
||||
/* If we got here, then triangle contains 3 vertices */
|
||||
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
|
||||
|
||||
/* Clipping time!
|
||||
|
||||
There are 6 distinct possibilities when clipping a triangle. 3 of them result
|
||||
in another triangle, 3 of them result in a quadrilateral.
|
||||
|
||||
Assuming you iterate the edges of the triangle in order, and create a new *visible*
|
||||
vertex when you cross the plane, and discard vertices behind the plane, then the only
|
||||
difference between the two cases is that the final two vertices that need submitting have
|
||||
to be reversed.
|
||||
|
||||
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
|
||||
be used in a subsequent triangle in the strip and would end up being double divided.
|
||||
*/
|
||||
|
||||
#define SUBMIT_QUEUED() \
|
||||
if(strip_count > 3) { \
|
||||
tmp = *(vertex - 2); \
|
||||
/* If we had triangles ahead of this one, submit and finalize */ \
|
||||
_glPerspectiveDivideVertex(&tmp, h); \
|
||||
_glSubmitHeaderOrVertex(d, &tmp); \
|
||||
tmp = *(vertex - 1); \
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL; \
|
||||
_glPerspectiveDivideVertex(&tmp, h); \
|
||||
_glSubmitHeaderOrVertex(d, &tmp); \
|
||||
}
|
||||
|
||||
bool is_last_in_strip = glIsLastVertex(vertex->flags);
|
||||
visible_mask = (
|
||||
(v0->xyz[2] > -v0->w) << 0 |
|
||||
(v1->xyz[2] > -v1->w) << 1 |
|
||||
(v2->xyz[2] > -v2->w) << 2 |
|
||||
(counter == 0) << 3
|
||||
);
|
||||
|
||||
switch(visible_mask) {
|
||||
case 1: {
|
||||
SUBMIT_QUEUED();
|
||||
/* 0, 0a, 2a */
|
||||
tmp = *triangle[0].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
case 15: /* All visible, but final vertex in strip */
|
||||
{
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(v1, h);
|
||||
_glPushHeaderOrVertex(v1);
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
} break;
|
||||
case 2: {
|
||||
SUBMIT_QUEUED();
|
||||
/* 0a, 1, 1a */
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(v2, h);
|
||||
_glPushHeaderOrVertex(v2);
|
||||
}
|
||||
break;
|
||||
case 7:
|
||||
/* All visible, push the first vertex and move on */
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
break;
|
||||
case 9:
|
||||
/* First vertex was visible, last in strip */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[2];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
|
||||
tmp = *triangle[1].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
} break;
|
||||
case 3: {
|
||||
SUBMIT_QUEUED();
|
||||
/* 0, 1, 2a, 1a */
|
||||
tmp = *triangle[0].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
tmp = *triangle[1].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
} break;
|
||||
case 4: {
|
||||
SUBMIT_QUEUED();
|
||||
/* 1a, 2, 2a */
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
/* First vertex was visible, but not last in strip */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[2];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
|
||||
tmp = *triangle[2].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
} break;
|
||||
case 5: {
|
||||
SUBMIT_QUEUED();
|
||||
/* 0, 0a, 2, 1a */
|
||||
tmp = *triangle[0].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
tmp = *triangle[2].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
} break;
|
||||
case 6: {
|
||||
SUBMIT_QUEUED();
|
||||
/* 0a, 1, 2a, 2 */
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case 10:
|
||||
case 2:
|
||||
/* Second vertex was visible. In self case we need to create a triangle and produce
|
||||
two new vertices: 1-2, and 2-3. */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
tmp = *triangle[1].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
tmp = *triangle[2].v;
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(d, &tmp);
|
||||
} break;
|
||||
case 7: {
|
||||
/* All the vertices are visible! We divide and submit v0, then shift */
|
||||
_glPerspectiveDivideVertex(vertex - 2, h);
|
||||
_glSubmitHeaderOrVertex(d, vertex - 2);
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = v2->flags;
|
||||
|
||||
if(is_last_in_strip) {
|
||||
_glPerspectiveDivideVertex(vertex - 1, h);
|
||||
_glSubmitHeaderOrVertex(d, vertex - 1);
|
||||
_glPerspectiveDivideVertex(vertex, h);
|
||||
_glSubmitHeaderOrVertex(d, vertex);
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case 11:
|
||||
case 3: /* First and second vertex were visible */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(v1, v2, a);
|
||||
a->flags = v2->flags;
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPushHeaderOrVertex(a);
|
||||
}
|
||||
break;
|
||||
case 12:
|
||||
case 4:
|
||||
/* Third vertex was visible. */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
|
||||
_glClipEdge(v2, v0, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
if(counter % 2 == 1) {
|
||||
_glPushHeaderOrVertex(a);
|
||||
}
|
||||
|
||||
ShiftRotateTriangle();
|
||||
continue;
|
||||
} break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
/* If this was the last in the strip, we don't need to
|
||||
submit anything else, we just wipe the tri_count */
|
||||
if(is_last_in_strip) {
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
} else {
|
||||
ShiftRotateTriangle();
|
||||
strip_count = 2;
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case 13:
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
c->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
c->flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case 5: /* First and third vertex were visible */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
c->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case 14:
|
||||
case 6: /* Second and third vertex were visible */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[4];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
Vertex* d = &scratch[3];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
memcpy_vertex(d, v2);
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(d, h);
|
||||
_glPushHeaderOrVertex(d);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for both store queues to complete */
|
||||
d = (uint32_t *)0xe0000000;
|
||||
d[0] = d[8] = 0;
|
||||
_glFlushBuffer();
|
||||
}
|
||||
|
||||
void SceneListFinish() {
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#define GL_FORCE_INLINE static GL_INLINE_DEBUG
|
||||
#endif
|
||||
|
||||
#define PREFETCH(addr) __asm__("pref @%0" : : "r"((addr)))
|
||||
#define PREFETCH(addr) __builtin_prefetch((addr))
|
||||
|
||||
GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) {
|
||||
if(!len) {
|
||||
|
|
|
@ -10,8 +10,9 @@
|
|||
#include "software/parameter_equation.h"
|
||||
|
||||
#define CLIP_DEBUG 0
|
||||
#define ZNEAR_CLIPPING_ENABLED 1
|
||||
|
||||
static size_t AVAILABLE_VRAM = 16 * 1024 * 1024;
|
||||
static size_t AVAILABLE_VRAM = 8 * 1024 * 1024;
|
||||
static Matrix4x4 MATRIX;
|
||||
|
||||
static SDL_Window* WINDOW = NULL;
|
||||
|
@ -29,83 +30,13 @@ static VideoMode vid_mode = {
|
|||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
|
||||
static void DrawTriangle(Vertex* v0, Vertex* v1, Vertex* v2) {
|
||||
// Compute triangle bounding box.
|
||||
|
||||
int minX = MIN(MIN(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
|
||||
int maxX = MAX(MAX(v0->xyz[0], v1->xyz[0]), v2->xyz[0]);
|
||||
int minY = MIN(MIN(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
|
||||
int maxY = MAX(MAX(v0->xyz[1], v1->xyz[1]), v2->xyz[1]);
|
||||
|
||||
// Clip to scissor rect.
|
||||
|
||||
minX = MAX(minX, 0);
|
||||
maxX = MIN(maxX, vid_mode.width);
|
||||
minY = MAX(minY, 0);
|
||||
maxY = MIN(maxY, vid_mode.height);
|
||||
|
||||
// Compute edge equations.
|
||||
|
||||
EdgeEquation e0, e1, e2;
|
||||
EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]);
|
||||
EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]);
|
||||
EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]);
|
||||
|
||||
float area = 0.5 * (e0.c + e1.c + e2.c);
|
||||
|
||||
/* This is very ugly. I don't understand the math properly
|
||||
* so I just swap the vertex order if something is back-facing
|
||||
* and we want to render it. Patches welcome! */
|
||||
#define REVERSE_WINDING() \
|
||||
Vertex* tv = v0; \
|
||||
v0 = v1; \
|
||||
v1 = tv; \
|
||||
EdgeEquationInit(&e0, &v0->xyz[0], &v1->xyz[0]); \
|
||||
EdgeEquationInit(&e1, &v1->xyz[0], &v2->xyz[0]); \
|
||||
EdgeEquationInit(&e2, &v2->xyz[0], &v0->xyz[0]); \
|
||||
area = 0.5f * (e0.c + e1.c + e2.c) \
|
||||
|
||||
// Check if triangle is backfacing.
|
||||
if(CULL_MODE == GPU_CULLING_CCW) {
|
||||
if(area < 0) {
|
||||
return;
|
||||
}
|
||||
} else if(CULL_MODE == GPU_CULLING_CW) {
|
||||
if(area < 0) {
|
||||
// We only draw front-facing polygons, so swap
|
||||
// the back to front and draw
|
||||
REVERSE_WINDING();
|
||||
} else {
|
||||
// Front facing, so bail
|
||||
return;
|
||||
}
|
||||
} else if(area < 0) {
|
||||
/* We're not culling, but this is backfacing, so swap vertices and edges */
|
||||
REVERSE_WINDING();
|
||||
}
|
||||
|
||||
ParameterEquation r, g, b;
|
||||
|
||||
ParameterEquationInit(&r, v0->bgra[2], v1->bgra[2], v2->bgra[2], &e0, &e1, &e2, area);
|
||||
ParameterEquationInit(&g, v0->bgra[1], v1->bgra[1], v2->bgra[1], &e0, &e1, &e2, area);
|
||||
ParameterEquationInit(&b, v0->bgra[0], v1->bgra[0], v2->bgra[0], &e0, &e1, &e2, area);
|
||||
|
||||
// Add 0.5 to sample at pixel centers.
|
||||
for (float x = minX + 0.5f, xm = maxX + 0.5f; x <= xm; x += 1.0f)
|
||||
for (float y = minY + 0.5f, ym = maxY + 0.5f; y <= ym; y += 1.0f)
|
||||
{
|
||||
if (EdgeEquationTestPoint(&e0, x, y) && EdgeEquationTestPoint(&e1, x, y) && EdgeEquationTestPoint(&e2, x, y)) {
|
||||
int rint = ParameterEquationEvaluate(&r, x, y);
|
||||
int gint = ParameterEquationEvaluate(&g, x, y);
|
||||
int bint = ParameterEquationEvaluate(&b, x, y);
|
||||
SDL_SetRenderDrawColor(RENDERER, rint, gint, bint, 255);
|
||||
SDL_RenderDrawPoint(RENDERER, x, y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AlignedVector vbuffer;
|
||||
|
||||
void InitGPU(_Bool autosort, _Bool fsaa) {
|
||||
|
||||
// 32-bit SDL has trouble with the wayland driver for some reason
|
||||
setenv("SDL_VIDEODRIVER", "x11", 1);
|
||||
|
||||
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS);
|
||||
|
||||
WINDOW = SDL_CreateWindow(
|
||||
|
@ -119,6 +50,8 @@ void InitGPU(_Bool autosort, _Bool fsaa) {
|
|||
RENDERER = SDL_CreateRenderer(
|
||||
WINDOW, -1, SDL_RENDERER_ACCELERATED
|
||||
);
|
||||
|
||||
aligned_vector_init(&vbuffer, sizeof(SDL_Vertex));
|
||||
}
|
||||
|
||||
void SceneBegin() {
|
||||
|
@ -161,7 +94,7 @@ GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
|
|||
}
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
|
||||
GL_FORCE_INLINE void _glPushHeaderOrVertex(const Vertex* v) {
|
||||
#ifndef NDEBUG
|
||||
if(glIsVertex(v->flags)) {
|
||||
gl_assert(!isnan(v->xyz[2]));
|
||||
|
@ -176,335 +109,329 @@ GL_FORCE_INLINE void _glSubmitHeaderOrVertex(const Vertex* v) {
|
|||
BUFFER[vertex_counter++] = *v;
|
||||
}
|
||||
|
||||
static struct {
|
||||
Vertex* v;
|
||||
int visible;
|
||||
} triangle[3];
|
||||
static inline void _glFlushBuffer() {}
|
||||
|
||||
static int tri_count = 0;
|
||||
static int strip_count = 0;
|
||||
|
||||
GL_FORCE_INLINE void interpolateColour(const uint8_t* v1, const uint8_t* v2, const float t, uint8_t* out) {
|
||||
const int MASK1 = 0x00FF00FF;
|
||||
const int MASK2 = 0xFF00FF00;
|
||||
|
||||
const int f2 = 256 * t;
|
||||
const int f1 = 256 - f2;
|
||||
|
||||
const uint32_t a = *(uint32_t*) v1;
|
||||
const uint32_t b = *(uint32_t*) v2;
|
||||
|
||||
*((uint32_t*) out) = (((((a & MASK1) * f1) + ((b & MASK1) * f2)) >> 8) & MASK1) |
|
||||
(((((a & MASK2) * f1) + ((b & MASK2) * f2)) >> 8) & MASK2);
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
|
||||
/* Clipping time! */
|
||||
const static float o = 0.003921569f; // 1 / 255
|
||||
const float d0 = v1->w + v1->xyz[2];
|
||||
const float d1 = v2->w + v2->xyz[2];
|
||||
const float t = (fabs(d0) * (1.0f / sqrtf((d1 - d0) * (d1 - d0)))) + 0.000001f;
|
||||
const float invt = 1.0f - t;
|
||||
|
||||
const float epsilon = (d0 < d1) ? -0.00001f : 0.00001f;
|
||||
vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0];
|
||||
vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1];
|
||||
vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2];
|
||||
|
||||
float t = (d0 / (d0 - d1)) + epsilon;
|
||||
vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0];
|
||||
vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1];
|
||||
|
||||
t = (t > 1.0f) ? 1.0f : t;
|
||||
t = (t < 0.0f) ? 0.0f : t;
|
||||
vout->w = invt * v1->w + t * v2->w;
|
||||
|
||||
vout->xyz[0] = __builtin_fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
|
||||
vout->xyz[1] = __builtin_fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
|
||||
vout->xyz[2] = __builtin_fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
|
||||
vout->w = __builtin_fmaf(v2->w - v1->w, t, v1->w);
|
||||
const float m = 255 * t;
|
||||
const float n = 255 - m;
|
||||
|
||||
vout->uv[0] = __builtin_fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
|
||||
vout->uv[1] = __builtin_fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
|
||||
|
||||
interpolateColour(v1->bgra, v2->bgra, t, vout->bgra);
|
||||
vout->bgra[0] = (v1->bgra[0] * n + v2->bgra[0] * m) * o;
|
||||
vout->bgra[1] = (v1->bgra[1] * n + v2->bgra[1] * m) * o;
|
||||
vout->bgra[2] = (v1->bgra[2] * n + v2->bgra[2] * m) * o;
|
||||
vout->bgra[3] = (v1->bgra[3] * n + v2->bgra[3] * m) * o;
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void ClearTriangle() {
|
||||
tri_count = 0;
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void ShiftTriangle() {
|
||||
if(!tri_count) {
|
||||
void SceneListSubmit(Vertex* v2, int n) {
|
||||
/* You need at least a header, and 3 vertices to render anything */
|
||||
if(n < 4) {
|
||||
return;
|
||||
}
|
||||
|
||||
tri_count--;
|
||||
triangle[0] = triangle[1];
|
||||
triangle[1] = triangle[2];
|
||||
|
||||
#ifndef NDEBUG
|
||||
triangle[2].v = NULL;
|
||||
triangle[2].visible = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void ShiftRotateTriangle() {
|
||||
if(!tri_count) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(triangle[0].v < triangle[1].v) {
|
||||
triangle[0] = triangle[2];
|
||||
} else {
|
||||
triangle[1] = triangle[2];
|
||||
}
|
||||
|
||||
tri_count--;
|
||||
}
|
||||
|
||||
void SceneListSubmit(void* src, int n) {
|
||||
/* Perform perspective divide on each vertex */
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
/* If Z-clipping is disabled, just fire everything over to the buffer */
|
||||
if(!ZNEAR_CLIPPING_ENABLED) {
|
||||
for(int i = 0; i < n; ++i, ++vertex) {
|
||||
PREFETCH(vertex + 1);
|
||||
if(glIsVertex(vertex->flags)) {
|
||||
_glPerspectiveDivideVertex(vertex, h);
|
||||
}
|
||||
_glSubmitHeaderOrVertex(vertex);
|
||||
}
|
||||
uint8_t visible_mask = 0;
|
||||
uint8_t counter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
|
||||
#if CLIP_DEBUG
|
||||
printf("----\n");
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < n; ++i, ++vertex) {
|
||||
PREFETCH(vertex + 1);
|
||||
|
||||
bool is_last_in_strip = glIsLastVertex(vertex->flags);
|
||||
|
||||
/* Wait until we fill the triangle */
|
||||
if(tri_count < 3) {
|
||||
if(glIsVertex(vertex->flags)) {
|
||||
triangle[tri_count].v = vertex;
|
||||
triangle[tri_count].visible = vertex->xyz[2] >= -vertex->w;
|
||||
tri_count++;
|
||||
strip_count++;
|
||||
} else {
|
||||
/* We hit a header */
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
_glSubmitHeaderOrVertex(vertex);
|
||||
}
|
||||
|
||||
if(tri_count < 3) {
|
||||
for(int i = 0; i < n; ++i, ++v2) {
|
||||
PREFETCH(v2 + 1);
|
||||
switch(v2->flags) {
|
||||
case GPU_CMD_VERTEX_EOL:
|
||||
if(counter < 2) {
|
||||
continue;
|
||||
}
|
||||
counter = 0;
|
||||
break;
|
||||
case GPU_CMD_VERTEX:
|
||||
++counter;
|
||||
if(counter < 3) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
_glPushHeaderOrVertex(v2);
|
||||
counter = 0;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
Vertex* const v0 = v2 - 2;
|
||||
Vertex* const v1 = v2 - 1;
|
||||
|
||||
visible_mask = (
|
||||
(v0->xyz[2] > -v0->w) << 0 |
|
||||
(v1->xyz[2] > -v1->w) << 1 |
|
||||
(v2->xyz[2] > -v2->w) << 2 |
|
||||
(counter == 0) << 3
|
||||
);
|
||||
|
||||
switch(visible_mask) {
|
||||
case 15: /* All visible, but final vertex in strip */
|
||||
{
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(v1, h);
|
||||
_glPushHeaderOrVertex(v1);
|
||||
|
||||
_glPerspectiveDivideVertex(v2, h);
|
||||
_glPushHeaderOrVertex(v2);
|
||||
}
|
||||
break;
|
||||
case 7:
|
||||
/* All visible, push the first vertex and move on */
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
break;
|
||||
case 9:
|
||||
/* First vertex was visible, last in strip */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[2];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
|
||||
#if CLIP_DEBUG
|
||||
printf("SC: %d\n", strip_count);
|
||||
#endif
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
/* If we got here, then triangle contains 3 vertices */
|
||||
int visible_mask = triangle[0].visible | (triangle[1].visible << 1) | (triangle[2].visible << 2);
|
||||
if(visible_mask == 7) {
|
||||
#if CLIP_DEBUG
|
||||
printf("Visible\n");
|
||||
#endif
|
||||
/* All the vertices are visible! We divide and submit v0, then shift */
|
||||
_glPerspectiveDivideVertex(vertex - 2, h);
|
||||
_glSubmitHeaderOrVertex(vertex - 2);
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
if(is_last_in_strip) {
|
||||
_glPerspectiveDivideVertex(vertex - 1, h);
|
||||
_glSubmitHeaderOrVertex(vertex - 1);
|
||||
_glPerspectiveDivideVertex(vertex, h);
|
||||
_glSubmitHeaderOrVertex(vertex);
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
/* First vertex was visible, but not last in strip */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[2];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case 10:
|
||||
case 2:
|
||||
/* Second vertex was visible. In self case we need to create a triangle and produce
|
||||
two new vertices: 1-2, and 2-3. */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = v2->flags;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case 11:
|
||||
case 3: /* First and second vertex were visible */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(v1, v2, a);
|
||||
a->flags = v2->flags;
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPushHeaderOrVertex(a);
|
||||
}
|
||||
break;
|
||||
case 12:
|
||||
case 4:
|
||||
/* Third vertex was visible. */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
|
||||
_glClipEdge(v2, v0, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
if(counter % 2 == 1) {
|
||||
_glPushHeaderOrVertex(a);
|
||||
}
|
||||
|
||||
ShiftRotateTriangle();
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
} else if(visible_mask) {
|
||||
/* Clipping time!
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case 13:
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
There are 6 distinct possibilities when clipping a triangle. 3 of them result
|
||||
in another triangle, 3 of them result in a quadrilateral.
|
||||
memcpy_vertex(c, v2);
|
||||
c->flags = GPU_CMD_VERTEX;
|
||||
|
||||
Assuming you iterate the edges of the triangle in order, and create a new *visible*
|
||||
vertex when you cross the plane, and discard vertices behind the plane, then the only
|
||||
difference between the two cases is that the final two vertices that need submitting have
|
||||
to be reversed.
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
Unfortunately we have to copy vertices here, because if we persp-divide a vertex it may
|
||||
be used in a subsequent triangle in the strip and would end up being double divided.
|
||||
*/
|
||||
#if CLIP_DEBUG
|
||||
printf("Clip: %d, SC: %d\n", visible_mask, strip_count);
|
||||
printf("%d, %d, %d\n", triangle[0].v - (Vertex*) src - 1, triangle[1].v - (Vertex*) src - 1, triangle[2].v - (Vertex*) src - 1);
|
||||
#endif
|
||||
Vertex tmp;
|
||||
if(strip_count > 3) {
|
||||
#if CLIP_DEBUG
|
||||
printf("Flush\n");
|
||||
#endif
|
||||
tmp = *(vertex - 2);
|
||||
/* If we had triangles ahead of this one, submit and finalize */
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
tmp = *(vertex - 1);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
}
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
switch(visible_mask) {
|
||||
case 1: {
|
||||
/* 0, 0a, 2a */
|
||||
tmp = *triangle[0].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
} break;
|
||||
case 2: {
|
||||
/* 0a, 1, 1a */
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
c->flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case 5: /* First and third vertex were visible */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[3];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
tmp = *triangle[1].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
memcpy_vertex(c, v2);
|
||||
c->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
} break;
|
||||
case 3: {
|
||||
/* 0, 1, 2a, 1a */
|
||||
tmp = *triangle[0].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
tmp = *triangle[1].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
} break;
|
||||
case 4: {
|
||||
/* 1a, 2, 2a */
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
tmp = *triangle[2].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case 14:
|
||||
case 6: /* Second and third vertex were visible */
|
||||
{
|
||||
Vertex __attribute__((aligned(32))) scratch[4];
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
Vertex* d = &scratch[3];
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
} break;
|
||||
case 5: {
|
||||
/* 0, 0a, 2, 1a */
|
||||
tmp = *triangle[0].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
memcpy_vertex(c, v1);
|
||||
memcpy_vertex(d, v2);
|
||||
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
tmp = *triangle[2].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(triangle[1].v, triangle[2].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
} break;
|
||||
case 6: {
|
||||
/* 0a, 1, 2a, 2 */
|
||||
_glClipEdge(triangle[0].v, triangle[1].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
tmp = *triangle[1].v;
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glClipEdge(triangle[2].v, triangle[0].v, &tmp);
|
||||
tmp.flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
tmp = *triangle[2].v;
|
||||
tmp.flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(&tmp, h);
|
||||
_glSubmitHeaderOrVertex(&tmp);
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* If this was the last in the strip, we don't need to
|
||||
submit anything else, we just wipe the tri_count */
|
||||
if(is_last_in_strip) {
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
} else {
|
||||
ShiftRotateTriangle();
|
||||
strip_count = 2;
|
||||
}
|
||||
} else {
|
||||
/* Invisible? Move to the next in the strip */
|
||||
|
||||
if(is_last_in_strip) {
|
||||
tri_count = 0;
|
||||
strip_count = 0;
|
||||
}
|
||||
strip_count = 2;
|
||||
ShiftRotateTriangle();
|
||||
_glPerspectiveDivideVertex(d, h);
|
||||
_glPushHeaderOrVertex(d);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
_glFlushBuffer();
|
||||
}
|
||||
|
||||
void SceneListFinish() {
|
||||
|
@ -536,18 +463,41 @@ void SceneListFinish() {
|
|||
Vertex* v0 = (Vertex*) (flags - step - step);
|
||||
Vertex* v1 = (Vertex*) (flags - step);
|
||||
Vertex* v2 = (Vertex*) (flags);
|
||||
(vidx % 2 == 0) ? DrawTriangle(v0, v1, v2) : DrawTriangle(v1, v0, v2);
|
||||
|
||||
SDL_Vertex sv0 = {
|
||||
{v0->xyz[0], v0->xyz[1]},
|
||||
{v0->bgra[2], v0->bgra[1], v0->bgra[0], v0->bgra[3]},
|
||||
{v0->uv[0], v0->uv[1]}
|
||||
};
|
||||
|
||||
SDL_Vertex sv1 = {
|
||||
{v1->xyz[0], v1->xyz[1]},
|
||||
{v1->bgra[2], v1->bgra[1], v1->bgra[0], v1->bgra[3]},
|
||||
{v1->uv[0], v1->uv[1]}
|
||||
};
|
||||
|
||||
SDL_Vertex sv2 = {
|
||||
{v2->xyz[0], v2->xyz[1]},
|
||||
{v2->bgra[2], v2->bgra[1], v2->bgra[0], v2->bgra[3]},
|
||||
{v2->uv[0], v2->uv[1]}
|
||||
};
|
||||
|
||||
aligned_vector_push_back(&vbuffer, &sv0, 1);
|
||||
aligned_vector_push_back(&vbuffer, &sv1, 1);
|
||||
aligned_vector_push_back(&vbuffer, &sv2, 1);
|
||||
}
|
||||
|
||||
if((*flags) == GPU_CMD_VERTEX_EOL) {
|
||||
vidx = 0;
|
||||
}
|
||||
}
|
||||
|
||||
SDL_SetRenderDrawColor(RENDERER, 255, 255, 255, 255);
|
||||
SDL_RenderGeometry(RENDERER, NULL, aligned_vector_front(&vbuffer), aligned_vector_size(&vbuffer), NULL, 0);
|
||||
}
|
||||
|
||||
void SceneFinish() {
|
||||
SDL_RenderPresent(RENDERER);
|
||||
return;
|
||||
/* Only sensible place to hook the quit signal */
|
||||
SDL_Event e;
|
||||
while (SDL_PollEvent(&e)) {
|
||||
|
|
|
@ -48,7 +48,8 @@ void TransformVec3NoMod(const float* v, float* ret);
|
|||
|
||||
/* Transform a 3-element normal using the stored matrix (w == 0)*/
|
||||
static inline void TransformNormalNoMod(const float* xIn, float* xOut) {
|
||||
|
||||
(void) xIn;
|
||||
(void) xOut;
|
||||
}
|
||||
|
||||
void TransformVertices(Vertex* vertices, const int count);
|
||||
|
|
45
GL/private.h
45
GL/private.h
|
@ -164,7 +164,10 @@ typedef struct {
|
|||
GLboolean isCompressed;
|
||||
GLboolean isPaletted;
|
||||
//50
|
||||
} TextureObject;
|
||||
GLenum internalFormat;
|
||||
//54
|
||||
GLubyte padding[10]; // Pad to 64-bytes
|
||||
} __attribute__((aligned(32))) TextureObject;
|
||||
|
||||
typedef struct {
|
||||
GLfloat emissive[4];
|
||||
|
@ -233,11 +236,41 @@ GL_FORCE_INLINE float clamp(float d, float min, float max) {
|
|||
return (d < min) ? min : (d > max) ? max : d;
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void memcpy_vertex(Vertex *dest, const Vertex *src) {
|
||||
#ifdef __DREAMCAST__
|
||||
_Complex float double_scratch;
|
||||
|
||||
asm volatile (
|
||||
"fschg\n\t"
|
||||
"clrs\n\t"
|
||||
".align 2\n\t"
|
||||
"fmov.d @%[in]+, %[scratch]\n\t"
|
||||
"fmov.d %[scratch], @%[out]\n\t"
|
||||
"fmov.d @%[in]+, %[scratch]\n\t"
|
||||
"add #8, %[out]\n\t"
|
||||
"fmov.d %[scratch], @%[out]\n\t"
|
||||
"fmov.d @%[in]+, %[scratch]\n\t"
|
||||
"add #8, %[out]\n\t"
|
||||
"fmov.d %[scratch], @%[out]\n\t"
|
||||
"fmov.d @%[in], %[scratch]\n\t"
|
||||
"add #8, %[out]\n\t"
|
||||
"fmov.d %[scratch], @%[out]\n\t"
|
||||
"fschg\n"
|
||||
: [in] "+&r" ((uint32_t) src), [scratch] "=&d" (double_scratch), [out] "+&r" ((uint32_t) dest)
|
||||
:
|
||||
: "t", "memory" // clobbers
|
||||
);
|
||||
#else
|
||||
*dest = *src;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define swapVertex(a, b) \
|
||||
do { \
|
||||
Vertex c = *a; \
|
||||
*a = *b; \
|
||||
*b = c; \
|
||||
Vertex __attribute__((aligned(32))) c; \
|
||||
memcpy_vertex(&c, a); \
|
||||
memcpy_vertex(a, b); \
|
||||
memcpy_vertex(b, &c); \
|
||||
} while(0)
|
||||
|
||||
/* ClipVertex doesn't have room for these, so we need to parse them
|
||||
|
@ -345,6 +378,9 @@ extern GLubyte ACTIVE_TEXTURE;
|
|||
extern GLboolean TEXTURES_ENABLED[];
|
||||
|
||||
GLubyte _glGetActiveTexture();
|
||||
GLint _glGetTextureInternalFormat();
|
||||
GLboolean _glGetTextureTwiddle();
|
||||
void _glSetTextureTwiddle(GLboolean v);
|
||||
|
||||
GLuint _glGetActiveClientTexture();
|
||||
TexturePalette* _glGetSharedPalette(GLshort bank);
|
||||
|
@ -520,6 +556,7 @@ void _glSetLightModelColorControl(GLint v);
|
|||
GLuint _glEnabledLightCount();
|
||||
void _glRecalcEnabledLights();
|
||||
GLfloat* _glLightModelSceneAmbient();
|
||||
GLfloat* _glGetLightModelSceneAmbient();
|
||||
LightSource* _glLightAt(GLuint i);
|
||||
GLboolean _glNearZClippingEnabled();
|
||||
|
||||
|
|
27
GL/state.c
27
GL/state.c
|
@ -180,6 +180,10 @@ void _glSetLightModelSceneAmbient(const GLfloat* v) {
|
|||
vec4cpy(GPUState.scene_ambient, v);
|
||||
}
|
||||
|
||||
GLfloat* _glGetLightModelSceneAmbient() {
|
||||
return GPUState.scene_ambient;
|
||||
}
|
||||
|
||||
void _glSetLightModelColorControl(GLint v) {
|
||||
GPUState.color_control = v;
|
||||
}
|
||||
|
@ -251,7 +255,8 @@ void _glUpdatePVRTextureContext(PolyContext *context, GLshort textureUnit) {
|
|||
context->txr2.enable = GPU_TEXTURE_DISABLE;
|
||||
context->txr2.alpha = GPU_TXRALPHA_DISABLE;
|
||||
|
||||
if(!TEXTURES_ENABLED[textureUnit] || !tx1) {
|
||||
if(!TEXTURES_ENABLED[textureUnit] || !tx1 || !tx1->data) {
|
||||
context->txr.base = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -399,8 +404,8 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
|
|||
}
|
||||
break;
|
||||
case GL_CULL_FACE: {
|
||||
if(GPUState.cull_face != GL_TRUE) {
|
||||
GPUState.cull_face = GL_TRUE;
|
||||
if(GPUState.culling_enabled != GL_TRUE) {
|
||||
GPUState.culling_enabled = GL_TRUE;
|
||||
GPUState.is_dirty = GL_TRUE;
|
||||
}
|
||||
|
||||
|
@ -489,7 +494,11 @@ GLAPI void APIENTRY glEnable(GLenum cap) {
|
|||
GPUState.is_dirty = GL_TRUE;
|
||||
}
|
||||
break;
|
||||
case GL_TEXTURE_TWIDDLE_KOS:
|
||||
_glSetTextureTwiddle(GL_TRUE);
|
||||
break;
|
||||
default:
|
||||
_glKosThrowError(GL_INVALID_VALUE, __func__);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -503,8 +512,8 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
|
|||
}
|
||||
break;
|
||||
case GL_CULL_FACE: {
|
||||
if(GPUState.cull_face != GL_FALSE) {
|
||||
GPUState.cull_face = GL_FALSE;
|
||||
if(GPUState.culling_enabled != GL_FALSE) {
|
||||
GPUState.culling_enabled = GL_FALSE;
|
||||
GPUState.is_dirty = GL_TRUE;
|
||||
}
|
||||
|
||||
|
@ -591,7 +600,11 @@ GLAPI void APIENTRY glDisable(GLenum cap) {
|
|||
GPUState.is_dirty = GL_TRUE;
|
||||
}
|
||||
break;
|
||||
case GL_TEXTURE_TWIDDLE_KOS:
|
||||
_glSetTextureTwiddle(GL_FALSE);
|
||||
break;
|
||||
default:
|
||||
_glKosThrowError(GL_INVALID_VALUE, __func__);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -972,6 +985,10 @@ void APIENTRY glGetIntegerv(GLenum pname, GLint *params) {
|
|||
case GL_FREE_CONTIGUOUS_TEXTURE_MEMORY_KOS:
|
||||
*params = _glFreeContiguousTextureMemory();
|
||||
break;
|
||||
case GL_TEXTURE_INTERNAL_FORMAT_KOS:
|
||||
*params = _glGetTextureInternalFormat();
|
||||
break;
|
||||
|
||||
default:
|
||||
_glKosThrowError(GL_INVALID_ENUM, __func__);
|
||||
break;
|
||||
|
|
1294
GL/texture.c
1294
GL/texture.c
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +0,0 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) [year] [fullname]
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -1,158 +0,0 @@
|
|||
# Summary
|
||||
|
||||
yalloc is a memory efficient allocator which is intended for embedded
|
||||
applications that only have a low amount of RAM and want to maximize its
|
||||
utilization. Properties of the allocator:
|
||||
|
||||
- pools can be up to 128k
|
||||
- user data is 32bit aligned
|
||||
- 4 bytes overhead per allocation
|
||||
- supports defragmentation
|
||||
- uses a free list for first fit allocation strategy (most recently freed
|
||||
blocks are used first)
|
||||
- extensively tested (see section below)
|
||||
- MIT license
|
||||
|
||||
# Defragmentation
|
||||
|
||||
This feature was the initial motivation for this implementation. Especially
|
||||
when dealing with highly memory constrained environments fragmenting memory
|
||||
pools can be annoying. For this reason this implementation supports
|
||||
defragmentation which moves all allocated blocks into a contiguous range at the
|
||||
beginning of the pool, leaving a maximized free range at the end.
|
||||
|
||||
As there is no garbage collector or other runtime system involved that updates
|
||||
the references, the application must do so. This is done in three steps:
|
||||
|
||||
1. yalloc_defrag_start() is called. This calculates the new
|
||||
post-defragmentation-addresses for all allocations, but otherwise leaves
|
||||
the allocations untouched.
|
||||
|
||||
2. yalloc_defrag_address() is called by the application for every pointer that
|
||||
points to an allocation. It returns the post-defragmentation-address for
|
||||
the allocation. The application must update all its relevant pointers this
|
||||
way. Care must be taken not not yet dereference that moved pointers. If the
|
||||
application works with hierarchical data then this can easily be done by
|
||||
updating the pointers button up (first the leafs then their parents).
|
||||
|
||||
3. yalloc_defrag_commit() is called to finally perform the defragmentation.
|
||||
All allocated blocks are moved to their post-defragmentation-address and
|
||||
the application can continue using the pool the normal way.
|
||||
|
||||
It is up to the application when (and if) it performs defragmentation. One
|
||||
strategy would be to delay it until an allocation failure. Another approach
|
||||
would be to perform the defragmentation regularly when there is nothing else to
|
||||
do.
|
||||
|
||||
# Configurable Defines
|
||||
|
||||
INTERNAL_VALIDATE
|
||||
|
||||
If this is not defined on the compiler commandline it will be defined as 0 if
|
||||
NDEBUG is defined and otherwise as 1. If you want to disable internal
|
||||
validation when NDEBUG is not defined then define INERNAL_VALIDATE as 0 on the
|
||||
compiler commandline.
|
||||
|
||||
If it is nonzero the heap will be validated via a bunch of assert() calls at
|
||||
the end of every function that modifies the heap. This has roughly O(N*M)
|
||||
overhead where N is the number of allocated blocks and M the number of free
|
||||
blocks in a heap. For applications with enough live allocations this will get
|
||||
significant.
|
||||
|
||||
YALLOC_VALGRIND
|
||||
|
||||
If this is defined in yalloc.c and NVALGRIND is not defined then
|
||||
valgrind/memcheck.h is included and the the allocator functions tell valgrind
|
||||
about the pool, the allocations and makes the block headers inaccessible outside
|
||||
of yalloc-functions. This allows valgrind to detect a lot of the accidents that
|
||||
can happen when dealing dynamic memory. This also adds some overhead for every
|
||||
yalloc-call because most of them will "unprotect" the internal structure on
|
||||
entry and "protect" it again (marking it as inaccessible for valgrind) before
|
||||
returning.
|
||||
|
||||
# Tests
|
||||
|
||||
The tests rely on internal validation of the pool (see INTERNAL_VALIDATE) to
|
||||
check that no assumptions about the internal structure of the pool are
|
||||
violated. They additionally check for correctness of observations that can be
|
||||
made by using the public functions of the allocator (like checking if user data
|
||||
stays unmodified). There are a few different scripts that run tests:
|
||||
|
||||
- run_coverage.sh runs a bunch of testfunctions that are carefully crafted to
|
||||
cover all code paths. Coverage data is generated by clang and a summary is
|
||||
shown at the end of the test.
|
||||
|
||||
- run_valgrind.sh tests if the valgrind integration is working as expected,
|
||||
runs the functions from the coverage test and some randomly generated
|
||||
testcases under valgrind.
|
||||
|
||||
- run_libfuzzer.sh uses libfuzzer from clang to generate interesting testcases
|
||||
and runs them in multiple jobs in parallel for 10 seconds. It also generates
|
||||
coverage data at the end (it always got 100% coverage in my testruns).
|
||||
|
||||
All tests exit with 0 and print "All fine!" at the end if there where no
|
||||
errors. Coverage deficits are not counted as error, so you have to look at the
|
||||
summary (they should show 100% coverage!).
|
||||
|
||||
|
||||
# Implementation Details
|
||||
|
||||
The Headers and the user data are 32bit aligned. Headers have two 16bit fields
|
||||
where the high 15 bits represent offsets (relative to the pools address) to the
|
||||
previous/next block. The macros HDR_PTR() and HDR_OFFSET() are used to
|
||||
translate an offset to an address and back. The 32bit alignment is exploited to
|
||||
allow pools of up to 128k with that 15 significant bits.
|
||||
|
||||
A pool is always occupied by non-overlapping blocks that link to their
|
||||
previous/next block in address order via the prev/next field of Header.
|
||||
|
||||
Free blocks are always joined: No two free blocks will ever be neighbors.
|
||||
|
||||
Free blocks have an additional header of the same structure. This additional
|
||||
header is used to build a list of free blocks (independent of their address
|
||||
order).
|
||||
|
||||
yalloc_free() will insert the freed block to the front of the free list.
|
||||
yalloc_alloc() searches that list front to back and takes the first block that
|
||||
is big enough to satisfy the allocation.
|
||||
|
||||
There is always a Header at the front and at the end of the pool. The Header at
|
||||
the end is degenerate: It is marked as "used" but has no next block (which is
|
||||
usually used to determine the size of a block).
|
||||
|
||||
The prev-field of the very first block in the pool has special meaning: It
|
||||
points to the first free block in the pool. Or, if the pool is currently
|
||||
defragmenting (after yalloc_defrag_start() and before yalloc_defrag_commit()),
|
||||
points to the last header of the pool. This state can be recognized by checking
|
||||
if it points to an empty block (normal pool state) or a used block
|
||||
(defragmentation in progress). This logic can be seen in
|
||||
yalloc_defrag_in_progress().
|
||||
|
||||
The lowest bit of next/prev have special meaning:
|
||||
|
||||
- low bit of prev is set for free blocks
|
||||
|
||||
- low bit of next is set for blocks with 32bit padding after the user data.
|
||||
This is needed when a block is allocated from a free block that leaves only
|
||||
4 free bytes after the user data... which is not enough to insert a
|
||||
free-header (which is needs 8 bytes). The padding will be reclaimed when
|
||||
that block is freed or when the pool is defragmented. The predicate
|
||||
isPadded() can be used to test if a block is padded. Free blocks are never
|
||||
padded.
|
||||
|
||||
The predicate isNil() can be used to test if an offset points nowhere (it tests
|
||||
if all 15 high bits of an offset are 1). The constant NIL has all but the
|
||||
lowest bit set. It is used to set offsets to point to nowhere, and in some
|
||||
places it is used to mask out the actual address bits of an offset. This should
|
||||
be kept in mind when modifying the code and updating prev/next: Think carefully
|
||||
if you have to preserve the low bit when updating an offset!
|
||||
|
||||
Defragmentation is done in two phases: First the user calls
|
||||
yalloc_defrag_start(). This will put the pool in a special state where no
|
||||
alloc/free-calls are allowed. In this state the prev-fields of the used blocks
|
||||
have a special meaning: They store the offset that the block will have after
|
||||
defragmentation finished. This information is used by yalloc_defrag_address()
|
||||
which can be called by the application to query the new addresses for its
|
||||
allocations. After the application has updated all its pointers it must call
|
||||
yalloc_defrag_commit() which moves all used blocks in contiguous space at the
|
||||
beginning of the pool, leaving one maximized free block at the end.
|
|
@ -1,803 +0,0 @@
|
|||
#include "yalloc.h"
|
||||
#include "yalloc_internals.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ALIGN(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
|
||||
|
||||
#if defined(YALLOC_VALGRIND) && !defined(NVALGRIND)
|
||||
# define USE_VALGRIND 1
|
||||
#else
|
||||
# define USE_VALGRIND 0
|
||||
#endif
|
||||
|
||||
#if USE_VALGRIND
|
||||
# include <valgrind/memcheck.h>
|
||||
#else
|
||||
# define VALGRIND_MAKE_MEM_UNDEFINED(p, s) ((void)0)
|
||||
# define VALGRIND_MAKE_MEM_DEFINED(p, s) ((void)0)
|
||||
# define VALGRIND_MAKE_MEM_NOACCESS(p, s) ((void)0)
|
||||
# define VALGRIND_CREATE_MEMPOOL(pool, rz, z) ((void)0)
|
||||
# define VALGRIND_MEMPOOL_ALLOC(pool, p, s) ((void)0)
|
||||
# define VALGRIND_MEMPOOL_FREE(pool, p) ((void)0)
|
||||
# define VALGRIND_MEMPOOL_CHANGE(pool, a, b, s) ((void)0)
|
||||
#endif
|
||||
|
||||
#define MARK_NEW_FREE_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header) * 2)
|
||||
#define MARK_NEW_HDR(p) VALGRIND_MAKE_MEM_UNDEFINED(p, sizeof(Header))
|
||||
#define PROTECT_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header))
|
||||
#define PROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(Header) * 2)
|
||||
#define UNPROTECT_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header))
|
||||
#define UNPROTECT_FREE_HDR(p) VALGRIND_MAKE_MEM_DEFINED(p, sizeof(Header) * 2)
|
||||
|
||||
|
||||
#if USE_VALGRIND
|
||||
static void _unprotect_pool(void * pool)
|
||||
{
|
||||
Header * cur = (Header*)pool;
|
||||
for (;;)
|
||||
{
|
||||
UNPROTECT_HDR(cur);
|
||||
if (isFree(cur))
|
||||
UNPROTECT_HDR(cur + 1);
|
||||
|
||||
if (isNil(cur->next))
|
||||
break;
|
||||
|
||||
cur = HDR_PTR(cur->next);
|
||||
}
|
||||
}
|
||||
|
||||
static void _protect_pool(void * pool)
|
||||
{
|
||||
Header * cur = (Header*)pool;
|
||||
while (cur)
|
||||
{
|
||||
Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);
|
||||
|
||||
if (isFree(cur))
|
||||
VALGRIND_MAKE_MEM_NOACCESS(cur, (char*)next - (char*)cur);
|
||||
else
|
||||
PROTECT_HDR(cur);
|
||||
|
||||
cur = next;
|
||||
}
|
||||
}
|
||||
#define assert_is_pool(pool) assert(VALGRIND_MEMPOOL_EXISTS(pool));
|
||||
|
||||
#else
|
||||
|
||||
static void _unprotect_pool(void * pool){(void)pool;}
|
||||
static void _protect_pool(void * pool){(void)pool;}
|
||||
#define assert_is_pool(pool) ((void)0)
|
||||
#endif
|
||||
|
||||
// internal version that does not unprotect/protect the pool
|
||||
static int _yalloc_defrag_in_progress(void * pool)
|
||||
{
|
||||
// fragmentation is indicated by a free list with one entry: the last block of the pool, which has its "free"-bit cleared.
|
||||
Header * p = (Header*)pool;
|
||||
if (isNil(p->prev))
|
||||
return 0;
|
||||
|
||||
return !(HDR_PTR(p->prev)->prev & 1);
|
||||
}
|
||||
|
||||
int yalloc_defrag_in_progress(void * pool)
|
||||
{
|
||||
_unprotect_pool(pool);
|
||||
int ret = _yalloc_defrag_in_progress(pool);
|
||||
_protect_pool(pool);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if YALLOC_INTERNAL_VALIDATE

// Counts how often blk occurs in the free-list (expected: 1 for free blocks, 0 for used ones).
static size_t _count_free_list_occurences(Header * pool, Header * blk)
{
  size_t n = 0; // was declared int while the function returns size_t; made consistent
  if (!isNil(pool->prev))
  {
    Header * cur = HDR_PTR(pool->prev);
    for (;;)
    {
      if (cur == blk)
        ++n;

      if (isNil(cur[1].next))
        break;

      cur = HDR_PTR(cur[1].next);
    }
  }
  return n;
}

// Counts how often blk occurs in the address-ordered block list (expected: exactly 1).
static size_t _count_addr_list_occurences(Header * pool, Header * blk)
{
  size_t n = 0;
  Header * cur = pool;
  for (;;)
  {
    if (cur == blk)
      ++n;

    if (isNil(cur->next))
      break;

    cur = HDR_PTR(cur->next);
  }
  return n;
}

// Asserts that p is the payload of exactly one block of the pool and that block is used.
static void _validate_user_ptr(void * pool, void * p)
{
  Header * hdr = (Header*)p - 1;
  size_t n = _count_addr_list_occurences((Header*)pool, hdr);
  assert(n == 1 && !isFree(hdr));
}

/**
Validates if all the invariants of a pool are intact.

This is very expensive when there are enough blocks in the heap (quadratic complexity!).
*/
static void _yalloc_validate(void * pool_)
{
  Header * pool = (Header*)pool_;
  Header * cur = pool;

  assert(!isNil(pool->next)); // there must always be at least two blocks: a free/used one and the final block at the end

  if (_yalloc_defrag_in_progress(pool))
  {
    // during defragmentation each used block's "prev" holds its post-defrag address
    Header * prevUsed = NULL;
    while (!isNil(cur->next))
    {
      if (!isFree(cur))
      { // it is a used block
        Header * newAddr = cur == pool ? pool : HDR_PTR(cur->prev);
        assert(newAddr <= cur);
        assert(newAddr >= pool);

        if (prevUsed)
        {
          Header * prevNewAddr = prevUsed == pool ? pool : HDR_PTR(prevUsed->prev);
          size_t prevBruttoSize = (char*)HDR_PTR(prevUsed->next) - (char*)prevUsed;
          if (isPadded(prevUsed))
            prevBruttoSize -= sizeof(Header); // remove padding; NOTE(review): was "-= 4", a stale constant — padding is one Header everywhere else (see yalloc_defrag_start/commit) — confirm
          assert((char*)newAddr == (char*)prevNewAddr + prevBruttoSize);
        }
        else
        {
          assert(newAddr == pool); // the first used block moves to the pool start
        }

        prevUsed = cur;
      }

      cur = HDR_PTR(cur->next);
    }

    assert(cur == HDR_PTR(pool->prev)); // the free-list should point to the last block
    assert(!isFree(cur)); // the last block must not be free
  }
  else
  {
    Header * prev = NULL;

    // iterate blocks in address order
    for (;;)
    {
      if (prev)
      {
        Header * x = HDR_PTR(cur->prev);
        assert(x == prev); // prev links must mirror next links
      }

      size_t n = _count_free_list_occurences(pool, cur);
      if (isFree(cur))
      { // it is a free block
        assert(n == 1); // every free block is in the free-list exactly once
        assert(!isPadded(cur)); // free blocks must have a zero padding-bit

        if (prev)
        {
          assert(!isFree(prev)); // free blocks must not be direct neighbours
        }
      }
      else
      {
        assert(n == 0); // used blocks must not be in the free-list
      }

      if (isNil(cur->next))
        break;

      Header * next = HDR_PTR(cur->next);
      assert((char*)next >= (char*)cur + sizeof(Header) * 2); // blocks are at least two headers apart
      prev = cur;
      cur = next;
    }

    assert(isNil(cur->next));

    if (!isNil(pool->prev))
    {
      // iterate free-list
      Header * f = HDR_PTR(pool->prev);
      assert(isNil(f[1].prev)); // first free block has no predecessor
      for (;;)
      {
        assert(isFree(f)); // must be free

        size_t n = _count_addr_list_occurences(pool, f);
        assert(n == 1); // every free-list entry must be a real block of the pool

        if (isNil(f[1].next))
          break;

        f = HDR_PTR(f[1].next);
      }
    }
  }
}

#else
static void _yalloc_validate(void * pool){(void)pool;}
static void _validate_user_ptr(void * pool, void * p){(void)pool; (void)p;}
#endif
|
||||
|
||||
// Initializes a pool buffer: one big free block followed by the terminating
// dummy "used" block. Returns 0 on success, -1 if the size is unsupported.
int yalloc_init(void * pool, size_t size)
{
  if (size > MAX_POOL_SIZE)
    return -1;

  // TODO: Error when pool is not properly aligned

  // TODO: Error when size is not a multiple of the alignment?
  size -= size % sizeof(Header); // round down to a multiple of the header size (was a --size loop)

  if (size < sizeof(Header) * 3) // need room for a free block (2 headers) plus the terminating header
    return -1;

  VALGRIND_CREATE_MEMPOOL(pool, 0, 0);

  Header * first = (Header*)pool;
  Header * last = (Header*)((char*)pool + size) - 1;

  MARK_NEW_FREE_HDR(first);
  MARK_NEW_HDR(last); // NOTE(review): was MARK_NEW_HDR(first), duplicating the line above while "last" (written below) was never marked — confirm

  first->prev = HDR_OFFSET(first) | 1; // free-list points to "first"; low bit: "first" is free
  first->next = HDR_OFFSET(last);
  first[1].prev = NIL; // free-list links of the single free block
  first[1].next = NIL;

  last->prev = HDR_OFFSET(first);
  last->next = NIL; // terminator

  _unprotect_pool(pool);
  _yalloc_validate(pool);
  _protect_pool(pool);
  return 0;
}
|
||||
|
||||
// Tears down valgrind bookkeeping for the pool and marks the whole buffer as
// undefined. Without valgrind this is a no-op.
void yalloc_deinit(void * pool)
{
#if USE_VALGRIND
  VALGRIND_DESTROY_MEMPOOL(pool);

  // walk to the last header so we know the pool's full extent
  Header * last = (Header*)pool;
  UNPROTECT_HDR(last);
  while (!isNil(last->next))
  {
    Header * next = HDR_PTR(last->next);
    UNPROTECT_HDR(next);
    last = next;
  }

  VALGRIND_MAKE_MEM_UNDEFINED(pool, (char*)(last + 1) - (char*)pool);
#else
  (void)pool;
#endif
}
|
||||
|
||||
|
||||
/**
Allocates size bytes from the pool (first-fit over the free-list).

Returns the address right after the block header, or NULL if size is 0 or no
free block is large enough. Must not be called while defragmenting.
*/
void * yalloc_alloc(void * pool, size_t size)
{
  assert_is_pool(pool);
  _unprotect_pool(pool);
  assert(!_yalloc_defrag_in_progress(pool));
  _yalloc_validate(pool);
  if (!size)
  {
    _protect_pool(pool);
    return NULL;
  }

  Header * root = (Header*)pool;
  if (isNil(root->prev))
  {
    _protect_pool(pool);
    return NULL; // no free block, no chance to allocate anything
  }

  /* round up to alignment */
  size = ALIGN(size, 32);

  size_t bruttoSize = size + sizeof(Header); // payload plus its header
  Header * prev = NULL;
  Header * cur = HDR_PTR(root->prev);
  for (;;)
  {
    size_t curSize = (char*)HDR_PTR(cur->next) - (char*)cur; /* size of the block, including its header */

    if (curSize >= bruttoSize) // it is big enough
    {
      // take action for unused space in the free block
      if (curSize >= bruttoSize + sizeof(Header) * 2)
      { // the leftover space is big enough to make it a free block
        // Build a free block from the unused space and insert it into the list of free blocks after the current free block
        Header * tail = (Header*)((char*)cur + bruttoSize);
        MARK_NEW_FREE_HDR(tail);

        // update address-order-list
        tail->next = cur->next;
        tail->prev = HDR_OFFSET(cur) | 1;
        HDR_PTR(cur->next)->prev = HDR_OFFSET(tail); // NOTE: We know the next block is used because free blocks are never neighbours. So we don't have to care about the lower bit which would be set for the prev of a free block.
        cur->next = HDR_OFFSET(tail);

        // update list of free blocks
        tail[1].next = cur[1].next;
        // NOTE: tail[1].prev is updated in the common path below (assignment to "HDR_PTR(cur[1].next)[1].prev")

        if (!isNil(cur[1].next))
          HDR_PTR(cur[1].next)[1].prev = HDR_OFFSET(tail);
        cur[1].next = HDR_OFFSET(tail);
      }
      else if (curSize > bruttoSize)
      { // there will be unused space, but not enough to insert a free header
        internal_assert(curSize - bruttoSize == sizeof(Header)); // unused space must be enough to build a free-block or it should be exactly the size of a Header
        cur->next |= 1; // set marker for "has unused trailing space"
      }
      else
      {
        internal_assert(curSize == bruttoSize); // exact fit
      }

      cur->prev &= NIL; // clear marker for "is a free block"

      // remove from linked list of free blocks
      if (prev)
        prev[1].next = cur[1].next;
      else
      { // cur was the first free block; the pool's free-list head moves on
        uint32_t freeBit = isFree(root);
        root->prev = (cur[1].next & NIL) | freeBit;
      }

      if (!isNil(cur[1].next))
        HDR_PTR(cur[1].next)[1].prev = prev ? HDR_OFFSET(prev) : NIL;

      _yalloc_validate(pool);
      VALGRIND_MEMPOOL_ALLOC(pool, cur + 1, size);
      _protect_pool(pool);
      return cur + 1; // return address after the header
    }

    if (isNil(cur[1].next))
      break; // reached the end of the free-list without a fit

    prev = cur;
    cur = HDR_PTR(cur[1].next);
  }

  _yalloc_validate(pool);
  _protect_pool(pool);
  return NULL;
}
|
||||
|
||||
// Removes a block from the free-list and moves the pools first-free-bock pointer to its successor if it pointed to that block.
|
||||
static void unlink_from_free_list(Header * pool, Header * blk)
|
||||
{
|
||||
// update the pools pointer to the first block in the free list if necessary
|
||||
if (isNil(blk[1].prev))
|
||||
{ // the block is the first in the free-list
|
||||
// make the pools first-free-pointer point to the next in the free list
|
||||
uint32_t freeBit = isFree(pool);
|
||||
pool->prev = (blk[1].next & NIL) | freeBit;
|
||||
}
|
||||
else
|
||||
HDR_PTR(blk[1].prev)[1].next = blk[1].next;
|
||||
|
||||
if (!isNil(blk[1].next))
|
||||
HDR_PTR(blk[1].next)[1].prev = blk[1].prev;
|
||||
}
|
||||
|
||||
size_t yalloc_block_size(void * pool, void * p)
|
||||
{
|
||||
Header * a = (Header*)p - 1;
|
||||
UNPROTECT_HDR(a);
|
||||
Header * b = HDR_PTR(a->next);
|
||||
size_t payloadSize = (char*)b - (char*)p;
|
||||
if (isPadded(a))
|
||||
payloadSize -= sizeof(Header);
|
||||
PROTECT_HDR(a);
|
||||
return payloadSize;
|
||||
}
|
||||
|
||||
void yalloc_free(void * pool_, void * p)
|
||||
{
|
||||
assert_is_pool(pool_);
|
||||
assert(!yalloc_defrag_in_progress(pool_));
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
_unprotect_pool(pool_);
|
||||
|
||||
Header * pool = (Header*)pool_;
|
||||
Header * cur = (Header*)p - 1;
|
||||
|
||||
// get pointers to previous/next block in address order
|
||||
Header * prev = cur == pool || isNil(cur->prev) ? NULL : HDR_PTR(cur->prev);
|
||||
Header * next = isNil(cur->next) ? NULL : HDR_PTR(cur->next);
|
||||
|
||||
int prevFree = prev && isFree(prev);
|
||||
int nextFree = next && isFree(next);
|
||||
|
||||
#if USE_VALGRIND
|
||||
{
|
||||
unsigned errs = VALGRIND_COUNT_ERRORS;
|
||||
VALGRIND_MEMPOOL_FREE(pool, p);
|
||||
if (VALGRIND_COUNT_ERRORS > errs)
|
||||
{ // early exit if the free was invalid (so we get a valgrind error and don't mess up the pool, which is helpful for testing if invalid frees are detected by valgrind)
|
||||
_protect_pool(pool_);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
_validate_user_ptr(pool_, p);
|
||||
|
||||
if (prevFree && nextFree)
|
||||
{ // the freed block has two free neighbors
|
||||
unlink_from_free_list(pool, prev);
|
||||
unlink_from_free_list(pool, next);
|
||||
|
||||
// join prev, cur and next
|
||||
prev->next = next->next;
|
||||
HDR_PTR(next->next)->prev = cur->prev;
|
||||
|
||||
// prev is now the block we want to push onto the free-list
|
||||
cur = prev;
|
||||
}
|
||||
else if (prevFree)
|
||||
{
|
||||
unlink_from_free_list(pool, prev);
|
||||
|
||||
// join prev and cur
|
||||
prev->next = cur->next;
|
||||
HDR_PTR(cur->next)->prev = cur->prev;
|
||||
|
||||
// prev is now the block we want to push onto the free-list
|
||||
cur = prev;
|
||||
}
|
||||
else if (nextFree)
|
||||
{
|
||||
unlink_from_free_list(pool, next);
|
||||
|
||||
// join cur and next
|
||||
cur->next = next->next;
|
||||
HDR_PTR(next->next)->prev = next->prev & NIL;
|
||||
}
|
||||
|
||||
// if there is a previous block and that block has padding then we want to grow the new free block into that padding
|
||||
if (cur != pool && !isNil(cur->prev))
|
||||
{ // there is a previous block
|
||||
Header * left = HDR_PTR(cur->prev);
|
||||
if (isPadded(left))
|
||||
{ // the previous block has padding, so extend the current block to consume move the padding to the current free block
|
||||
Header * grown = cur - 1;
|
||||
MARK_NEW_HDR(grown);
|
||||
grown->next = cur->next;
|
||||
grown->prev = cur->prev;
|
||||
left->next = HDR_OFFSET(grown);
|
||||
if (!isNil(cur->next))
|
||||
HDR_PTR(cur->next)->prev = HDR_OFFSET(grown);
|
||||
|
||||
cur = grown;
|
||||
}
|
||||
}
|
||||
|
||||
cur->prev |= 1; // it becomes a free block
|
||||
cur->next &= NIL; // reset padding-bit
|
||||
UNPROTECT_HDR(cur + 1);
|
||||
cur[1].prev = NIL; // it will be the first free block in the free list, so it has no prevFree
|
||||
|
||||
if (!isNil(pool->prev))
|
||||
{ // the free-list was already non-empty
|
||||
HDR_PTR(pool->prev)[1].prev = HDR_OFFSET(cur); // make the first entry in the free list point back to the new free block (it will become the first one)
|
||||
cur[1].next = pool->prev; // the next free block is the first of the old free-list
|
||||
}
|
||||
else
|
||||
cur[1].next = NIL; // free-list was empty, so there is no successor
|
||||
|
||||
VALGRIND_MAKE_MEM_NOACCESS(cur + 2, (char*)HDR_PTR(cur->next) - (char*)(cur + 2));
|
||||
|
||||
// now the freed block is the first in the free-list
|
||||
|
||||
// update the offset to the first element of the free list
|
||||
uint32_t freeBit = isFree(pool); // remember the free-bit of the offset
|
||||
pool->prev = HDR_OFFSET(cur) | freeBit; // update the offset and restore the free-bit
|
||||
_yalloc_validate(pool);
|
||||
_protect_pool(pool);
|
||||
}
|
||||
|
||||
size_t yalloc_count_free(void * pool_)
|
||||
{
|
||||
assert_is_pool(pool_);
|
||||
_unprotect_pool(pool_);
|
||||
assert(!_yalloc_defrag_in_progress(pool_));
|
||||
Header * pool = (Header*)pool_;
|
||||
size_t bruttoFree = 0;
|
||||
Header * cur = pool;
|
||||
|
||||
_yalloc_validate(pool);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (isFree(cur))
|
||||
{ // it is a free block
|
||||
bruttoFree += (char*)HDR_PTR(cur->next) - (char*)cur;
|
||||
}
|
||||
else
|
||||
{ // it is a used block
|
||||
if (isPadded(cur))
|
||||
{ // the used block is padded
|
||||
bruttoFree += sizeof(Header);
|
||||
}
|
||||
}
|
||||
|
||||
if (isNil(cur->next))
|
||||
break;
|
||||
|
||||
cur = HDR_PTR(cur->next);
|
||||
}
|
||||
|
||||
_protect_pool(pool);
|
||||
|
||||
if (bruttoFree < sizeof(Header))
|
||||
{
|
||||
internal_assert(!bruttoFree); // free space should always be a multiple of sizeof(Header)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return bruttoFree - sizeof(Header);
|
||||
}
|
||||
|
||||
size_t yalloc_count_continuous(void * pool_)
|
||||
{
|
||||
assert_is_pool(pool_);
|
||||
_unprotect_pool(pool_);
|
||||
assert(!_yalloc_defrag_in_progress(pool_));
|
||||
Header * pool = (Header*)pool_;
|
||||
size_t largestFree = 0;
|
||||
Header * cur = pool;
|
||||
|
||||
_yalloc_validate(pool);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (isFree(cur))
|
||||
{ // it is a free block
|
||||
size_t temp = (uintptr_t)HDR_PTR(cur->next) - (uintptr_t)cur;
|
||||
if(temp > largestFree)
|
||||
largestFree = temp;
|
||||
}
|
||||
|
||||
if (isNil(cur->next))
|
||||
break;
|
||||
|
||||
cur = HDR_PTR(cur->next);
|
||||
}
|
||||
|
||||
_protect_pool(pool);
|
||||
|
||||
if (largestFree < sizeof(Header))
|
||||
{
|
||||
internal_assert(!largestFree); // free space should always be a multiple of sizeof(Header)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return largestFree - sizeof(Header);
|
||||
}
|
||||
|
||||
void * yalloc_first_used(void * pool)
|
||||
{
|
||||
assert_is_pool(pool);
|
||||
_unprotect_pool(pool);
|
||||
Header * blk = (Header*)pool;
|
||||
while (!isNil(blk->next))
|
||||
{
|
||||
if (!isFree(blk))
|
||||
{
|
||||
_protect_pool(pool);
|
||||
return blk + 1;
|
||||
}
|
||||
|
||||
blk = HDR_PTR(blk->next);
|
||||
}
|
||||
|
||||
_protect_pool(pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void * yalloc_next_used(void * pool, void * p)
|
||||
{
|
||||
assert_is_pool(pool);
|
||||
_unprotect_pool(pool);
|
||||
_validate_user_ptr(pool, p);
|
||||
Header * prev = (Header*)p - 1;
|
||||
assert(!isNil(prev->next)); // the last block should never end up as input to this function (because it is not user-visible)
|
||||
|
||||
Header * blk = HDR_PTR(prev->next);
|
||||
while (!isNil(blk->next))
|
||||
{
|
||||
if (!isFree(blk))
|
||||
{
|
||||
_protect_pool(pool);
|
||||
return blk + 1;
|
||||
}
|
||||
|
||||
blk = HDR_PTR(blk->next);
|
||||
}
|
||||
|
||||
_protect_pool(pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void yalloc_defrag_start(void * pool_)
|
||||
{
|
||||
assert_is_pool(pool_);
|
||||
_unprotect_pool(pool_);
|
||||
assert(!_yalloc_defrag_in_progress(pool_));
|
||||
Header * pool = (Header*)pool_;
|
||||
|
||||
// iterate over all blocks in address order and store the post-defragment address of used blocks in their "prev" field
|
||||
size_t end = 0; // offset for the next used block
|
||||
Header * blk = (Header*)pool;
|
||||
for (; !isNil(blk->next); blk = HDR_PTR(blk->next))
|
||||
{
|
||||
if (!isFree(blk))
|
||||
{ // it is a used block
|
||||
blk->prev = end >> 1;
|
||||
internal_assert((char*)HDR_PTR(blk->prev) == (char*)pool + end);
|
||||
|
||||
size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;
|
||||
|
||||
if (isPadded(blk))
|
||||
{ // the block is padded
|
||||
bruttoSize -= sizeof(Header);
|
||||
}
|
||||
|
||||
end += bruttoSize;
|
||||
internal_assert(end % sizeof(Header) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// blk is now the last block (the dummy "used" block at the end of the pool)
|
||||
internal_assert(isNil(blk->next));
|
||||
internal_assert(!isFree(blk));
|
||||
|
||||
// mark the pool as "defragementation in progress"
|
||||
uint32_t freeBit = isFree(pool);
|
||||
pool->prev = (HDR_OFFSET(blk) & NIL) | freeBit;
|
||||
|
||||
_yalloc_validate(pool);
|
||||
internal_assert(yalloc_defrag_in_progress(pool));
|
||||
_protect_pool(pool);
|
||||
}
|
||||
|
||||
void * yalloc_defrag_address(void * pool_, void * p)
|
||||
{
|
||||
assert_is_pool(pool_);
|
||||
assert(yalloc_defrag_in_progress(pool_));
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
Header * pool = (Header*)pool_;
|
||||
|
||||
_unprotect_pool(pool);
|
||||
_validate_user_ptr(pool_, p);
|
||||
|
||||
if (pool + 1 == p)
|
||||
return pool + 1; // "prev" of the first block points to the last used block to mark the pool as "defragmentation in progress"
|
||||
|
||||
Header * blk = (Header*)p - 1;
|
||||
|
||||
void * defragP = HDR_PTR(blk->prev) + 1;
|
||||
|
||||
_protect_pool(pool);
|
||||
return defragP;
|
||||
}
|
||||
|
||||
void yalloc_defrag_commit(void * pool_)
|
||||
{
|
||||
assert_is_pool(pool_);
|
||||
_unprotect_pool(pool_);
|
||||
assert(_yalloc_defrag_in_progress(pool_));
|
||||
Header * pool = (Header*)pool_;
|
||||
|
||||
// iterate over all blocks in address order and move them
|
||||
size_t end = 0; // offset for the next used block
|
||||
Header * blk = pool;
|
||||
Header * lastUsed = NULL;
|
||||
while (!isNil(blk->next))
|
||||
{
|
||||
if (!isFree(blk))
|
||||
{ // it is a used block
|
||||
size_t bruttoSize = (char*)HDR_PTR(blk->next) - (char*)blk;
|
||||
|
||||
if (isPadded(blk))
|
||||
{ // the block is padded
|
||||
bruttoSize -= sizeof(Header);
|
||||
}
|
||||
|
||||
Header * next = HDR_PTR(blk->next);
|
||||
|
||||
blk->prev = lastUsed ? HDR_OFFSET(lastUsed) : NIL;
|
||||
blk->next = (end + bruttoSize) >> 1;
|
||||
|
||||
lastUsed = (Header*)((char*)pool + end);
|
||||
VALGRIND_MAKE_MEM_UNDEFINED(lastUsed, (char*)blk - (char*)lastUsed);
|
||||
memmove(lastUsed, blk, bruttoSize);
|
||||
VALGRIND_MEMPOOL_CHANGE(pool, blk + 1, lastUsed + 1, bruttoSize - sizeof(Header));
|
||||
|
||||
end += bruttoSize;
|
||||
blk = next;
|
||||
}
|
||||
else
|
||||
blk = HDR_PTR(blk->next);
|
||||
}
|
||||
|
||||
// blk is now the last block (the dummy "used" block at the end of the pool)
|
||||
internal_assert(isNil(blk->next));
|
||||
internal_assert(!isFree(blk));
|
||||
|
||||
if (lastUsed)
|
||||
{
|
||||
Header * gap = HDR_PTR(lastUsed->next);
|
||||
if (gap == blk)
|
||||
{ // there is no gap
|
||||
pool->prev = NIL; // the free list is empty
|
||||
blk->prev = HDR_OFFSET(lastUsed);
|
||||
}
|
||||
else if (blk - gap > 1)
|
||||
{ // the gap is big enouogh for a free Header
|
||||
|
||||
// set a free list that contains the gap as only element
|
||||
gap->prev = HDR_OFFSET(lastUsed) | 1;
|
||||
gap->next = HDR_OFFSET(blk);
|
||||
gap[1].prev = NIL;
|
||||
gap[1].next = NIL;
|
||||
pool->prev = blk->prev = HDR_OFFSET(gap);
|
||||
}
|
||||
else
|
||||
{ // there is a gap, but it is too small to be used as free-list-node, so just make it padding of the last used block
|
||||
lastUsed->next = HDR_OFFSET(blk) | 1;
|
||||
pool->prev = NIL;
|
||||
blk->prev = HDR_OFFSET(lastUsed);
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // the pool is empty
|
||||
pool->prev = 1;
|
||||
}
|
||||
|
||||
internal_assert(!_yalloc_defrag_in_progress(pool));
|
||||
_yalloc_validate(pool);
|
||||
_protect_pool(pool);
|
||||
}
|
|
@ -1,176 +0,0 @@
|
|||
/**
@file

API of the yalloc allocator.
*/

#ifndef YALLOC_H
#define YALLOC_H

#include <stddef.h>

/**
Maximum supported pool size. yalloc_init() will fail for larger pools.
*/
#define MAX_POOL_SIZE ((2 << 24) - 4)

/**
Creates a pool inside a given buffer.

Pools must be deinitialized with yalloc_deinit() when they are no longer needed.

@param pool The starting address of the pool. It must be at least 32-bit
aligned (the internal structure uses 32-bit integers). Allocations are placed
at 32-byte boundaries starting from this address, so if the user data must be
32-byte aligned then this address has to be 32-byte aligned. Typically an
address of static memory, or an array on the stack is used if the pool is only
used temporarily.
@param size Size of the pool.
@return 0 on success, nonzero if the size is not supported.
*/
int yalloc_init(void * pool, size_t size);

/**
Deinitializes the buffer that is used by the pool and makes it available for other use.

The content of the buffer is undefined after this.

@param pool The starting address of an initialized pool.
*/
void yalloc_deinit(void * pool);

/**
Allocates a block of memory from a pool.

This function mimics malloc().

The pool must not be in the "defragmenting" state when this function is called.

@param pool The starting address of an initialized pool.
@param size Number of bytes to allocate.
@return Allocated buffer or \c NULL if there was no free range that could serve
the allocation. See @ref yalloc_defrag_start() for a way to remove
fragmentation which may cause allocations to fail even when there is enough
space in total.
*/
void * yalloc_alloc(void * pool, size_t size);

/**
Returns an allocation to a pool.

This function mimics free().

The pool must not be in the "defragmenting" state when this function is called.

@param pool The starting address of the initialized pool the allocation comes from.
@param p An address that was returned from yalloc_alloc() of the same pool.
*/
void yalloc_free(void * pool, void * p);

/**
Returns the maximum size of a successful allocation (assuming a completely unfragmented heap).

After defragmentation the first allocation with the returned size is guaranteed to succeed.

@param pool The starting address of an initialized pool.
@return Number of bytes that can be allocated (assuming the pool is defragmented).
*/
size_t yalloc_count_free(void * pool);

/**
Returns the maximum continuous free area.

@param pool The starting address of an initialized pool.
@return Number of free bytes that exist continuously.
*/
size_t yalloc_count_continuous(void * pool_);

/**
Queries the usable size of an allocated block.

@param pool The starting address of the initialized pool the allocation comes from.
@param p An address that was returned from yalloc_alloc() of the same pool.
@return Size of the memory block: the size passed to @ref yalloc_alloc()
rounded up to the allocation granularity.
*/
size_t yalloc_block_size(void * pool, void * p);

/**
Finds the first (in address order) allocation of a pool.

@param pool The starting address of an initialized pool.
@return Address of the allocation with the lowest address inside the pool (this
is what @ref yalloc_alloc() returned), or \c NULL if there is no used block.
*/
void * yalloc_first_used(void * pool);

/**
Given a pointer to an allocation finds the next (in address order) used block of a pool.

@param pool The starting address of the initialized pool the allocation comes from.
@param p Pointer to an allocation in that pool, typically comes from a previous
call to @ref yalloc_first_used()
*/
void * yalloc_next_used(void * pool, void * p);

/**
Starts defragmentation for a pool.

Allocations will stay where they are. But the pool is put in the "defragmenting"
state (see @ref yalloc_defrag_in_progress()).

The pool must not be in the "defragmenting" state when this function is called.
The pool is put into the "defragmenting" state by this function.

@param pool The starting address of an initialized pool.
*/
void yalloc_defrag_start(void * pool);

/**
Returns the address that an allocation will have after @ref yalloc_defrag_commit() is called.

The pool must be in the "defragmenting" state when this function is called.

@param pool The starting address of the initialized pool the allocation comes from.
@param p Pointer to an allocation in that pool.
@return The address the allocation will have after @ref yalloc_defrag_commit() is called.
*/
void * yalloc_defrag_address(void * pool, void * p);

/**
Finishes the defragmentation.

The content of all allocations in the pool will be moved to the address that
was reported by @ref yalloc_defrag_address(). The pool will then have only one
free block. This means that an <tt>yalloc_alloc(pool, yalloc_count_free(pool))</tt>
will succeed.

The pool must be in the "defragmenting" state when this function is called. The
pool is put back to normal state by this function.

@param pool The starting address of an initialized pool.
*/
void yalloc_defrag_commit(void * pool);

/**
Tells if the pool is in the "defragmenting" state (after a @ref yalloc_defrag_start() and before a @ref yalloc_defrag_commit()).

@param pool The starting address of an initialized pool.
@return Nonzero if the pool is currently in the "defragmenting" state.
*/
int yalloc_defrag_in_progress(void * pool);


/**
Helper function that dumps the state of the pool to stdout.

This function is only available if built with <tt>yalloc_dump.c</tt>. This
function only exists for debugging purposes and can be ignored by normal users
that are not interested in the internal structure of the implementation.

@param pool The starting address of an initialized pool.
@param name A string that is used as "Title" for the output.
*/
void yalloc_dump(void * pool, char * name);


#endif // YALLOC_H
|
|
@ -1,39 +0,0 @@
|
|||
#include "yalloc_internals.h"

#include <stdio.h>

// Prints one prev/next field, resolved to a byte offset from the pool start
// ("nil" if the offset is unset).
// BUGFIX: the parameter was uint16_t, truncating the 32-bit offsets stored in
// Header::prev/next (both are uint32_t in yalloc_internals.h).
static void printOffset(void * pool, char * name, uint32_t offset)
{
  if (isNil(offset))
    printf(" %s: nil\n", name);
  else
    printf(" %s: %td\n", name, (char*)HDR_PTR(offset) - (char*)pool);
}

// Dumps the whole block structure of a pool to stdout (debugging aid).
void yalloc_dump(void * pool, char * name)
{
  printf("---- %s ----\n", name);
  Header * cur = (Header*)pool;
  for (;;)
  {
    printf(isFree(cur) ? "%td: free @%p\n" : "%td: used @%p\n", (char*)cur - (char*)pool, cur);
    printOffset(pool, cur == pool ? "first free" : "prev", cur->prev);
    printOffset(pool, "next", cur->next);
    if (isFree(cur))
    { // free blocks additionally carry free-list links in the second header
      printOffset(pool, "prevFree", cur[1].prev);
      printOffset(pool, "nextFree", cur[1].next);
    }
    else
      printf(" payload includes padding: %i\n", isPadded(cur));

    if (isNil(cur->next))
      break;

    printf(" %td bytes payload\n", (char*)HDR_PTR(cur->next) - (char*)cur - sizeof(Header));

    cur = HDR_PTR(cur->next);
  }

  fflush(stdout);
}
|
|
@ -1,63 +0,0 @@
|
|||
#ifndef YALLOC_INTERNALS_H
#define YALLOC_INTERNALS_H

#include <stdint.h>

typedef struct
{
  uint32_t prev; // low bit set if free
  uint32_t next; // for used blocks: low bit set if unused header at the end

  /* We need user data to be 32-byte aligned, so the header needs
   * to be 32 bytes in size (as user data follows the header) */
  uint8_t padding[32 - (sizeof(uint32_t) * 2)];
} Header;

// NOTE: Data is 32-bit aligned and offsets are stored shifted right by one;
// the low bit of prev/next is reserved as a flag bit. (An older comment here
// described 16-bit offsets — the fields are uint32_t now.)

#define NIL 0xFFFFFFFEu

// return Header-address for a prev/next
#define HDR_PTR(offset) ((Header*)((char*)pool + (((offset) & NIL)<<1)))

// return a prev/next for a Header-address
#define HDR_OFFSET(blockPtr) ((uint32_t)(((char*)blockPtr - (char*)pool) >> 1))

#ifndef YALLOC_INTERNAL_VALIDATE
# ifdef NDEBUG
#   define YALLOC_INTERNAL_VALIDATE 0
# else
#   define YALLOC_INTERNAL_VALIDATE 1
# endif
#endif


/*
internal_assert() is used in some places to check internal expectations.
Activate this if you modify the code to detect problems as early as possible.
In other cases this should be deactivated.
*/
#if 0
#define internal_assert assert
#else
#define internal_assert(condition) ((void)0)
#endif

// detects offsets that point nowhere (NIL with or without the flag bit)
static inline int isNil(uint32_t offset)
{
  return (offset | 1) == 0xFFFFFFFF;
}

// low bit of "prev" marks a free block
static inline int isFree(Header * hdr)
{
  return hdr->prev & 1;
}

// low bit of "next" marks unused trailing space (padding) in a used block
static inline int isPadded(Header * hdr)
{
  return hdr->next & 1;
}


#endif // YALLOC_INTERNALS_H
|
|
@ -32,7 +32,7 @@ GLdc uses CMake for its build system, it currently ships with two "backends":
|
|||
- kospvr - This is the hardware-accelerated Dreamcast backend
|
||||
- software - This is a stub software rasterizer used for testing and debugging
|
||||
|
||||
To compile for Dreamcast, you'll want to do something like the following:
|
||||
To compile a Dreamcast debug build, you'll want to do something like the following:
|
||||
|
||||
```
|
||||
mkdir dcbuild
|
||||
|
@ -41,6 +41,11 @@ cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" .
|
|||
make
|
||||
```
|
||||
|
||||
For a release build, replace the cmake line with with the following:
|
||||
```
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
|
||||
```
|
||||
|
||||
You will need KallistiOS compiled and configured (e.g. the KOS_BASE environment
|
||||
variable must be set)
|
||||
|
||||
|
|
|
@ -12,36 +12,45 @@
|
|||
|
||||
#include "aligned_vector.h"
|
||||
|
||||
extern inline void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
|
||||
extern inline void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
|
||||
extern inline void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
|
||||
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
|
||||
extern inline void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count);
|
||||
extern inline void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count);
|
||||
extern inline void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count);
|
||||
extern inline void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count);
|
||||
|
||||
void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
|
||||
vector->size = vector->capacity = 0;
|
||||
vector->element_size = element_size;
|
||||
void aligned_vector_init(AlignedVector* vector, uint32_t element_size) {
|
||||
/* Now initialize the header*/
|
||||
AlignedVectorHeader* const hdr = &vector->hdr;
|
||||
hdr->size = 0;
|
||||
hdr->capacity = ALIGNED_VECTOR_CHUNK_SIZE;
|
||||
hdr->element_size = element_size;
|
||||
vector->data = NULL;
|
||||
|
||||
/* Reserve some initial capacity */
|
||||
aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
|
||||
/* Reserve some initial capacity. This will do the allocation but not set up the header */
|
||||
void* ptr = aligned_vector_reserve(vector, ALIGNED_VECTOR_CHUNK_SIZE);
|
||||
assert(ptr);
|
||||
(void) ptr;
|
||||
}
|
||||
|
||||
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
|
||||
if(vector->size == 0) {
|
||||
AlignedVectorHeader* const hdr = &vector->hdr;
|
||||
if(hdr->size == 0) {
|
||||
uint32_t element_size = hdr->element_size;
|
||||
free(vector->data);
|
||||
|
||||
/* Reallocate the header */
|
||||
vector->data = NULL;
|
||||
vector->capacity = 0;
|
||||
hdr->size = hdr->capacity = 0;
|
||||
hdr->element_size = element_size;
|
||||
} else {
|
||||
unsigned int new_byte_size = vector->size * vector->element_size;
|
||||
unsigned char* original_data = vector->data;
|
||||
uint32_t new_byte_size = (hdr->size * hdr->element_size);
|
||||
uint8_t* original_data = vector->data;
|
||||
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
|
||||
|
||||
if(original_data) {
|
||||
FASTCPY(vector->data, original_data, new_byte_size);
|
||||
free(original_data);
|
||||
}
|
||||
|
||||
vector->capacity = vector->size;
|
||||
hdr->capacity = hdr->size;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -12,6 +13,7 @@ extern "C" {
|
|||
#if defined(__APPLE__) || defined(__WIN32__)
|
||||
/* Linux + Kos define this, OSX does not, so just use malloc there */
|
||||
static inline void* memalign(size_t alignment, size_t size) {
|
||||
(void) alignment;
|
||||
return malloc(size);
|
||||
}
|
||||
#else
|
||||
|
@ -65,10 +67,14 @@ AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
|
|||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint8_t* __attribute__((aligned(32))) data;
|
||||
uint32_t size;
|
||||
uint32_t capacity;
|
||||
uint32_t element_size;
|
||||
} __attribute__((aligned(32))) AlignedVectorHeader;
|
||||
|
||||
typedef struct {
|
||||
AlignedVectorHeader hdr;
|
||||
uint8_t* data;
|
||||
} AlignedVector;
|
||||
|
||||
#define ALIGNED_VECTOR_CHUNK_SIZE 256u
|
||||
|
@ -78,90 +84,137 @@ typedef struct {
|
|||
((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
|
||||
|
||||
|
||||
void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
|
||||
void aligned_vector_init(AlignedVector* vector, uint32_t element_size);
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, unsigned int element_count) {
|
||||
if(element_count <= vector->capacity) {
|
||||
return NULL;
|
||||
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) {
|
||||
const AlignedVectorHeader* hdr = &vector->hdr;
|
||||
assert(index < hdr->size);
|
||||
return vector->data + (index * hdr->element_size);
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) {
|
||||
AlignedVectorHeader* hdr = &vector->hdr;
|
||||
|
||||
if(element_count < hdr->capacity) {
|
||||
return aligned_vector_at(vector, element_count);
|
||||
}
|
||||
|
||||
unsigned int original_byte_size = vector->size * vector->element_size;
|
||||
uint32_t original_byte_size = (hdr->size * hdr->element_size);
|
||||
|
||||
/* We overallocate so that we don't make small allocations during push backs */
|
||||
element_count = ROUND_TO_CHUNK_SIZE(element_count);
|
||||
|
||||
unsigned int new_byte_size = element_count * vector->element_size;
|
||||
unsigned char* original_data = vector->data;
|
||||
uint32_t new_byte_size = (element_count * hdr->element_size);
|
||||
uint8_t* original_data = vector->data;
|
||||
|
||||
vector->data = (unsigned char*) memalign(0x20, new_byte_size);
|
||||
vector->data = (uint8_t*) memalign(0x20, new_byte_size);
|
||||
assert(vector->data);
|
||||
|
||||
if(original_data) {
|
||||
AV_MEMCPY4(vector->data, original_data, original_byte_size);
|
||||
free(original_data);
|
||||
}
|
||||
|
||||
vector->capacity = element_count;
|
||||
AV_MEMCPY4(vector->data, original_data, original_byte_size);
|
||||
free(original_data);
|
||||
|
||||
hdr->capacity = element_count;
|
||||
return vector->data + original_byte_size;
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
|
||||
assert(index < vector->size);
|
||||
return &vector->data[index * vector->element_size];
|
||||
AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) {
|
||||
return (AlignedVectorHeader*) &vector->hdr;
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count) {
|
||||
AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) {
|
||||
const AlignedVectorHeader* hdr = &vector->hdr;
|
||||
return hdr->size;
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) {
|
||||
const AlignedVectorHeader* hdr = &vector->hdr;
|
||||
return hdr->capacity;
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) {
|
||||
return vector->data;
|
||||
}
|
||||
|
||||
#define av_assert(x) \
|
||||
do {\
|
||||
if(!(x)) {\
|
||||
fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
|
||||
exit(1);\
|
||||
}\
|
||||
} while(0); \
|
||||
|
||||
/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */
|
||||
AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) {
|
||||
void* ret = NULL;
|
||||
|
||||
unsigned int previousCount = vector->size;
|
||||
|
||||
if(vector->capacity < element_count) {
|
||||
AlignedVectorHeader* hdr = &vector->hdr;
|
||||
uint32_t previous_count = hdr->size;
|
||||
if(hdr->capacity <= element_count) {
|
||||
/* If we didn't have capacity, increase capacity (slow) */
|
||||
vector->size = element_count;
|
||||
ret = aligned_vector_reserve(vector, element_count);
|
||||
} else if(previousCount < element_count) {
|
||||
|
||||
aligned_vector_reserve(vector, element_count);
|
||||
hdr->size = element_count;
|
||||
|
||||
ret = aligned_vector_at(vector, previous_count);
|
||||
|
||||
av_assert(hdr->size == element_count);
|
||||
av_assert(hdr->size <= hdr->capacity);
|
||||
} else if(previous_count < element_count) {
|
||||
/* So we grew, but had the capacity, just get a pointer to
|
||||
* where we were */
|
||||
vector->size = element_count;
|
||||
ret = aligned_vector_at(vector, previousCount);
|
||||
} else {
|
||||
vector->size = element_count;
|
||||
hdr->size = element_count;
|
||||
av_assert(hdr->size < hdr->capacity);
|
||||
ret = aligned_vector_at(vector, previous_count);
|
||||
} else if(hdr->size != element_count) {
|
||||
hdr->size = element_count;
|
||||
av_assert(hdr->size < hdr->capacity);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count) {
|
||||
AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) {
|
||||
/* Resize enough room */
|
||||
AlignedVectorHeader* hdr = &vector->hdr;
|
||||
|
||||
assert(count);
|
||||
assert(vector->element_size);
|
||||
assert(hdr->element_size);
|
||||
|
||||
unsigned int initial_size = vector->size;
|
||||
aligned_vector_resize(vector, vector->size + count);
|
||||
#ifndef NDEBUG
|
||||
uint32_t element_size = hdr->element_size;
|
||||
uint32_t initial_size = hdr->size;
|
||||
#endif
|
||||
|
||||
assert(vector->size == initial_size + count);
|
||||
|
||||
unsigned char* dest = vector->data + (vector->element_size * initial_size);
|
||||
uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count);
|
||||
assert(dest);
|
||||
|
||||
/* Copy the objects in */
|
||||
AV_MEMCPY4(dest, objs, vector->element_size * count);
|
||||
AV_MEMCPY4(dest, objs, hdr->element_size * count);
|
||||
|
||||
assert(hdr->element_size == element_size);
|
||||
assert(hdr->size == initial_size + count);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
|
||||
return aligned_vector_resize(vector, vector->size + additional_count);
|
||||
AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) {
|
||||
AlignedVectorHeader* hdr = &vector->hdr;
|
||||
void* ret = aligned_vector_resize(vector, hdr->size + additional_count);
|
||||
assert(ret); // Should always return something
|
||||
return ret;
|
||||
}
|
||||
|
||||
AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
|
||||
vector->size = 0;
|
||||
AlignedVectorHeader* hdr = &vector->hdr;
|
||||
hdr->size = 0;
|
||||
}
|
||||
|
||||
void aligned_vector_shrink_to_fit(AlignedVector* vector);
|
||||
void aligned_vector_cleanup(AlignedVector* vector);
|
||||
static inline void* aligned_vector_back(AlignedVector* vector){
|
||||
return aligned_vector_at(vector, vector->size - 1);
|
||||
|
||||
AV_FORCE_INLINE void* aligned_vector_back(AlignedVector* vector){
|
||||
AlignedVectorHeader* hdr = &vector->hdr;
|
||||
return aligned_vector_at(vector, hdr->size ? hdr->size - 1 : 0);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -68,7 +68,6 @@ void* named_array_reserve(NamedArray* array, unsigned int id) {
|
|||
void named_array_release(NamedArray* array, unsigned int new_id) {
|
||||
unsigned int i = new_id / 8;
|
||||
unsigned int j = new_id % 8;
|
||||
|
||||
array->used_markers[i] &= (unsigned char) ~(1 << j);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@ __BEGIN_DECLS
|
|||
|
||||
#include <math.h>
|
||||
|
||||
#if __STDCPP_FLOAT16_T__
|
||||
#include <stdfloat>
|
||||
#endif
|
||||
|
||||
/* Primitive Types taken from GL for compatability */
|
||||
/* Not all types are implemented in Open GL DC V.1.0 */
|
||||
#define GL_POINTS 0x0000
|
||||
|
@ -305,12 +309,13 @@ __BEGIN_DECLS
|
|||
#define GL_UNSIGNED_INT 0x1405
|
||||
#define GL_FLOAT 0x1406
|
||||
#define GL_DOUBLE 0x140A
|
||||
#define GL_HALF_FLOAT 0x140B
|
||||
#define GL_2_BYTES 0x1407
|
||||
#define GL_3_BYTES 0x1408
|
||||
#define GL_4_BYTES 0x1409
|
||||
|
||||
/* ErrorCode */
|
||||
#define GL_NO_ERROR 0
|
||||
#define GL_NO_ERROR ((GLenum) 0)
|
||||
#define GL_INVALID_ENUM 0x0500
|
||||
#define GL_INVALID_VALUE 0x0501
|
||||
#define GL_INVALID_OPERATION 0x0502
|
||||
|
@ -359,7 +364,7 @@ __BEGIN_DECLS
|
|||
#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364
|
||||
#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365
|
||||
#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366
|
||||
|
||||
#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367
|
||||
#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368
|
||||
|
||||
#define GL_COLOR_INDEX 0x1900
|
||||
|
@ -371,6 +376,32 @@ __BEGIN_DECLS
|
|||
#define GL_RGBA 0x1908
|
||||
#define GL_LUMINANCE 0x1909
|
||||
#define GL_LUMINANCE_ALPHA 0x190A
|
||||
|
||||
#define GL_R3_G3_B2 0x2A10
|
||||
|
||||
#define GL_ALPHA4 0x803B
|
||||
#define GL_ALPHA8 0x803C
|
||||
#define GL_ALPHA12 0x803D
|
||||
#define GL_ALPHA16 0x803E
|
||||
|
||||
#define GL_LUMINANCE4 0x803F
|
||||
#define GL_LUMINANCE8 0x8040
|
||||
#define GL_LUMINANCE12 0x8041
|
||||
#define GL_LUMINANCE16 0x8042
|
||||
|
||||
#define GL_LUMINANCE4_ALPHA4 0x8043
|
||||
#define GL_LUMINANCE6_ALPHA2 0x8044
|
||||
#define GL_LUMINANCE8_ALPHA8 0x8045
|
||||
#define GL_LUMINANCE12_ALPHA4 0x8046
|
||||
#define GL_LUMINANCE12_ALPHA12 0x8047
|
||||
#define GL_LUMINANCE16_ALPHA16 0x8048
|
||||
|
||||
#define GL_INTENSITY4 0x804A
|
||||
#define GL_INTENSITY8 0x804B
|
||||
#define GL_INTENSITY12 0x804C
|
||||
#define GL_INTENSITY16 0x804D
|
||||
|
||||
#define GL_BGR 0x80E0
|
||||
#define GL_BGRA 0x80E1
|
||||
#define GL_INTENSITY 0x8049
|
||||
#define GL_RGB4 0x804F
|
||||
|
@ -387,6 +418,14 @@ __BEGIN_DECLS
|
|||
#define GL_RGBA12 0x805A
|
||||
#define GL_RGBA16 0x805B
|
||||
|
||||
#define GL_R8 0x8229
|
||||
#define GL_RG8 0x822B
|
||||
#define GL_RG 0x8227
|
||||
#define GL_R16 0x822A
|
||||
#define GL_RG16 0x822C
|
||||
#define GL_COMPRESSED_RED 0x8225
|
||||
#define GL_COMPRESSED_RG 0x8226
|
||||
|
||||
/* Polygons */
|
||||
#define GL_POINT 0x1B00
|
||||
#define GL_LINE 0x1B01
|
||||
|
@ -427,6 +466,12 @@ __BEGIN_DECLS
|
|||
#define GL_FALSE 0
|
||||
#define GL_TRUE 1
|
||||
|
||||
#if __STDCPP_FLOAT16_T__
|
||||
#define GLhalf std::float16_t
|
||||
#else
|
||||
#define GLhalf unsigned short
|
||||
#endif
|
||||
|
||||
/* Stubs for portability */
|
||||
#define GL_LINE_SMOOTH 0x0B20
|
||||
#define GL_ALPHA_TEST 0x0BC0
|
||||
|
|
|
@ -130,7 +130,7 @@ GLAPI void APIENTRY glGenFramebuffersEXT(GLsizei n, GLuint* framebuffers);
|
|||
GLAPI void APIENTRY glDeleteFramebuffersEXT(GLsizei n, const GLuint* framebuffers);
|
||||
GLAPI void APIENTRY glBindFramebufferEXT(GLenum target, GLuint framebuffer);
|
||||
GLAPI void APIENTRY glFramebufferTexture2DEXT(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
|
||||
GLAPI void APIENTRY glGenerateMipmapEXT(GLenum target);
|
||||
GLAPI void APIENTRY glGenerateMipmap(GLenum target);
|
||||
GLAPI GLenum APIENTRY glCheckFramebufferStatusEXT(GLenum target);
|
||||
GLAPI GLboolean APIENTRY glIsFramebufferEXT(GLuint framebuffer);
|
||||
|
||||
|
@ -203,7 +203,7 @@ GLAPI void APIENTRY glCompressedTexImage2DARB(GLenum target,
|
|||
#define glClientActiveTexture glClientActiveTextureARB
|
||||
#define glMultiTexCoord2f glMultiTexCoord2fARB
|
||||
|
||||
#define glGenerateMipmap glGenerateMipmapEXT
|
||||
#define glGenerateMipmapEXT glGenerateMipmap
|
||||
#define glCompressedTexImage2D glCompressedTexImage2DARB
|
||||
|
||||
#ifndef GL_VERSION_1_4
|
||||
|
|
|
@ -35,8 +35,6 @@ extern const char* GLDC_VERSION;
|
|||
|
||||
#define GL_NEARZ_CLIPPING_KOS 0xEEFA
|
||||
|
||||
#define GL_UNSIGNED_BYTE_TWID_KOS 0xEEFB
|
||||
|
||||
|
||||
/* Initialize the GL pipeline. GL will initialize the PVR. */
|
||||
GLAPI void APIENTRY glKosInit();
|
||||
|
@ -57,6 +55,13 @@ typedef struct {
|
|||
GLuint initial_pt_capacity;
|
||||
GLuint initial_immediate_capacity;
|
||||
|
||||
/* Default: True
|
||||
*
|
||||
* Whether glTexImage should automatically twiddle textures
|
||||
* if the internal format is a generic format (e.g. GL_RGB).
|
||||
* this is the same as calling glEnable(GL_TEXTURE_TWIDDLE_KOS)
|
||||
* on boot */
|
||||
GLboolean texture_twiddle;
|
||||
} GLdcConfig;
|
||||
|
||||
|
||||
|
@ -87,7 +92,7 @@ GLAPI void APIENTRY glKosInitConfig(GLdcConfig* config);
|
|||
*/
|
||||
GLAPI void APIENTRY glKosInitEx(GLdcConfig* config);
|
||||
GLAPI void APIENTRY glKosSwapBuffers();
|
||||
|
||||
GLAPI void APIENTRY glKosShutdown();
|
||||
|
||||
/*
|
||||
* CUSTOM EXTENSION multiple_shared_palette_KOS
|
||||
|
@ -186,12 +191,28 @@ GLAPI void APIENTRY glKosSwapBuffers();
|
|||
/* Memory allocation extension (GL_KOS_texture_memory_management) */
|
||||
GLAPI GLvoid APIENTRY glDefragmentTextureMemory_KOS(void);
|
||||
|
||||
/* glGet extensions */
|
||||
#define GL_FREE_TEXTURE_MEMORY_KOS 0xEF3D
|
||||
#define GL_USED_TEXTURE_MEMORY_KOS 0xEF3E
|
||||
#define GL_FREE_CONTIGUOUS_TEXTURE_MEMORY_KOS 0xEF3F
|
||||
|
||||
//for palette internal format (glfcConfig)
|
||||
#define GL_RGB565_KOS 0xEF40
|
||||
#define GL_ARGB4444_KOS 0xEF41
|
||||
#define GL_ARGB1555_KOS 0xEF42
|
||||
#define GL_RGB565_TWID_KOS 0xEF43
|
||||
#define GL_ARGB4444_TWID_KOS 0xEF44
|
||||
#define GL_ARGB1555_TWID_KOS 0xEF45
|
||||
#define GL_COLOR_INDEX8_TWID_KOS 0xEF46
|
||||
#define GL_COLOR_INDEX4_TWID_KOS 0xEF47
|
||||
#define GL_RGB_TWID_KOS 0xEF48
|
||||
#define GL_RGBA_TWID_KOS 0xEF49
|
||||
|
||||
/* glGet extensions */
|
||||
#define GL_TEXTURE_INTERNAL_FORMAT_KOS 0xEF50
|
||||
|
||||
/* If enabled, will twiddle texture uploads where possible */
|
||||
#define GL_TEXTURE_TWIDDLE_KOS 0xEF51
|
||||
|
||||
__END_DECLS
|
||||
|
||||
|
|
446
samples/cubes/main.cpp
Normal file
446
samples/cubes/main.cpp
Normal file
|
@ -0,0 +1,446 @@
|
|||
|
||||
#include <cstdio>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
#include <kos.h>
|
||||
float avgfps = -1;
|
||||
#endif
|
||||
|
||||
#include "GL/gl.h"
|
||||
#include "GL/glkos.h"
|
||||
#include "GL/glu.h"
|
||||
#include "GL/glext.h"
|
||||
|
||||
#define PI 3.14159265358979323846264338327950288f
|
||||
#define RAD_TO_DEG 57.295779513082320876798154814105f
|
||||
#define MAX_CUBES 350
|
||||
|
||||
float timeElapsed = 0.0f;
|
||||
const float dt = 1.0f / 60.0f;
|
||||
|
||||
float angle = 0;
|
||||
const float invAngle360 = 1.0f / 360.0f;
|
||||
const float cameraDistance = 3.0f;
|
||||
|
||||
bool isDrawingArrays = false;
|
||||
bool isBlendingEnabled = true;
|
||||
bool isRunning = true;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
GLubyte r;
|
||||
GLubyte g;
|
||||
GLubyte b;
|
||||
GLubyte a;
|
||||
} Color;
|
||||
|
||||
Color colors[] =
|
||||
{
|
||||
{255, 0, 0, 128},
|
||||
{0, 255, 0, 128},
|
||||
{0, 0, 255, 128},
|
||||
{255, 255, 0, 128},
|
||||
{255, 0, 255, 128},
|
||||
{0, 255, 255, 128}
|
||||
};
|
||||
Color faceColors[24];
|
||||
|
||||
float cubeVertices[] =
|
||||
{
|
||||
// Front face
|
||||
-1.0f, -1.0f, +1.0f, // vertex 0
|
||||
+1.0f, -1.0f, +1.0f, // vertex 1
|
||||
+1.0f, +1.0f, +1.0f, // vertex 2
|
||||
-1.0f, +1.0f, +1.0f, // vertex 3
|
||||
|
||||
// Back face
|
||||
-1.0f, -1.0f, -1.0f, // vertex 4
|
||||
+1.0f, -1.0f, -1.0f, // vertex 5
|
||||
+1.0f, +1.0f, -1.0f, // vertex 6
|
||||
-1.0f, +1.0f, -1.0f, // vertex 7
|
||||
|
||||
// Top face
|
||||
-1.0f, +1.0f, +1.0f, // vertex 8
|
||||
+1.0f, +1.0f, +1.0f, // vertex 9
|
||||
+1.0f, +1.0f, -1.0f, // vertex 10
|
||||
-1.0f, +1.0f, -1.0f, // vertex 11
|
||||
|
||||
// Bottom face
|
||||
-1.0f, -1.0f, +1.0f, // vertex 12
|
||||
+1.0f, -1.0f, +1.0f, // vertex 13
|
||||
+1.0f, -1.0f, -1.0f, // vertex 14
|
||||
-1.0f, -1.0f, -1.0f, // vertex 15
|
||||
|
||||
// Right face
|
||||
+1.0f, -1.0f, +1.0f, // vertex 16
|
||||
+1.0f, -1.0f, -1.0f, // vertex 17
|
||||
+1.0f, +1.0f, -1.0f, // vertex 18
|
||||
+1.0f, +1.0f, +1.0f, // vertex 19
|
||||
|
||||
// Left face
|
||||
-1.0f, -1.0f, +1.0f, // vertex 20
|
||||
-1.0f, -1.0f, -1.0f, // vertex 21
|
||||
-1.0f, +1.0f, -1.0f, // vertex 22
|
||||
-1.0f, +1.0f, +1.0f // vertex 23
|
||||
};
|
||||
|
||||
// Set up indices array
|
||||
unsigned int cubeIndices[] =
|
||||
{
|
||||
// Front face
|
||||
0, 1, 2, 3,
|
||||
|
||||
// Back face
|
||||
4, 5, 6, 7,
|
||||
|
||||
// Top face
|
||||
8, 9, 10, 11,
|
||||
|
||||
// Bottom face
|
||||
12, 13, 14, 15,
|
||||
|
||||
// Right face
|
||||
16, 17, 18, 19,
|
||||
|
||||
// Left face
|
||||
20, 21, 22, 23
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float r;
|
||||
float x, y, z;
|
||||
float vx, vy, vz;
|
||||
} Cube;
|
||||
|
||||
Cube cubes[MAX_CUBES];
|
||||
|
||||
int numCubes = 0;
|
||||
|
||||
// Create a 4x4 identity matrix
|
||||
float cubeTransformationMatrix[16] = { 1.0f, 0.0f, 0.0f, 0.0f,
|
||||
0.0f, 1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f, 0.0f,
|
||||
0.0f, 0.0f, 0.0f, 1.0f };
|
||||
|
||||
|
||||
void debugLog(const char* msg) {
|
||||
#ifdef __DREAMCAST__
|
||||
dbglog(DBG_KDEBUG, "%s\n", msg);
|
||||
#else
|
||||
printf("%s\n", msg);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void runningStats() {
|
||||
#ifdef __DREAMCAST__
|
||||
pvr_stats_t stats;
|
||||
pvr_get_stats(&stats);
|
||||
|
||||
if (avgfps != -1)
|
||||
avgfps = (avgfps + stats.frame_rate) * 0.5f;
|
||||
else
|
||||
avgfps = stats.frame_rate;
|
||||
#endif
|
||||
}
|
||||
|
||||
void avgStats() {
|
||||
#ifdef __DREAMCAST__
|
||||
dbglog(DBG_DEBUG, "Average frame rate: ~%f fps\n", avgfps);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void stats() {
|
||||
#ifdef __DREAMCAST__
|
||||
pvr_stats_t stats;
|
||||
|
||||
pvr_get_stats(&stats);
|
||||
dbglog(DBG_DEBUG, "3D Stats: %d VBLs, current frame rate ~%f fps\n", stats.vbl_count, stats.frame_rate);
|
||||
avgStats();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void addCube(float r, float x, float y, float z, float vx, float vy, float vz)
|
||||
{
|
||||
if (numCubes < MAX_CUBES) {
|
||||
cubes[numCubes].r = r;
|
||||
cubes[numCubes].x = x;
|
||||
cubes[numCubes].y = y;
|
||||
cubes[numCubes].z = z;
|
||||
cubes[numCubes].vx = vx;
|
||||
cubes[numCubes].vy = vy;
|
||||
cubes[numCubes].vz = vz;
|
||||
numCubes++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void addCubeQuick(float x, float y, float z, float scale_factor)
|
||||
{
|
||||
addCube(0.5f * scale_factor, x, y, z, 0, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
void updateCubes(float dt)
|
||||
{
|
||||
for (size_t i = 0; i < numCubes; i++)
|
||||
{
|
||||
Cube* cube = &cubes[i];
|
||||
cube->x += cube->vx * dt;
|
||||
cube->y += cube->vy * dt;
|
||||
cube->z += cube->vz * dt;
|
||||
|
||||
if (cube->x < -3 || cube->x > +3) { cube->vx *= -1; }
|
||||
if (cube->y < -3 || cube->y > +3) { cube->vy *= -1; }
|
||||
if (cube->z < -3 || cube->z > +3) { cube->vz *= -1; }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void renderUnitCube()
|
||||
{
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
glEnableClientState(GL_COLOR_ARRAY);
|
||||
|
||||
glVertexPointer(3, GL_FLOAT, 0, cubeVertices);
|
||||
glColorPointer(4, GL_UNSIGNED_BYTE, 0, faceColors);
|
||||
|
||||
if (isDrawingArrays) {
|
||||
glDrawArrays(GL_QUADS, 0, 24);
|
||||
}
|
||||
else {
|
||||
glDrawElements(GL_QUADS, 24, GL_UNSIGNED_INT, cubeIndices);
|
||||
}
|
||||
|
||||
glDisableClientState(GL_COLOR_ARRAY);
|
||||
glDisableClientState(GL_VERTEX_ARRAY);
|
||||
}
|
||||
|
||||
|
||||
void renderCubes(float angle)
|
||||
{
|
||||
for (size_t i = 0; i < numCubes; i++) {
|
||||
const float scale_factor = 0.05f + (i / (float)numCubes) * 0.35f;
|
||||
Cube* cube = &cubes[i];
|
||||
|
||||
glPushMatrix(); // Save previous camera state
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
|
||||
glTranslatef(cube->x, cube->y, cube->z);
|
||||
glRotatef(angle, 1, 1, 1); // Rotate camera / object
|
||||
|
||||
glScalef(scale_factor, scale_factor, scale_factor); // Apply scale factor
|
||||
|
||||
renderUnitCube();
|
||||
glPopMatrix(); // Restore previous camera state
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
float rnd(float Min, float Max)
|
||||
{
|
||||
return (Max - Min) * (float)rand() / (float)RAND_MAX + Min;
|
||||
}
|
||||
|
||||
|
||||
void initialize()
|
||||
{
|
||||
debugLog("Initialize video output");
|
||||
glKosInit();
|
||||
|
||||
glClearDepth(1.0);
|
||||
glDepthFunc(GL_LEQUAL);
|
||||
glDepthMask(GL_TRUE);
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glShadeModel(GL_SMOOTH);
|
||||
|
||||
if (isBlendingEnabled)
|
||||
{
|
||||
glEnable(GL_BLEND);
|
||||
}
|
||||
else
|
||||
{
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
||||
glDisable(GL_CULL_FACE);
|
||||
|
||||
glViewport(0, 0, 640, 480);
|
||||
glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
|
||||
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
|
||||
// Set up colors (each face has a different color)
|
||||
for (int i = 0; i < 6; i++)
|
||||
{
|
||||
faceColors[i * 4] = colors[i];
|
||||
faceColors[i * 4 + 1] = colors[i];
|
||||
faceColors[i * 4 + 2] = colors[i];
|
||||
faceColors[i * 4 + 3] = colors[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void updateTimer()
|
||||
{
|
||||
timeElapsed += dt;
|
||||
|
||||
if (timeElapsed > 10.0f)
|
||||
{
|
||||
stats();
|
||||
timeElapsed = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void updateLogic()
|
||||
{
|
||||
updateTimer();
|
||||
|
||||
const int fullRot = (int)(angle * invAngle360);
|
||||
angle -= fullRot * 360.0f;
|
||||
angle += 50.0f * dt;
|
||||
|
||||
const float zoomVal = __builtin_sinf(timeElapsed) * 5.0f;
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
|
||||
// Set up the camera position and orientation
|
||||
float cameraPos[] = { 0.0f, 0.0f, cameraDistance };
|
||||
float cameraTarget[] = { 0.0f, 0.0f, 0.0f };
|
||||
float cameraUp[] = { 0.0f, 1.0f, 0.0f };
|
||||
|
||||
// Move the camera
|
||||
gluLookAt(cameraPos[0], cameraPos[1], cameraPos[2],
|
||||
cameraTarget[0], cameraTarget[1], cameraTarget[2],
|
||||
cameraUp[0], cameraUp[1], cameraUp[2]);
|
||||
|
||||
glTranslatef(0.0f, 0.0f, -cameraDistance + zoomVal);
|
||||
|
||||
// Apply cube transformation (identity matrix)
|
||||
glLoadIdentity();
|
||||
|
||||
updateCubes(dt);
|
||||
|
||||
renderCubes(angle);
|
||||
|
||||
// Reset ModelView matrix to remove camera transformation
|
||||
float matrix[16];
|
||||
glGetFloatv(GL_MODELVIEW_MATRIX, matrix);
|
||||
matrix[12] = 0.0f;
|
||||
matrix[13] = 0.0f;
|
||||
matrix[14] = 0.0f;
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadMatrixf(matrix);
|
||||
}
|
||||
|
||||
|
||||
void updateInput()
|
||||
{
|
||||
#ifdef __DREAMCAST__
|
||||
static uint8_t prevButtons = 0;
|
||||
maple_device_t* cont;
|
||||
cont_state_t* state;
|
||||
|
||||
cont = maple_enum_type(0, MAPLE_FUNC_CONTROLLER);
|
||||
|
||||
if (cont)
|
||||
{
|
||||
state = (cont_state_t*)maple_dev_status(cont);
|
||||
|
||||
if (state && (state->buttons & CONT_START) && !(prevButtons & CONT_START))
|
||||
{
|
||||
isRunning = false;
|
||||
}
|
||||
|
||||
if (state && (state->buttons & CONT_A) && !(prevButtons & CONT_A))
|
||||
{
|
||||
isDrawingArrays = !isDrawingArrays;
|
||||
|
||||
if (isDrawingArrays)
|
||||
{
|
||||
glClearColor(0.3f, 0.0f, 0.3f, 1.0f);
|
||||
}
|
||||
else
|
||||
{
|
||||
glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
if (state && (state->buttons & CONT_B) && !(prevButtons & CONT_B))
|
||||
{
|
||||
isBlendingEnabled = !isBlendingEnabled;
|
||||
|
||||
if (isBlendingEnabled)
|
||||
{
|
||||
glEnable(GL_BLEND);
|
||||
}
|
||||
else
|
||||
{
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
}
|
||||
|
||||
prevButtons = state->buttons;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void swapBuffers()
|
||||
{
|
||||
#ifdef __DREAMCAST__
|
||||
glKosSwapBuffers();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
initialize();
|
||||
|
||||
// Setup camera frustum
|
||||
const float aspectRatio = 640.0f / 480.0f;
|
||||
const float fov = 60;
|
||||
const float zNear = 0.1f;
|
||||
const float zFar = 1000.0f;
|
||||
|
||||
gluPerspective(fov, aspectRatio, zNear, zFar);
|
||||
|
||||
for (size_t i = 0; i < MAX_CUBES; i++)
|
||||
{
|
||||
|
||||
const float r = rnd(0.1f, 0.5f);
|
||||
const float x = rnd(-3.0f, 3.0f);
|
||||
const float y = rnd(-3.0f, 3.0f);
|
||||
const float z = rnd(-3.0f, 3.0f);
|
||||
const float vx = rnd(-2.0f, 2.0f);
|
||||
const float vy = rnd(-2.0f, 2.0f);
|
||||
const float vz = rnd(-2.0f, 2.0f);
|
||||
|
||||
addCube(r, x, y, z, vx, vy, vz);
|
||||
}
|
||||
|
||||
while (isRunning)
|
||||
{
|
||||
updateLogic();
|
||||
updateInput();
|
||||
swapBuffers();
|
||||
runningStats();
|
||||
}
|
||||
|
||||
avgStats();
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -145,7 +145,7 @@ int check_start() {
|
|||
|
||||
void DrawCube(float x, float z) {
|
||||
static float pos = 0.0f;
|
||||
const static float radius = 30.0f;
|
||||
static const float radius = 30.0f;
|
||||
|
||||
pos += 0.001f;
|
||||
|
||||
|
|
|
@ -23,7 +23,11 @@ int ImageLoad(char *filename, Image *image) {
|
|||
}
|
||||
|
||||
// seek through the bmp header, up to the width/height:
|
||||
fseek(file, 18, SEEK_CUR);
|
||||
fseek(file, 10, SEEK_CUR);
|
||||
|
||||
uint32_t offset;
|
||||
fread(&offset, 4, 1, file);
|
||||
fseek(file, 4, SEEK_CUR);
|
||||
|
||||
// read the width
|
||||
if ((i = fread(&sizeX, 4, 1, file)) != 1) {
|
||||
|
@ -65,7 +69,7 @@ int ImageLoad(char *filename, Image *image) {
|
|||
}
|
||||
|
||||
// seek past the rest of the bitmap header.
|
||||
fseek(file, 24, SEEK_CUR);
|
||||
fseek(file, offset, SEEK_SET);
|
||||
|
||||
// read the data.
|
||||
image->data = (char *) malloc(size);
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
/* A general OpenGL initialization function. Sets all of the initial parameters. */
|
||||
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
|
||||
{
|
||||
glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // This Will Clear The Background Color To Black
|
||||
glClearColor(0.0f, 0.0f, 1.0f, 0.0f); // This Will Clear The Background Color To Black
|
||||
glClearDepth(1.0); // Enables Clearing Of The Depth Buffer
|
||||
glDepthFunc(GL_LEQUAL); // The Type Of Depth Test To Do
|
||||
glEnable(GL_DEPTH_TEST); // Enables Depth Testing
|
||||
|
@ -20,7 +20,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
|
|||
|
||||
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
}
|
||||
|
||||
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */
|
||||
|
|
|
@ -53,10 +53,10 @@ void LoadGLTextures() {
|
|||
|
||||
// 2d texture, level of detail 0 (normal), 3 components (red, green, blue), x size from image, y size from image,
|
||||
// border 0 (normal), rgb color data, unsigned byte data, and finally the data itself.
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, 3, image1->sizeX, image1->sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1->data);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, image1->sizeX, image1->sizeY, 0, GL_RGB, GL_UNSIGNED_BYTE, image1->data);
|
||||
|
||||
free(image1);
|
||||
};
|
||||
}
|
||||
|
||||
/* A general OpenGL initialization function. Sets all of the initial parameters. */
|
||||
void InitGL(int Width, int Height) // We call this right after our OpenGL window is created.
|
||||
|
@ -74,7 +74,7 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
|
|||
|
||||
gluPerspective(45.0f,(GLfloat)Width/(GLfloat)Height,0.1f,100.0f); // Calculate The Aspect Ratio Of The Window
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
}
|
||||
|
||||
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 192 KiB After Width: | Height: | Size: 96 KiB |
|
@ -59,10 +59,10 @@ int ImageLoad(char *filename, Image *image) {
|
|||
|
||||
fread(&header, sizeof(header), 1, file);
|
||||
|
||||
GLboolean twiddled = (header.type & (1 << 25)) < 1;
|
||||
GLboolean compressed = (header.type & (1 << 29)) > 0;
|
||||
GLboolean mipmapped = (header.type & (1 << 30)) > 0;
|
||||
GLboolean strided = (header.type & (1 << 24)) > 0;
|
||||
GLboolean twiddled = (header.type & (1 << 26)) < 1;
|
||||
GLboolean compressed = (header.type & (1 << 30)) > 0;
|
||||
GLboolean mipmapped = (header.type & (1 << 31)) > 0;
|
||||
GLboolean strided = (header.type & (1 << 25)) > 0;
|
||||
GLuint format = (header.type >> 27) & 0b111;
|
||||
|
||||
image->data = (char *) malloc (header.size);
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#ifdef __DREAMCAST__
|
||||
#include <kos.h>
|
||||
#else
|
||||
#include <SDL.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -17,7 +19,9 @@
|
|||
#include <GL/glu.h>
|
||||
#include <GL/glkos.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../loadbmp.h"
|
||||
|
||||
|
@ -84,7 +88,16 @@ void SetupWorld()
|
|||
int numtriangles;
|
||||
FILE *filein;
|
||||
char oneline[255];
|
||||
#ifdef __DREAMCAST__
|
||||
filein = fopen("/rd/world.txt", "rt"); // File To Load World Data From
|
||||
#else
|
||||
filein = fopen("../samples/nehe10/romdisk/world.txt", "rt");
|
||||
#endif
|
||||
|
||||
if(!filein) {
|
||||
fprintf(stderr, "Failed to load world file\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
readstr(filein,oneline);
|
||||
sscanf(oneline, "NUMPOLLIES %d\n", &numtriangles);
|
||||
|
@ -228,6 +241,13 @@ void DrawGLScene(void) {
|
|||
}
|
||||
|
||||
int ReadController(void) {
|
||||
bool start = false;
|
||||
bool up = false;
|
||||
bool down = false;
|
||||
bool left = false;
|
||||
bool right = false;
|
||||
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
maple_device_t *cont;
|
||||
cont_state_t *state;
|
||||
|
@ -241,10 +261,27 @@ int ReadController(void) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
if(state->buttons & CONT_START)
|
||||
return 0;
|
||||
start = (state->buttons & CONT_START);
|
||||
up = (state->buttons & CONT_DPAD_UP);
|
||||
down = (state->buttons & CONT_DPAD_DOWN);
|
||||
left = (state->buttons & CONT_DPAD_LEFT);
|
||||
right = (state->buttons & CONT_DPAD_RIGHT);
|
||||
|
||||
if(state->buttons & CONT_DPAD_UP) {
|
||||
#else
|
||||
int num_keys = 0;
|
||||
uint8_t* state = SDL_GetKeyboardState(&num_keys);
|
||||
start = state[SDL_SCANCODE_RETURN];
|
||||
up = state[SDL_SCANCODE_UP];
|
||||
down = state[SDL_SCANCODE_DOWN];
|
||||
left = state[SDL_SCANCODE_LEFT];
|
||||
right = state[SDL_SCANCODE_RIGHT];
|
||||
#endif
|
||||
|
||||
if(start) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(up) {
|
||||
xpos -= (float)sin(heading*piover180) * 0.05f;
|
||||
zpos -= (float)cos(heading*piover180) * 0.05f;
|
||||
if (walkbiasangle >= 359.0f)
|
||||
|
@ -258,8 +295,7 @@ int ReadController(void) {
|
|||
walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
|
||||
}
|
||||
|
||||
|
||||
if(state->buttons & CONT_DPAD_DOWN) {
|
||||
if(down) {
|
||||
xpos += (float)sin(heading*piover180) * 0.05f;
|
||||
zpos += (float)cos(heading*piover180) * 0.05f;
|
||||
if (walkbiasangle <= 1.0f)
|
||||
|
@ -273,18 +309,17 @@ int ReadController(void) {
|
|||
walkbias = (float)sin(walkbiasangle * piover180)/20.0f;
|
||||
}
|
||||
|
||||
|
||||
if(state->buttons & CONT_DPAD_LEFT) {
|
||||
if(left) {
|
||||
heading += 1.0f;
|
||||
yrot = heading;
|
||||
}
|
||||
|
||||
if(state->buttons & CONT_DPAD_RIGHT) {
|
||||
if(right) {
|
||||
heading -= 1.0f;
|
||||
yrot = heading;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* Switch to the blended polygon list if needed */
|
||||
if(blend) {
|
||||
|
|
|
@ -157,4 +157,4 @@ NUMPOLLIES 36
|
|||
2.0 0.0 -0.5 0.0 0.0
|
||||
3.0 1.0 -0.5 1.0 1.0
|
||||
2.0 1.0 -0.5 0.0 1.0
|
||||
2.0 0.0 -0.5 0.0 0.0
|
||||
2.0 0.0 -0.5 0.0 0.0
|
||||
|
|
|
@ -132,7 +132,7 @@ void LoadGLTextures() {
|
|||
|
||||
// 2d texture, level of detail 0 (normal), 3 components (red, green, blue), x size from image, y size from image,
|
||||
// border 0 (normal), rgb color data, unsigned byte data, and finally the data itself.
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1->width, image1->height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE_TWID_KOS, image1->data);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1->width, image1->height, 0, GL_COLOR_INDEX8_TWID_KOS, GL_UNSIGNED_BYTE, image1->data);
|
||||
glGenerateMipmapEXT(GL_TEXTURE_2D);
|
||||
|
||||
free(image1);
|
||||
|
|
|
@ -254,6 +254,8 @@ int BMP_Infos(FILE *pFile, uint32_t *width, uint32_t *height)
|
|||
*width = (uint32_t)BmpInfoHeader.Width;
|
||||
*height = (uint32_t)BmpInfoHeader.Height;
|
||||
|
||||
fseek(pFile, BmpInfoHeader.Size + 14, SEEK_SET);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -270,6 +272,7 @@ int BMP_GetPalette(FILE *pFile)
|
|||
bitCount = BmpInfoHeader.ClrImportant * sizeof(RGB_QUAD);
|
||||
|
||||
if (fread(BmpRgbQuad, 1, bitCount, pFile) != bitCount){
|
||||
fprintf(stderr, "Failed to read palette: %d\n", bitCount);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -281,6 +284,8 @@ int BMP_GetPalette(FILE *pFile)
|
|||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "BitCount: %d\n", BmpInfoHeader.BitCount);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -346,7 +351,7 @@ int LoadPalettedBMP(const char* filename, Image* image)
|
|||
}
|
||||
|
||||
if (!BMP_GetPalette(fp)) {
|
||||
printf("Only 16c BMP are supported for this sample");
|
||||
printf("Only 16c BMP are supported for this sample\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -429,7 +434,7 @@ void LoadGLTextures() {
|
|||
#ifndef USE_16C_PALETTE
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
|
||||
#else
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image1.data);
|
||||
#endif
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, textures[1]); // 2d texture (x and y size)
|
||||
|
@ -444,7 +449,7 @@ void LoadGLTextures() {
|
|||
#ifndef USE_16C_PALETTE
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
|
||||
#else
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image1.data);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image1.width, image1.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image1.data);
|
||||
#endif
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, textures[2]);
|
||||
|
@ -463,7 +468,7 @@ void LoadGLTextures() {
|
|||
#ifndef USE_16C_PALETTE
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image2.data);
|
||||
#else
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, image2.data);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX4_EXT, image2.width, image2.height, 0, GL_COLOR_INDEX4_EXT, GL_UNSIGNED_BYTE, image2.data);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 16 KiB |
8207
samples/prof_texture_upload/image.h
Normal file
8207
samples/prof_texture_upload/image.h
Normal file
File diff suppressed because it is too large
Load Diff
64
samples/prof_texture_upload/main.c
Normal file
64
samples/prof_texture_upload/main.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
#include <stddef.h>
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
#include <kos.h>
|
||||
#include "../profiler.h"
|
||||
#endif
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glkos.h>
|
||||
|
||||
#include "image.h"
|
||||
|
||||
#define PROFILE 0
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
|
||||
fprintf(stdout, "Initializing\n");
|
||||
glKosInit();
|
||||
glClearColor(0.5f, 0.0f, 0.5f, 1.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
glKosSwapBuffers();
|
||||
|
||||
GLuint texture_id = 0;
|
||||
glGenTextures(1, &texture_id);
|
||||
glBindTexture(GL_TEXTURE_2D, texture_id);
|
||||
|
||||
time_t start = time(NULL);
|
||||
time_t end = start;
|
||||
|
||||
int counter = 0;
|
||||
|
||||
fprintf(stderr, "Starting test run...\n");
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
#if PROFILE
|
||||
profiler_init("/pc/gmon.out");
|
||||
profiler_start();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
while((end - start) < 5) {
|
||||
glTexImage2D(
|
||||
GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, header_data
|
||||
);
|
||||
|
||||
++counter;
|
||||
end = time(NULL);
|
||||
}
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
#if PROFILE
|
||||
profiler_stop();
|
||||
profiler_clean_up();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
fprintf(stderr, "Called glTexImage2D %d times (%.4f per call)\n", counter, (float)(end - start) / (float)(counter));
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -68,21 +68,16 @@ int check_start() {
|
|||
}
|
||||
|
||||
void setup() {
|
||||
//PVR needs to warm up for a frame, or results will be low
|
||||
glKosInit();
|
||||
GLdcConfig cfg;
|
||||
glKosInitConfig(&cfg);
|
||||
cfg.initial_immediate_capacity = 14000;
|
||||
glKosInitEx(&cfg);
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
glOrtho(0, 640, 0, 480, -100, 100);
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
|
||||
glDisable(GL_NEARZ_CLIPPING_KOS);
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
pvr_wait_ready();
|
||||
pvr_scene_begin();
|
||||
pvr_scene_finish();
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_frame() {
|
||||
|
@ -116,6 +111,8 @@ time_t begin;
|
|||
void switch_tests(int ppf) {
|
||||
printf("Beginning new test: %d polys per frame (%d per second at 60fps)\n",
|
||||
ppf * 2, ppf * 2 * 60);
|
||||
fflush(stdout);
|
||||
|
||||
avgfps = -1;
|
||||
polycnt = ppf;
|
||||
}
|
||||
|
@ -128,7 +125,6 @@ void check_switch() {
|
|||
if(now >= (begin + 5)) {
|
||||
begin = time(NULL);
|
||||
printf(" Average Frame Rate: ~%f fps (%d pps)\n", avgfps, (int)(polycnt * avgfps * 2));
|
||||
|
||||
switch(phase) {
|
||||
case PHASE_HALVE:
|
||||
|
||||
|
@ -169,19 +165,24 @@ void check_switch() {
|
|||
case PHASE_FINAL:
|
||||
break;
|
||||
}
|
||||
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
#define PROFILE 0
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
#ifndef NDEBUG
|
||||
#ifdef __DREAMCAST__
|
||||
#if PROFILE
|
||||
profiler_init("/pc/gmon.out");
|
||||
profiler_start();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
setup();
|
||||
|
||||
#if PROFILE
|
||||
profiler_start();
|
||||
#endif
|
||||
|
||||
/* Start off with something obscene */
|
||||
switch_tests(200000 / 60);
|
||||
begin = time(NULL);
|
||||
|
@ -200,11 +201,9 @@ int main(int argc, char **argv) {
|
|||
|
||||
stats();
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
#ifndef NDEBUG
|
||||
#if PROFILE
|
||||
profiler_stop();
|
||||
profiler_clean_up();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -28,6 +28,8 @@ void InitGL(int Width, int Height) // We call this right after our OpenG
|
|||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
|
||||
glEnable(GL_CULL_FACE);
|
||||
}
|
||||
|
||||
/* The function called when our window is resized (which shouldn't happen, because we're fullscreen) */
|
||||
|
@ -86,12 +88,13 @@ void DrawGLScene()
|
|||
rotation = (rotation > 360.0f) ? rotation - 360.0f : rotation;
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear The Screen And The Depth Buffer
|
||||
glClearColor(0.5f, 0.5f, 0.5f, 0.5f);
|
||||
glLoadIdentity(); // Reset The View
|
||||
|
||||
glDisable(GL_CULL_FACE);
|
||||
|
||||
glPushMatrix();
|
||||
glTranslatef(0.0f, -1.0f, movement);
|
||||
glTranslatef(0.0f, -1.0f, -movement);
|
||||
glRotatef(rotation, 0.0f, 1.0f, 0.0f);
|
||||
|
||||
glBegin(GL_TRIANGLES);
|
||||
|
|
26
tests/CMakeLists.txt
Normal file
26
tests/CMakeLists.txt
Normal file
|
@ -0,0 +1,26 @@
|
|||
|
||||
|
||||
FILE(GLOB GL_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/test_*.h)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR})
|
||||
|
||||
SET(TEST_GENERATOR_BIN ${CMAKE_SOURCE_DIR}/tools/test_generator.py)
|
||||
SET(TEST_MAIN_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/main.cpp)
|
||||
|
||||
ADD_CUSTOM_COMMAND(
|
||||
OUTPUT ${TEST_MAIN_FILENAME}
|
||||
COMMAND ${TEST_GENERATOR_BIN} --output ${TEST_MAIN_FILENAME} ${TEST_FILES} ${GL_TESTS}
|
||||
DEPENDS ${TEST_FILES} ${GL_TESTS} ${TEST_GENERATOR_BIN}
|
||||
)
|
||||
|
||||
add_executable(gldc_tests ${TEST_FILES} ${TEST_SOURCES} ${TEST_MAIN_FILENAME})
|
||||
target_link_libraries(gldc_tests GLdc)
|
||||
|
||||
if(NOT PLATFORM_DREAMCAST)
|
||||
set_target_properties(
|
||||
gldc_tests
|
||||
PROPERTIES
|
||||
COMPILE_OPTIONS "-m32"
|
||||
LINK_OPTIONS "-m32"
|
||||
)
|
||||
endif()
|
189
tests/test_allocator.h
Normal file
189
tests/test_allocator.h
Normal file
|
@ -0,0 +1,189 @@
|
|||
#include "tools/test.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
#include <malloc.h>
|
||||
#include <utility>
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glkos.h>
|
||||
|
||||
#include "GL/alloc/alloc.h"
|
||||
|
||||
static inline int round_up(int n, int multiple)
|
||||
{
|
||||
assert(multiple);
|
||||
return ((n + multiple - 1) / multiple) * multiple;
|
||||
}
|
||||
|
||||
#define POOL_SIZE (16 * 2048)
|
||||
|
||||
class AllocatorTests : public test::TestCase {
|
||||
public:
|
||||
uint8_t* pool = NULL;
|
||||
|
||||
std::vector<std::pair<void*, void*>> defrag_moves;
|
||||
|
||||
void set_up() {
|
||||
pool = (uint8_t*) memalign(2048, POOL_SIZE);
|
||||
assert(((intptr_t) pool) % 2048 == 0);
|
||||
}
|
||||
|
||||
void tear_down() {
|
||||
alloc_shutdown(pool);
|
||||
free(pool);
|
||||
}
|
||||
|
||||
static void on_defrag(void* src, void* dst, void* user_data) {
|
||||
AllocatorTests* self = (AllocatorTests*) user_data;
|
||||
self->defrag_moves.push_back(std::make_pair(src, dst));
|
||||
}
|
||||
|
||||
void test_defrag() {
|
||||
alloc_init(pool, POOL_SIZE);
|
||||
|
||||
alloc_malloc(pool, 256);
|
||||
void* a2 = alloc_malloc(pool, 256);
|
||||
void* a3 = alloc_malloc(pool, 256);
|
||||
|
||||
alloc_free(pool, a2);
|
||||
|
||||
alloc_run_defrag(pool, &AllocatorTests::on_defrag, 5, this);
|
||||
|
||||
assert_equal(defrag_moves.size(), 1u); // Moved a3 -> a2
|
||||
|
||||
assert_equal(defrag_moves[0].first, a3);
|
||||
assert_equal(defrag_moves[0].second, a2);
|
||||
|
||||
assert_equal(alloc_malloc(pool, 256), a3);
|
||||
}
|
||||
|
||||
void test_poor_alloc_aligned() {
|
||||
/* If we try to allocate and there are no suitable aligned
|
||||
* slots available, we fallback to any available unaligned slots */
|
||||
alloc_init(pool, POOL_SIZE);
|
||||
|
||||
// Leave only space for an unaligned block
|
||||
alloc_malloc(pool, (15 * 2048) - 256);
|
||||
|
||||
// Should work, we have space (just) but it's not aligned
|
||||
void* a1 = alloc_malloc(pool, 2048 + 256);
|
||||
assert_is_not_null(a1);
|
||||
assert_equal(a1, pool + ((15 * 2048) - 256));
|
||||
}
|
||||
|
||||
void test_poor_alloc_straddling() {
|
||||
/*
|
||||
* If we try to allocate a small block, it should not
|
||||
* cross a 2048 boundary unless there is no other option */
|
||||
alloc_init(pool, POOL_SIZE);
|
||||
alloc_malloc(pool, (15 * 2048) - 256);
|
||||
void* a1 = alloc_malloc(pool, 512);
|
||||
assert_true((uintptr_t(a1) % 2048) == 0); // Should've aligned to the last 2048 block
|
||||
|
||||
/* Allocate the rest of the last block, this leaves a 256 block in the
|
||||
* penultimate block */
|
||||
alloc_malloc(pool, 1536);
|
||||
alloc_free(pool, a1);
|
||||
|
||||
/* No choice but to straddle the boundary */
|
||||
a1 = alloc_malloc(pool, 768);
|
||||
}
|
||||
|
||||
void test_alloc_init() {
|
||||
alloc_init(pool, POOL_SIZE);
|
||||
|
||||
void* expected_base_address = (void*) round_up((uintptr_t) pool, 2048);
|
||||
assert_equal(alloc_next_available(pool, 16), expected_base_address);
|
||||
assert_equal(alloc_base_address(pool), expected_base_address);
|
||||
|
||||
size_t expected_blocks = (
|
||||
uintptr_t(pool + POOL_SIZE) -
|
||||
uintptr_t(expected_base_address)
|
||||
) / 2048;
|
||||
|
||||
assert_equal(alloc_block_count(pool), expected_blocks);
|
||||
}
|
||||
|
||||
void test_complex_case() {
|
||||
uint8_t* large_pool = (uint8_t*) malloc(8 * 1024 * 1024);
|
||||
|
||||
alloc_init(large_pool, 8 * 1024 * 1024);
|
||||
alloc_malloc(large_pool, 262144);
|
||||
alloc_malloc(large_pool, 262144);
|
||||
void* a1 = alloc_malloc(large_pool, 524288);
|
||||
alloc_free(large_pool, a1);
|
||||
alloc_malloc(large_pool, 699056);
|
||||
alloc_malloc(large_pool, 128);
|
||||
alloc_shutdown(large_pool);
|
||||
|
||||
free(large_pool);
|
||||
}
|
||||
|
||||
void test_complex_case2() {
|
||||
uint8_t* large_pool = (uint8_t*) malloc(8 * 1024 * 1024);
|
||||
alloc_init(large_pool, 8 * 1024 * 1024);
|
||||
|
||||
void* a1 = alloc_malloc(large_pool, 131072);
|
||||
alloc_free(large_pool, a1);
|
||||
|
||||
alloc_malloc(large_pool, 174768);
|
||||
void* a2 = alloc_malloc(large_pool, 131072);
|
||||
alloc_free(large_pool, a2);
|
||||
|
||||
alloc_malloc(large_pool, 174768);
|
||||
void* a3 = alloc_malloc(large_pool, 128);
|
||||
|
||||
alloc_free(large_pool, a3);
|
||||
|
||||
alloc_shutdown(large_pool);
|
||||
free(large_pool);
|
||||
}
|
||||
|
||||
void test_alloc_malloc() {
|
||||
alloc_init(pool, POOL_SIZE);
|
||||
|
||||
uint8_t* base_address = (uint8_t*) alloc_base_address(pool);
|
||||
void* a1 = alloc_malloc(pool, 1024);
|
||||
|
||||
/* First alloc should always be the base address */
|
||||
assert_equal(a1, base_address);
|
||||
|
||||
/* An allocation of <= 2048 (well 1024) will not necessarily be at
|
||||
* a 2k boundary */
|
||||
void* expected_next_available = base_address + uintptr_t(1024);
|
||||
assert_equal(alloc_next_available(pool, 1024), expected_next_available);
|
||||
|
||||
/* Requesting 2k though will force to a 2k boundary */
|
||||
expected_next_available = base_address + uintptr_t(2048);
|
||||
assert_equal(alloc_next_available(pool, 2048), expected_next_available);
|
||||
|
||||
/* Now alloc 2048 bytes, this should be on the 2k boundary */
|
||||
void* a2 = alloc_malloc(pool, 2048);
|
||||
assert_equal(a2, expected_next_available);
|
||||
|
||||
/* If we try to allocate 1k, this should go in the second half of the
|
||||
* first block */
|
||||
expected_next_available = base_address + uintptr_t(1024);
|
||||
void* a3 = alloc_malloc(pool, 1024);
|
||||
assert_equal(a3, expected_next_available);
|
||||
|
||||
alloc_free(pool, a1);
|
||||
|
||||
/* Next allocation would go in the just freed block */
|
||||
expected_next_available = base_address;
|
||||
assert_equal(alloc_next_available(pool, 64), expected_next_available);
|
||||
|
||||
/* Now allocate 14 more 2048 size blocks, the following one should
|
||||
* return NULL */
|
||||
for(int i = 0; i < 14; ++i) {
|
||||
alloc_malloc(pool, 2048);
|
||||
}
|
||||
|
||||
assert_is_null(alloc_malloc(pool, 2048));
|
||||
|
||||
/* But we should still have room in the second block for this */
|
||||
assert_is_not_null(alloc_malloc(pool, 64));
|
||||
}
|
||||
|
||||
};
|
77
tests/test_glteximage2d.h
Normal file
77
tests/test_glteximage2d.h
Normal file
|
@ -0,0 +1,77 @@
|
|||
#include "tools/test.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glkos.h>
|
||||
|
||||
|
||||
class TexImage2DTests : public test::TestCase {
|
||||
public:
|
||||
uint8_t image_data[8 * 8 * 4] = {0};
|
||||
|
||||
void set_up() {
|
||||
GLdcConfig config;
|
||||
glKosInitConfig(&config);
|
||||
config.texture_twiddle = false;
|
||||
glKosInitEx(&config);
|
||||
|
||||
/* Init image data so each texel RGBA value matches the
|
||||
* position in the array */
|
||||
for(int i = 0; i < 8 * 8 * 4; i += 4) {
|
||||
image_data[i + 0] = i;
|
||||
image_data[i + 1] = i;
|
||||
image_data[i + 2] = i;
|
||||
image_data[i + 3] = i;
|
||||
}
|
||||
}
|
||||
|
||||
void tear_down() {
|
||||
glKosShutdown();
|
||||
}
|
||||
|
||||
void test_rgb_to_rgb565() {
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE, image_data);
|
||||
assert_equal(glGetError(), GL_NO_ERROR);
|
||||
|
||||
GLint internalFormat;
|
||||
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
|
||||
|
||||
assert_equal(internalFormat, GL_RGB565_KOS);
|
||||
}
|
||||
|
||||
void test_rgb_to_rgb565_twiddle() {
|
||||
glEnable(GL_TEXTURE_TWIDDLE_KOS);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 8, 8, 0, GL_RGB, GL_UNSIGNED_BYTE, image_data);
|
||||
glDisable(GL_TEXTURE_TWIDDLE_KOS);
|
||||
|
||||
assert_equal(glGetError(), GL_NO_ERROR);
|
||||
|
||||
GLint internalFormat;
|
||||
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
|
||||
|
||||
assert_equal(internalFormat, GL_RGB565_TWID_KOS);
|
||||
}
|
||||
|
||||
void test_rgba_to_argb4444() {
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 8, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, image_data);
|
||||
assert_equal(glGetError(), GL_NO_ERROR);
|
||||
|
||||
GLint internalFormat;
|
||||
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
|
||||
|
||||
assert_equal(internalFormat, GL_ARGB4444_KOS);
|
||||
}
|
||||
|
||||
void test_rgba_to_argb4444_twiddle() {
|
||||
glEnable(GL_TEXTURE_TWIDDLE_KOS);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 8, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, image_data);
|
||||
glDisable(GL_TEXTURE_TWIDDLE_KOS);
|
||||
|
||||
assert_equal(glGetError(), GL_NO_ERROR);
|
||||
|
||||
GLint internalFormat;
|
||||
glGetIntegerv(GL_TEXTURE_INTERNAL_FORMAT_KOS, &internalFormat);
|
||||
|
||||
assert_equal(internalFormat, GL_ARGB4444_TWID_KOS);
|
||||
}
|
||||
};
|
637
tests/zclip/main.cpp
Normal file
637
tests/zclip/main.cpp
Normal file
|
@ -0,0 +1,637 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
|
||||
#define SQ_BASE_ADDRESS 0
|
||||
#define SPAN_SORT_CFG 0
|
||||
#define PVR_SET(x, y) (void)(x); (void)(y)
|
||||
|
||||
struct Vertex {
|
||||
uint32_t flags;
|
||||
float xyz[3];
|
||||
float uv[2];
|
||||
float w;
|
||||
uint8_t bgra[4];
|
||||
};
|
||||
|
||||
struct {
|
||||
float hwidth;
|
||||
float x_plus_hwidth;
|
||||
float hheight;
|
||||
float y_plus_hheight;
|
||||
} VIEWPORT = {320, 320, 240, 240};
|
||||
|
||||
|
||||
struct VideoMode {
|
||||
float height;
|
||||
};
|
||||
|
||||
static VideoMode* GetVideoMode() {
|
||||
static VideoMode mode = {320.0f};
|
||||
return &mode;
|
||||
}
|
||||
|
||||
enum GPUCommand {
|
||||
GPU_CMD_POLYHDR = 0x80840000,
|
||||
GPU_CMD_VERTEX = 0xe0000000,
|
||||
GPU_CMD_VERTEX_EOL = 0xf0000000,
|
||||
GPU_CMD_USERCLIP = 0x20000000,
|
||||
GPU_CMD_MODIFIER = 0x80000000,
|
||||
GPU_CMD_SPRITE = 0xA0000000
|
||||
};
|
||||
|
||||
static std::vector<Vertex> sent;
|
||||
|
||||
static inline void interpolateColour(const uint32_t* a, const uint32_t* b, const float t, uint32_t* out) {
|
||||
const static uint32_t MASK1 = 0x00FF00FF;
|
||||
const static uint32_t MASK2 = 0xFF00FF00;
|
||||
|
||||
const uint32_t f2 = 256 * t;
|
||||
const uint32_t f1 = 256 - f2;
|
||||
|
||||
*out = (((((*a & MASK1) * f1) + ((*b & MASK1) * f2)) >> 8) & MASK1) |
|
||||
(((((*a & MASK2) * f1) + ((*b & MASK2) * f2)) >> 8) & MASK2);
|
||||
}
|
||||
|
||||
static inline void _glClipEdge(const Vertex* v1, const Vertex* v2, Vertex* vout) {
|
||||
/* Clipping time! */
|
||||
const float d0 = v1->w + v1->xyz[2];
|
||||
const float d1 = v2->w + v2->xyz[2];
|
||||
const float sign = ((2.0f * (d1 < d0)) - 1.0f);
|
||||
const float epsilon = -0.00001f * sign;
|
||||
const float n = (d0 - d1);
|
||||
const float r = (1.f / sqrtf(n * n)) * sign;
|
||||
float t = fmaf(r, d0, epsilon);
|
||||
|
||||
vout->xyz[0] = fmaf(v2->xyz[0] - v1->xyz[0], t, v1->xyz[0]);
|
||||
vout->xyz[1] = fmaf(v2->xyz[1] - v1->xyz[1], t, v1->xyz[1]);
|
||||
vout->xyz[2] = fmaf(v2->xyz[2] - v1->xyz[2], t, v1->xyz[2]);
|
||||
vout->w = fmaf(v2->w - v1->w, t, v1->w);
|
||||
|
||||
vout->uv[0] = fmaf(v2->uv[0] - v1->uv[0], t, v1->uv[0]);
|
||||
vout->uv[1] = fmaf(v2->uv[1] - v1->uv[1], t, v1->uv[1]);
|
||||
|
||||
interpolateColour((uint32_t*) v1->bgra, (uint32_t*) v2->bgra, t, (uint32_t*) vout->bgra);
|
||||
}
|
||||
|
||||
bool glIsVertex(const uint32_t flags) {
|
||||
return flags == GPU_CMD_VERTEX_EOL || flags == GPU_CMD_VERTEX;
|
||||
}
|
||||
|
||||
bool glIsLastVertex(const uint32_t flags) {
|
||||
return flags == GPU_CMD_VERTEX_EOL;
|
||||
}
|
||||
|
||||
void _glSubmitHeaderOrVertex(volatile uint32_t*, Vertex* vtx) {
|
||||
sent.push_back(*vtx);
|
||||
}
|
||||
|
||||
float _glFastInvert(float x) {
|
||||
return (1.f / __builtin_sqrtf(x * x));
|
||||
}
|
||||
|
||||
void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
|
||||
const float f = _glFastInvert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = __builtin_fmaf(
|
||||
VIEWPORT.hwidth, vertex->xyz[0] * f, VIEWPORT.x_plus_hwidth
|
||||
);
|
||||
|
||||
vertex->xyz[1] = h - __builtin_fmaf(
|
||||
VIEWPORT.hheight, vertex->xyz[1] * f, VIEWPORT.y_plus_hheight
|
||||
);
|
||||
|
||||
/* Orthographic projections need to use invZ otherwise we lose
|
||||
the depth information. As w == 1, and clip-space range is -w to +w
|
||||
we add 1.0 to the Z to bring it into range. We add a little extra to
|
||||
avoid a divide by zero.
|
||||
*/
|
||||
|
||||
vertex->xyz[2] = (vertex->w == 1.0f) ? _glFastInvert(1.0001f + vertex->xyz[2]) : f;
|
||||
}
|
||||
|
||||
|
||||
void memcpy_vertex(Vertex* dst, Vertex* src) {
|
||||
*dst = *src;
|
||||
}
|
||||
|
||||
/* Zclipping is so difficult to get right, that self sample tests all the cases of clipping and makes sure that things work as expected */
|
||||
|
||||
#ifdef __DREAMCAST__
|
||||
static volatile int *pvrdmacfg = (int*)0xA05F6888;
|
||||
static volatile int *qacr = (int*)0xFF000038;
|
||||
#else
|
||||
static int pvrdmacfg[2];
|
||||
static int qacr[2];
|
||||
#endif
|
||||
|
||||
void SceneListSubmit(void* src, int n) {
|
||||
/* You need at least a header, and 3 vertices to render anything */
|
||||
if(n < 4) {
|
||||
return;
|
||||
}
|
||||
|
||||
const float h = GetVideoMode()->height;
|
||||
|
||||
PVR_SET(SPAN_SORT_CFG, 0x0);
|
||||
|
||||
//Set PVR DMA registers
|
||||
pvrdmacfg[0] = 1;
|
||||
pvrdmacfg[1] = 1;
|
||||
|
||||
//Set QACR registers
|
||||
qacr[1] = qacr[0] = 0x11;
|
||||
|
||||
volatile uint32_t *d = SQ_BASE_ADDRESS;
|
||||
|
||||
int8_t queue_head = 0;
|
||||
int8_t queue_tail = 0;
|
||||
|
||||
/* The most vertices ever in the queue is 5 (as some clipping operations
|
||||
* produce and additional couple of vertice, but we add one more so the ring buffer doesn't
|
||||
* trip over itself (e.g. if tail == head we can guarantee it's empty, not full) */
|
||||
Vertex __attribute__((aligned(32))) queue[4];
|
||||
const int queue_capacity = sizeof(queue) / sizeof(Vertex);
|
||||
|
||||
Vertex* vertex = (Vertex*) src;
|
||||
uint32_t visible_mask = 0;
|
||||
|
||||
#if CLIP_DEBUG
|
||||
for(int i = 0; i < n; ++i) {
|
||||
fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]);
|
||||
}
|
||||
|
||||
fprintf(stderr, "----\n");
|
||||
#endif
|
||||
while(n--) {
|
||||
bool last_vertex = false;
|
||||
memcpy_vertex(queue + queue_tail, vertex);
|
||||
++vertex;
|
||||
switch(queue[queue_tail].flags) {
|
||||
case GPU_CMD_POLYHDR:
|
||||
_glSubmitHeaderOrVertex(d, &queue[queue_tail]);
|
||||
break;
|
||||
case GPU_CMD_VERTEX_EOL:
|
||||
last_vertex = true; // fallthru
|
||||
case GPU_CMD_VERTEX:
|
||||
visible_mask = (visible_mask >> 1) | (queue[queue_tail].xyz[2] >= -queue[queue_tail].w) << 2;
|
||||
assert(visible_mask < 15);
|
||||
queue_tail = (queue_tail + 1) % queue_capacity;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
int counter = (queue_tail - queue_head + queue_capacity) % queue_capacity;
|
||||
if(counter < 3) {
|
||||
continue;
|
||||
}
|
||||
|
||||
#if CLIP_DEBUG
|
||||
fprintf(stderr, "%d\n", visible_mask);
|
||||
#endif
|
||||
Vertex __attribute__((aligned(32))) a, b; // Scratch vertices
|
||||
switch(visible_mask) {
|
||||
case 0:
|
||||
break;
|
||||
case 7:
|
||||
/* All visible, push the first vertex and move on */
|
||||
_glPerspectiveDivideVertex(&queue[queue_head], h);
|
||||
_glSubmitHeaderOrVertex(d, &queue[queue_head]);
|
||||
|
||||
if(last_vertex) {
|
||||
/* If this was the last vertex in the strip, we need to flush the queue and then
|
||||
restart it again */
|
||||
|
||||
int v1 = (queue_head + 1) % queue_capacity;
|
||||
int v2 = (queue_head + 2) % queue_capacity;
|
||||
|
||||
_glPerspectiveDivideVertex(&queue[v1], h);
|
||||
_glSubmitHeaderOrVertex(d, &queue[v1]);
|
||||
|
||||
_glPerspectiveDivideVertex(&queue[v2], h);
|
||||
_glSubmitHeaderOrVertex(d, &queue[v2]);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
/* First vertex was visible */
|
||||
{
|
||||
Vertex* v0 = &queue[queue_head];
|
||||
Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
|
||||
Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];
|
||||
|
||||
_glClipEdge(v0, v1, &a);
|
||||
_glClipEdge(v2, v0, &b);
|
||||
a.flags = GPU_CMD_VERTEX;
|
||||
|
||||
/* If v2 was the last in the strip, then b should be. If it wasn't
|
||||
we'll create a degenerate triangle by adding b twice in a row so that the
|
||||
strip processing will continue correctly after crossing the plane so it can
|
||||
cross back*/
|
||||
b.flags = v2->flags;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPerspectiveDivideVertex(&a, h);
|
||||
_glPerspectiveDivideVertex(&b, h);
|
||||
|
||||
_glSubmitHeaderOrVertex(d, v0);
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
/* Second vertex was visible. In self case we need to create a triangle and produce
|
||||
two new vertices: 1-2, and 2-3. */
|
||||
{
|
||||
Vertex* v0 = &queue[queue_head];
|
||||
const Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
|
||||
const Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];
|
||||
|
||||
_glClipEdge(v0, v1, &a);
|
||||
_glClipEdge(v1, v2, &b);
|
||||
a.flags = GPU_CMD_VERTEX;
|
||||
b.flags = v2->flags;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPerspectiveDivideVertex(&a, h);
|
||||
_glPerspectiveDivideVertex(&b, h);
|
||||
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
_glSubmitHeaderOrVertex(d, v0);
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
}
|
||||
break;
|
||||
case 3: /* First and second vertex were visible */
|
||||
{
|
||||
Vertex* v0 = &queue[queue_head];
|
||||
Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity];
|
||||
Vertex* v2 = &queue[(queue_head + 2) % queue_capacity];
|
||||
|
||||
_glClipEdge(&v1, v2, &a);
|
||||
_glClipEdge(v2, v0, &b);
|
||||
|
||||
a.flags = v2->flags;
|
||||
b.flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPerspectiveDivideVertex(&v1, h);
|
||||
_glPerspectiveDivideVertex(&a, h);
|
||||
_glPerspectiveDivideVertex(&b, h);
|
||||
|
||||
_glSubmitHeaderOrVertex(d, v0);
|
||||
_glSubmitHeaderOrVertex(d, &v1);
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
_glSubmitHeaderOrVertex(d, &v1);
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
/* Third vertex was visible. */
|
||||
{
|
||||
Vertex* v0 = &queue[queue_head];
|
||||
Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
|
||||
Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];
|
||||
|
||||
_glClipEdge(&v2, v0, &a);
|
||||
_glClipEdge(v1, &v2, &b);
|
||||
a.flags = GPU_CMD_VERTEX;
|
||||
b.flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(&v2, h);
|
||||
_glPerspectiveDivideVertex(&a, h);
|
||||
_glPerspectiveDivideVertex(&b, h);
|
||||
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
_glSubmitHeaderOrVertex(d, &v2);
|
||||
}
|
||||
break;
|
||||
case 5: /* First and third vertex were visible */
|
||||
{
|
||||
Vertex* v0 = &queue[queue_head];
|
||||
Vertex* v1 = &queue[(queue_head + 1) % queue_capacity];
|
||||
Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];
|
||||
|
||||
_glClipEdge(v0, v1, &a);
|
||||
_glClipEdge(v1, &v2, &b);
|
||||
a.flags = GPU_CMD_VERTEX;
|
||||
b.flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPerspectiveDivideVertex(&v2, h);
|
||||
_glPerspectiveDivideVertex(&a, h);
|
||||
_glPerspectiveDivideVertex(&b, h);
|
||||
|
||||
_glSubmitHeaderOrVertex(d, v0);
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
uint32_t v2_flags = v2.flags;
|
||||
v2.flags = GPU_CMD_VERTEX;
|
||||
_glSubmitHeaderOrVertex(d, &v2);
|
||||
v2.flags = v2_flags;
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
_glSubmitHeaderOrVertex(d, &v2);
|
||||
}
|
||||
break;
|
||||
case 6: /* Second and third vertex were visible */
|
||||
{
|
||||
Vertex* v0 = &queue[queue_head];
|
||||
Vertex __attribute__((aligned(32))) v1 = queue[(queue_head + 1) % queue_capacity];
|
||||
Vertex __attribute__((aligned(32))) v2 = queue[(queue_head + 2) % queue_capacity];
|
||||
|
||||
_glClipEdge(v0, &v1, &a);
|
||||
_glClipEdge(&v2, v0, &b);
|
||||
|
||||
a.flags = GPU_CMD_VERTEX;
|
||||
b.flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(&v1, h);
|
||||
_glPerspectiveDivideVertex(&v2, h);
|
||||
_glPerspectiveDivideVertex(&a, h);
|
||||
_glPerspectiveDivideVertex(&b, h);
|
||||
|
||||
_glSubmitHeaderOrVertex(d, &a);
|
||||
_glSubmitHeaderOrVertex(d, &v1);
|
||||
_glSubmitHeaderOrVertex(d, &b);
|
||||
_glSubmitHeaderOrVertex(d, &v1);
|
||||
_glSubmitHeaderOrVertex(d, &v2);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if(last_vertex) {
|
||||
visible_mask = queue_head = queue_tail = 0;
|
||||
} else {
|
||||
queue_head = (queue_head + 1) % queue_capacity;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Lightweight position-only vertex used by the tests to describe input
 * geometry before it is expanded into full Vertex records. */
struct VertexTmpl {
    float x, y, z, w;

    VertexTmpl(float x, float y, float z, float w) :
        x(x),
        y(y),
        z(z),
        w(w) {}
};
|
||||
|
||||
std::vector<Vertex> make_vertices(const std::vector<VertexTmpl>& verts) {
|
||||
std::vector<Vertex> result;
|
||||
Vertex r;
|
||||
|
||||
r.flags = GPU_CMD_POLYHDR;
|
||||
result.push_back(r);
|
||||
|
||||
for(auto& v: verts) {
|
||||
r.flags = GPU_CMD_VERTEX;
|
||||
r.xyz[0] = v.x;
|
||||
r.xyz[1] = v.y;
|
||||
r.xyz[2] = v.z;
|
||||
r.uv[0] = 0.0f;
|
||||
r.uv[1] = 0.0f;
|
||||
r.w = v.w;
|
||||
|
||||
result.push_back(r);
|
||||
}
|
||||
|
||||
result.back().flags = GPU_CMD_VERTEX_EOL;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Generic equality check used by the tests: throws std::runtime_error
 * when the two values compare unequal via operator!=. */
template<typename T, typename U>
void check_equal(const T& expected, const U& actual) {
    if(expected != actual) {
        throw std::runtime_error("Assertion failed");
    }
}
|
||||
|
||||
template<>
|
||||
void check_equal(const Vertex& lhs, const Vertex& rhs) {
|
||||
if(lhs.xyz[0] != rhs.xyz[0] ||
|
||||
lhs.xyz[1] != rhs.xyz[1] ||
|
||||
lhs.xyz[2] != rhs.xyz[2] ||
|
||||
lhs.w != rhs.w) {
|
||||
throw std::runtime_error("Assertion failed");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Clip case 001: only the first vertex has w > 0 (in front of the plane).
 * Expects a header plus 4 output vertices: v0, the two intersection points,
 * and a repeated terminator. Output is captured in the global `sent` list by
 * _glSubmitHeaderOrVertex — see SceneListSubmit above. */
bool test_clip_case_001() {
    /* The first vertex is visible only */
    sent.clear();

    auto data = make_vertices({
        {0.000000, -2.414213, 3.080808, 5.000000},
        {-4.526650, -2.414213, -7.121212, -5.000000},
        {4.526650, -2.414213, -7.121212, -5.000000}
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 5);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);

    // Because we're sending a single triangle, we end up sending a
    // degenerate final vert. But if we were sending more than one triangle
    // this would be GPU_CMD_VERTEX twice
    check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
    check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL);
    check_equal(sent[3], sent[4]);
    return true;
}
|
||||
|
||||
/* Clip case 010: only the middle vertex has w > 0. Expects a header plus a
 * clean 3-vertex triangle (two intersection points plus the visible vertex). */
bool test_clip_case_010() {
    /* The second (middle) vertex is visible only — it is the one with w > 0 below */
    sent.clear();

    auto data = make_vertices({
        {-4.526650, -2.414213, -7.121212, -5.000000},
        {0.000000, -2.414213, 3.080808, 5.000000},
        {4.526650, -2.414213, -7.121212, -5.000000}
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 4);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);
    check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
    return true;
}
|
||||
|
||||
/* Clip case 100: only the third vertex has w > 0. Expects a header plus 4
 * vertices, with a leading degenerate (duplicated) vertex — see case 4 in
 * SceneListSubmit, which submits `a` twice. */
bool test_clip_case_100() {
    /* The third vertex is visible only */
    sent.clear();

    auto data = make_vertices({
        {-4.526650, -2.414213, -7.121212, -5.000000},
        {4.526650, -2.414213, -7.121212, -5.000000},
        {0.000000, -2.414213, 3.080808, 5.000000}
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 5);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);

    // Because we're sending a single triangle, we end up sending a
    // degenerate final vert. But if we were sending more than one triangle
    // this would be GPU_CMD_VERTEX twice
    check_equal(sent[3].flags, GPU_CMD_VERTEX);
    check_equal(sent[4].flags, GPU_CMD_VERTEX_EOL);
    check_equal(sent[1], sent[2]);
    return true;
}
|
||||
|
||||
/* Clip case 110: second and third vertices have w > 0. Expects a header plus
 * 5 vertices forming a clipped quad strip; sent[2] and sent[4] are the same
 * duplicated vertex (v1 submitted twice by case 6 of SceneListSubmit). */
bool test_clip_case_110() {
    /* 2nd and 3rd visible */
    sent.clear();

    auto data = make_vertices({
        {0.0, -2.414213, -7.121212, -5.000000},
        {-4.526650, -2.414213, 3.080808, 5.000000},
        {4.526650, -2.414213, 3.080808, 5.000000}
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 6);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);
    check_equal(sent[3].flags, GPU_CMD_VERTEX);
    check_equal(sent[4].flags, GPU_CMD_VERTEX);
    check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
    check_equal(sent[2], sent[4]);
    return true;
}
|
||||
|
||||
/* Clip case 011: first and second vertices have w > 0. Expects a header plus
 * 5 vertices; sent[2] and sent[4] are the duplicated v1 (case 3 of
 * SceneListSubmit submits v1 twice to keep the strip continuous). */
bool test_clip_case_011() {
    /* 1st and 2nd visible */
    sent.clear();

    auto data = make_vertices({
        {-4.526650, -2.414213, 3.080808, 5.000000},
        {4.526650, -2.414213, 3.080808, 5.000000},
        {0.0, -2.414213, -7.121212, -5.000000}
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 6);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);
    check_equal(sent[3].flags, GPU_CMD_VERTEX);
    check_equal(sent[4].flags, GPU_CMD_VERTEX);
    check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
    check_equal(sent[2], sent[4]);
    return true;
}
|
||||
|
||||
/* Clip case 101: first and third vertices have w > 0. Expects a header plus
 * 5 vertices; sent[3] and sent[5] are the duplicated v2 (case 5 of
 * SceneListSubmit submits v2 twice, once with its flags overridden). */
bool test_clip_case_101() {
    /* 1st and 3rd visible */
    sent.clear();

    auto data = make_vertices({
        {-4.526650, -2.414213, 3.080808, 5.000000},
        {0.0, -2.414213, -7.121212, -5.000000},
        {4.526650, -2.414213, 3.080808, 5.000000},
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 6);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);
    check_equal(sent[3].flags, GPU_CMD_VERTEX);
    check_equal(sent[4].flags, GPU_CMD_VERTEX);
    check_equal(sent[5].flags, GPU_CMD_VERTEX_EOL);
    check_equal(sent[3], sent[5]);
    return true;
}
|
||||
|
||||
/* Clip case 111: all three vertices have w > 0, so no clipping occurs and the
 * triangle passes straight through: header + the 3 original vertices. */
bool test_clip_case_111() {
    /* All three vertices are visible (every w below is positive) */
    sent.clear();

    auto data = make_vertices({
        {-4.526650, -2.414213, 3.080808, 5.000000},
        {0.0, -2.414213, -7.121212, 8.000000},
        {4.526650, -2.414213, 3.080808, 5.000000},
    });

    SceneListSubmit(&data[0], data.size());

    check_equal(sent.size(), 4);
    check_equal(sent[0].flags, GPU_CMD_POLYHDR);
    check_equal(sent[1].flags, GPU_CMD_VERTEX);
    check_equal(sent[2].flags, GPU_CMD_VERTEX);
    check_equal(sent[3].flags, GPU_CMD_VERTEX_EOL);
    return true;
}
|
||||
|
||||
|
||||
/* Smoke test: a strip whose first triangle lies entirely behind the plane
 * (negative w) before crossing into visibility. Only checks that submission
 * completes without crashing — no assertions on the generated output. */
bool test_start_behind() {
    /* Triangle behind the plane, but the strip continues in front */
    sent.clear();

    auto data = make_vertices({
        {-3.021717, -2.414213, -10.155344, -9.935254},
        {5.915236, -2.414213, -9.354721, -9.136231},
        {-5.915236, -2.414213, -0.264096, -0.063767},
        {3.021717, -2.414213, 0.536527, 0.735255},
        {-7.361995, -2.414213, 4.681529, 4.871976},
        {1.574958, -2.414213, 5.482152, 5.670999},
    });

    SceneListSubmit(&data[0], data.size());

    return true;
}
|
||||
|
||||
/* Smoke test: a 4-triangle strip that crosses the plane once (first triangle
 * behind, rest in front). Like test_start_behind, this only verifies that
 * SceneListSubmit runs to completion on a longer strip. */
bool test_longer_strip() {
    sent.clear();

    auto data = make_vertices({
        {-4.384623, -2.414213, -5.699644, -5.488456},
        {4.667572, -2.414213, -5.621354, -5.410322},
        {-4.667572, -2.414213, 4.319152, 4.510323},
        {4.384623, -2.414213, 4.397442, 4.588456},
        {-4.809045, -2.414213, 9.328549, 9.509711},
        {4.243149, -2.414213, 9.406840, 9.587846},
    });

    SceneListSubmit(&data[0], data.size());

    return true;
}
|
||||
|
||||
/* Test driver: runs every clip-case test in sequence. Each test throws
 * std::runtime_error (via check_equal) on failure, so reaching the final
 * return means all tests passed. */
int main(int argc, char* argv[]) {
    // test_clip_case_000();  // all-invisible case; not currently exercised
    test_clip_case_001();
    test_clip_case_010();
    test_clip_case_100();
    test_clip_case_110();
    test_clip_case_011();
    test_clip_case_101();
    test_clip_case_111();

    test_start_behind();
    test_longer_strip();

    return 0;
}
|
451
tools/test.h
Normal file
451
tools/test.h
Normal file
|
@ -0,0 +1,451 @@
|
|||
/* * Copyright (c) 2011-2017 Luke Benstead https://simulant-engine.appspot.com
|
||||
*
|
||||
* This file is part of Simulant.
|
||||
*
|
||||
* Simulant is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Simulant is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with Simulant. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
|
||||
#define assert_equal(expected, actual) _assert_equal((expected), (actual), __FILE__, __LINE__)
|
||||
#define assert_not_equal(expected, actual) _assert_not_equal((expected), (actual), __FILE__, __LINE__)
|
||||
#define assert_false(actual) _assert_false((actual), __FILE__, __LINE__)
|
||||
#define assert_true(actual) _assert_true((actual), __FILE__, __LINE__)
|
||||
#define assert_close(expected, actual, difference) _assert_close((expected), (actual), (difference), __FILE__, __LINE__)
|
||||
#define assert_is_null(actual) _assert_is_null((actual), __FILE__, __LINE__)
|
||||
#define assert_is_not_null(actual) _assert_is_not_null((actual), __FILE__, __LINE__)
|
||||
#define assert_raises(exception, func) _assert_raises<exception>((func), __FILE__, __LINE__)
|
||||
#define assert_items_equal(expected, actual) _assert_items_equal((actual), (expected), __FILE__, __LINE__)
|
||||
#define not_implemented() _not_implemented(__FILE__, __LINE__)
|
||||
|
||||
|
||||
namespace test {
|
||||
|
||||
/* Minimal "{0}", "{1}"... placeholder formatter used for assertion messages.
 * Each call to format() substitutes the first occurrence of the numbered
 * placeholder with the stringified argument; remaining placeholders (and any
 * repeats) are left untouched. */
class StringFormatter {
public:
    StringFormatter(const std::string& templ):
        templ_(templ) { }

    /* Tags an argument with the placeholder index it should replace. */
    struct Counter {
        Counter(uint32_t c): c(c) {}
        uint32_t c;
    };

    /* Single argument: replaces "{0}". */
    template<typename T>
    std::string format(T value) {
        std::stringstream stream;
        stream << value;
        return _do_format(0, stream.str());
    }

    /* Single argument at an explicit placeholder index. */
    template<typename T>
    std::string format(Counter count, T value) {
        std::stringstream stream;
        stream << value;
        return _do_format(count.c, stream.str());
    }

    /* Multiple arguments: consume one, recurse with the next index. */
    template<typename T, typename... Args>
    std::string format(T value, const Args&... args) {
        std::stringstream stream;
        stream << value;
        return StringFormatter(_do_format(0, stream.str())).format(Counter(1), args...);
    }

    template<typename T, typename... Args>
    std::string format(Counter count, T value, const Args&... args) {
        std::stringstream stream;
        stream << value;
        return StringFormatter(_do_format(count.c, stream.str())).format(Counter(count.c + 1), args...);
    }

    /* Replaces the first occurrence of "{counter}" with value. */
    std::string _do_format(uint32_t counter, const std::string& value) {
        std::stringstream num; // Can't use to_string on all platforms
        num << counter;

        const std::string placeholder = "{" + num.str() + "}";
        std::string output = templ_;

        const size_t pos = output.find(placeholder);
        if(pos != std::string::npos) {
            output.replace(pos, placeholder.length(), value);
        }

        return output;
    }

private:
    std::string templ_;
};
|
||||
|
||||
/* Splits a string on '\n', discarding empty lines. */
class StringSplitter {
public:
    StringSplitter(const std::string& str):
        str_(str) {}

    /* Returns the non-empty lines of the wrapped string, in order. */
    std::vector<std::string> split() {
        std::vector<std::string> lines;

        std::istringstream stream(str_);
        std::string line;
        while(std::getline(stream, line)) {
            if(!line.empty()) {
                lines.push_back(line);
            }
        }

        return lines;
    }

private:
    std::string str_;
};
|
||||
|
||||
typedef StringFormatter _Format;
|
||||
|
||||
/* Raised by the TestCase assertion helpers. Optionally carries the source
 * file and line of the failing assertion; `line` is -1 when unknown. */
class AssertionError : public std::logic_error {
public:
    /* Failure with a message only — no source location available. */
    AssertionError(const std::string& what):
        std::logic_error(what), file(""), line(-1) {}

    /* Failure with a (file, line) location pair plus a message. */
    AssertionError(const std::pair<std::string, int> file_and_line, const std::string& what):
        std::logic_error(what),
        file(file_and_line.first),
        line(file_and_line.second) {}

    ~AssertionError() noexcept (true) {}

    std::string file; // Source file of the failing assertion ("" if unknown)
    int line;         // Line number of the failing assertion (-1 if unknown)
};
|
||||
|
||||
|
||||
/* Thrown by the not_implemented() macro to mark a test body that has not
 * been written yet; the runner reports these as SKIPPED, not failures. */
class NotImplementedError: public std::logic_error {
public:
    NotImplementedError(const std::string& file, int line):
        std::logic_error(_Format("Not implemented at {0}:{1}").format(file, line)) {}
};
|
||||
|
||||
|
||||
/* Thrown by TestCase::skip_if() to skip a test at runtime; the runner
 * reports it as SKIPPED rather than a failure. */
class SkippedTestError: public std::logic_error {
public:
    SkippedTestError(const std::string& reason):
        std::logic_error(reason) {}
};
|
||||
|
||||
/* Base class for test fixtures. Subclass it, add test_* methods, and
 * register them with TestRunner. The _assert_* methods back the assert_*
 * macros defined at the top of this header (which supply __FILE__/__LINE__);
 * each throws test::AssertionError on failure. */
class TestCase {
public:
    virtual ~TestCase() {}

    virtual void set_up() {}    // Called before each test method
    virtual void tear_down() {} // Called after each test method, even on throw

    /* Skip the current test at runtime when `flag` is true. */
    void skip_if(const bool& flag, const std::string& reason) {
        if(flag) { throw test::SkippedTestError(reason); }
    }

    /* Fails unless expected != actual is false (uses operator!=). */
    template<typename T, typename U>
    void _assert_equal(T expected, U actual, std::string file, int line) {
        if(expected != actual) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} does not match {1}").format(actual, expected));
        }
    }

    /* Fails when lhs equals rhs. NOTE(review): rhs is cast to T before
     * comparing, unlike _assert_equal — asymmetry to be confirmed. */
    template<typename T, typename U>
    void _assert_not_equal(T lhs, U rhs, std::string file, int line) {
        if(lhs == (T) rhs) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} should not match {1}").format(lhs, rhs));
        }
    }

    /* Fails unless bool(actual) is true. */
    template<typename T>
    void _assert_true(T actual, std::string file, int line) {
        if(!bool(actual)) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} is not true").format(bool(actual) ? "true" : "false"));
        }
    }

    /* Fails unless bool(actual) is false. */
    template<typename T>
    void _assert_false(T actual, std::string file, int line) {
        if(bool(actual)) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} is not false").format(bool(actual) ? "true" : "false"));
        }
    }

    /* Fails unless actual lies within [expected - difference, expected + difference]. */
    template<typename T, typename U, typename V>
    void _assert_close(T expected, U actual, V difference, std::string file, int line) {
        if(actual < expected - difference ||
            actual > expected + difference) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("{0} is not close enough to {1}").format(actual, expected));
        }
    }

    /* Fails unless the pointer is null. */
    template<typename T>
    void _assert_is_null(T* thing, std::string file, int line) {
        if(thing != nullptr) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, "Pointer was not NULL");
        }
    }

    /* Fails when the pointer is null. */
    template<typename T>
    void _assert_is_not_null(T* thing, std::string file, int line) {
        if(thing == nullptr) {
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, "Pointer was unexpectedly NULL");
        }
    }

    /* Fails unless func() throws an exception of type T (or a subclass).
     * NOTE(review): when T is test::AssertionError, the failure exception
     * thrown here is itself caught by the catch(T&) below, so the assertion
     * can never fail for that instantiation — confirm whether intended. */
    template<typename T, typename Func>
    void _assert_raises(Func func, std::string file, int line) {
        try {
            func();
            auto file_and_line = std::make_pair(file, line);
            throw test::AssertionError(file_and_line, test::_Format("Expected exception ({0}) was not thrown").format(typeid(T).name()));
        } catch(T& e) {}
    }

    /* Fails unless both containers have the same size and every element of
     * lhs appears somewhere in rhs (order-insensitive; duplicates are not
     * counted individually). */
    template<typename T, typename U>
    void _assert_items_equal(const T& lhs, const U& rhs, std::string file, int line) {
        auto file_and_line = std::make_pair(file, line);

        if(lhs.size() != rhs.size()) {
            throw test::AssertionError(file_and_line, "Containers are not the same length");
        }

        for(auto item: lhs) {
            if(std::find(rhs.begin(), rhs.end(), item) == rhs.end()) {
                throw test::AssertionError(file_and_line, test::_Format("Container does not contain {0}").format(item));
            }
        }
    }

    /* Backs the not_implemented() macro: marks the test as skipped. */
    void _not_implemented(std::string file, int line) {
        throw test::NotImplementedError(file, line);
    }
};
|
||||
|
||||
class TestRunner {
|
||||
public:
|
||||
template<typename T, typename U>
|
||||
void register_case(std::vector<U> methods, std::vector<std::string> names) {
|
||||
std::shared_ptr<TestCase> instance = std::make_shared<T>();
|
||||
|
||||
instances_.push_back(instance); //Hold on to it
|
||||
|
||||
for(std::string name: names) {
|
||||
names_.push_back(name);
|
||||
}
|
||||
|
||||
for(U& method: methods) {
|
||||
std::function<void()> func = std::bind(method, dynamic_cast<T*>(instance.get()));
|
||||
tests_.push_back([=]() {
|
||||
instance->set_up();
|
||||
try {
|
||||
func();
|
||||
} catch(...) {
|
||||
instance->tear_down();
|
||||
throw;
|
||||
}
|
||||
|
||||
instance->tear_down();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
int32_t run(const std::string& test_case, const std::string& junit_output="") {
|
||||
int failed = 0;
|
||||
int skipped = 0;
|
||||
int ran = 0;
|
||||
int crashed = 0;
|
||||
|
||||
auto new_tests = tests_;
|
||||
auto new_names = names_;
|
||||
|
||||
if(!test_case.empty()) {
|
||||
new_tests.clear();
|
||||
new_names.clear();
|
||||
|
||||
for(uint32_t i = 0; i < names_.size(); ++i) {
|
||||
if(names_[i].find(test_case) == 0) {
|
||||
new_tests.push_back(tests_[i]);
|
||||
new_names.push_back(names_[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << std::endl << "Running " << new_tests.size() << " tests" << std::endl << std::endl;
|
||||
|
||||
std::vector<std::string> junit_lines;
|
||||
junit_lines.push_back("<testsuites>\n");
|
||||
|
||||
std::string klass = "";
|
||||
|
||||
for(std::function<void ()> test: new_tests) {
|
||||
std::string name = new_names[ran];
|
||||
std::string this_klass(name.begin(), name.begin() + name.find_first_of(":"));
|
||||
bool close_klass = ran == (int) new_tests.size() - 1;
|
||||
|
||||
if(this_klass != klass) {
|
||||
if(!klass.empty()) {
|
||||
junit_lines.push_back(" </testsuite>\n");
|
||||
}
|
||||
klass = this_klass;
|
||||
junit_lines.push_back(" <testsuite name=\"" + this_klass + "\">\n");
|
||||
}
|
||||
|
||||
try {
|
||||
junit_lines.push_back(" <testcase name=\"" + new_names[ran] + "\">\n");
|
||||
std::string output = " " + new_names[ran];
|
||||
|
||||
for(int i = output.length(); i < 76; ++i) {
|
||||
output += " ";
|
||||
}
|
||||
|
||||
std::cout << output;
|
||||
test();
|
||||
std::cout << "\033[32m" << " OK " << "\033[0m" << std::endl;
|
||||
junit_lines.push_back(" </testcase>\n");
|
||||
} catch(test::NotImplementedError& e) {
|
||||
std::cout << "\033[34m" << " SKIPPED" << "\033[0m" << std::endl;
|
||||
++skipped;
|
||||
junit_lines.push_back(" </testcase>\n");
|
||||
} catch(test::SkippedTestError& e) {
|
||||
std::cout << "\033[34m" << " SKIPPED" << "\033[0m" << std::endl;
|
||||
++skipped;
|
||||
junit_lines.push_back(" </testcase>\n");
|
||||
} catch(test::AssertionError& e) {
|
||||
std::cout << "\033[33m" << " FAILED " << "\033[0m" << std::endl;
|
||||
std::cout << " " << e.what() << std::endl;
|
||||
if(!e.file.empty()) {
|
||||
std::cout << " " << e.file << ":" << e.line << std::endl;
|
||||
|
||||
std::ifstream ifs(e.file);
|
||||
if(ifs.good()) {
|
||||
std::string buffer;
|
||||
std::vector<std::string> lines;
|
||||
while(std::getline(ifs, buffer)) {
|
||||
lines.push_back(buffer);
|
||||
}
|
||||
|
||||
int line_count = lines.size();
|
||||
if(line_count && e.line <= line_count) {
|
||||
std::cout << lines.at(e.line - 1) << std::endl << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
++failed;
|
||||
|
||||
junit_lines.push_back(" <failure message=\"" + std::string(e.what()) + "\"/>\n");
|
||||
junit_lines.push_back(" </testcase>\n");
|
||||
} catch(std::exception& e) {
|
||||
std::cout << "\033[31m" << " EXCEPT " << std::endl;
|
||||
std::cout << " " << e.what() << "\033[0m" << std::endl;
|
||||
++crashed;
|
||||
|
||||
junit_lines.push_back(" <failure message=\"" + std::string(e.what()) + "\"/>\n");
|
||||
junit_lines.push_back(" </testcase>\n");
|
||||
}
|
||||
std::cout << "\033[0m";
|
||||
++ran;
|
||||
|
||||
if(close_klass) {
|
||||
junit_lines.push_back(" </testsuite>\n");
|
||||
}
|
||||
}
|
||||
|
||||
junit_lines.push_back("</testsuites>\n");
|
||||
|
||||
if(!junit_output.empty()) {
|
||||
FILE* f = fopen(junit_output.c_str(), "wt");
|
||||
if(f) {
|
||||
for(auto& line: junit_lines) {
|
||||
fwrite(line.c_str(), sizeof(char), line.length(), f);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
std::cout << "-----------------------" << std::endl;
|
||||
if(!failed && !crashed && !skipped) {
|
||||
std::cout << "All tests passed" << std::endl << std::endl;
|
||||
} else {
|
||||
if(skipped) {
|
||||
std::cout << skipped << " tests skipped";
|
||||
}
|
||||
|
||||
if(failed) {
|
||||
if(skipped) {
|
||||
std::cout << ", ";
|
||||
}
|
||||
std::cout << failed << " tests failed";
|
||||
}
|
||||
|
||||
if(crashed) {
|
||||
if(failed) {
|
||||
std::cout << ", ";
|
||||
}
|
||||
std::cout << crashed << " tests crashed";
|
||||
}
|
||||
std::cout << std::endl << std::endl;
|
||||
}
|
||||
|
||||
return failed + crashed;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TestCase>> instances_;
|
||||
std::vector<std::function<void()> > tests_;
|
||||
std::vector<std::string> names_;
|
||||
};
|
||||
} // test
|
||||
|
212
tools/test_generator.py
Executable file
212
tools/test_generator.py
Executable file
|
@ -0,0 +1,212 @@
|
|||
#!/usr/bin/env python3

import argparse
import re
import sys

# Command-line interface: one --output file, one-or-more input C++ test files.
parser = argparse.ArgumentParser(description="Generate C++ unit tests")
parser.add_argument("--output", type=str, nargs=1, help="The output source file for the generated test main()", required=True)
parser.add_argument("test_files", type=str, nargs="+", help="The list of C++ files containing your tests")
parser.add_argument("--verbose", help="Verbose logging", action="store_true", default=False)


# Matches a C++ class declaration, capturing the class name and an optional
# base-class list (access specifier + name, comma separated).
CLASS_REGEX = r"\s*class\s+(\w+)\s*([\:|,]\s*(?:public|private|protected)\s+[\w|::]+\s*)*"
# Matches a zero-argument "void test_*()" member declaration; the method name
# is captured as 'func_name'.
TEST_FUNC_REGEX = r"void\s+(?P<func_name>test_\S[^\(]+)\(\s*(void)?\s*\)"


# One #include line per discovered test source file.
INCLUDE_TEMPLATE = "#include \"%(file_path)s\""

# One runner->register_case<...>() call per discovered TestCase subclass.
REGISTER_TEMPLATE = """
    runner->register_case<%(class_name)s>(
        std::vector<void (%(class_name)s::*)()>({%(members)s}),
        {%(names)s}
    );"""

# Skeleton of the generated C++ main(): a tiny argv parser plus the
# registrations and a TestRunner::run() call. %(includes)s and
# %(registrations)s are filled in by main() below.
MAIN_TEMPLATE = """

#include <functional>
#include <memory>
#include <map>

#include "tools/test.h"

%(includes)s


std::map<std::string, std::string> parse_args(int argc, char* argv[]) {
    std::map<std::string, std::string> ret;

    for(int i = 1; i < argc; ++i) {
        std::string arg = argv[i];

        auto eq = arg.find('=');
        if(eq != std::string::npos && arg[0] == '-' && arg[1] == '-') {
            auto key = std::string(arg.begin(), arg.begin() + eq);
            auto value = std::string(arg.begin() + eq + 1, arg.end());
            ret[key] = value;
        } else if(arg[0] == '-' && arg[1] == '-') {
            auto key = arg;
            if(i < (argc - 1)) {
                auto value = argv[++i];
                ret[key] = value;
            } else {
                ret[key] = "";
            }
        } else {
            ret[arg] = ""; // Positional, not key=value
        }
    }

    return ret;
}

int main(int argc, char* argv[]) {
    auto runner = std::make_shared<test::TestRunner>();

    auto args = parse_args(argc, argv);

    std::string junit_xml;
    auto junit_xml_it = args.find("--junit-xml");
    if(junit_xml_it != args.end()) {
        junit_xml = junit_xml_it->second;
        std::cout << " Outputting junit XML to: " << junit_xml << std::endl;
        args.erase(junit_xml_it);
    }

    std::string test_case;
    if(args.size()) {
        test_case = args.begin()->first;
    }

    %(registrations)s

    return runner->run(test_case, junit_xml);
}


"""

# Set from --verbose in main(); read by log_verbose().
VERBOSE = False
||||
|
||||
def log_verbose(message):
    """Print *message* only when the module-level VERBOSE flag is set."""
    if not VERBOSE:
        return
    print(message)
||||
|
||||
|
||||
def find_tests(files):
    """Scan C++ source files for TestCase subclasses and their test methods.

    Returns a list of (path, class_name, parent_names, test_method_names)
    tuples for every class that (transitively) derives from TestCase or
    SimulantTestCase. Parsing is regex-based over the whole file collapsed
    onto one line — it does not handle comments or preprocessor tricks.
    """

    subclasses = []

    # First pass, find all class definitions
    for path in files:
        with open(path, "rt") as f:
            # Collapse the file to a single line so the regexes can span
            # declarations that were wrapped across lines.
            source_file_data = f.read().replace("\r\n", "").replace("\n", "")

        while True:
            match = re.search(CLASS_REGEX, source_file_data)
            if not match:
                break

            class_name = match.group().split(":")[0].replace("class", "").strip()

            try:
                parents = match.group().split(":", 1)[1]
            except IndexError:
                # NOTE(review): a class with no base list leaves `parents`
                # holding the previous iteration's value (or unbound on the
                # very first match) — confirm whether this ever bites.
                pass
            else:
                # Strip access specifiers, leaving bare base-class names.
                parents = [ x.strip() for x in parents.split(",") ]
                parents = [
                    x.replace("public", "").replace("private", "").replace("protected", "").strip()
                    for x in parents
                ]

            subclasses.append((path, class_name, parents, []))
            log_verbose("Found: %s" % str(subclasses[-1]))

            start = match.end()

            # Find the next opening brace
            while source_file_data[start] in (' ', '\t'):
                start += 1

            start -= 1
            end = start
            if source_file_data[start+1] == '{':

                # Collect the class body by counting braces until the one
                # opened by the class declaration is closed.
                class_data = []
                brace_counter = 1
                for i in range(start+2, len(source_file_data)):
                    class_data.append(source_file_data[i])
                    if class_data[-1] == '{': brace_counter += 1
                    if class_data[-1] == '}': brace_counter -= 1
                    if not brace_counter:
                        end = i
                        break

                class_data = "".join(class_data)

                # Pull every "void test_*()" declaration out of the body.
                while True:
                    match = re.search(TEST_FUNC_REGEX, class_data)
                    if not match:
                        break

                    subclasses[-1][-1].append(match.group('func_name'))
                    class_data = class_data[match.end():]

            # Continue scanning after this class body.
            source_file_data = source_file_data[end:]


    # Now, simplify the list by finding all potential superclasses, and then keeping any classes
    # that subclass them.
    test_case_subclasses = []
    i = 0
    while i < len(subclasses):
        subclass_names = [x.rsplit("::")[-1] for x in subclasses[i][2]]

        # If this subclasses TestCase, or it subclasses any of the already found testcase subclasses
        # then add it to the list
        if "TestCase" in subclass_names or "SimulantTestCase" in subclass_names or any(x[1] in subclasses[i][2] for x in test_case_subclasses):
            if subclasses[i] not in test_case_subclasses:
                test_case_subclasses.append(subclasses[i])

                i = 0 # Go back to the start, as we may have just found another parent class
                continue
        i += 1

    log_verbose("\n".join([str(x) for x in test_case_subclasses]))
    return test_case_subclasses
|
||||
|
||||
|
||||
def main():
    """Entry point: scan the given test files and write the generated C++ main().

    Returns 0 on success (exceptions from bad paths/arguments propagate).
    """
    global VERBOSE

    args = parser.parse_args()

    VERBOSE = args.verbose

    testcases = find_tests(args.test_files)

    # One #include per distinct source file that contained a test case.
    includes = "\n".join([ INCLUDE_TEMPLATE % { 'file_path' : x } for x in set([y[0] for y in testcases]) ])
    registrations = []

    for path, class_name, superclasses, funcs in testcases:
        BIND_TEMPLATE = "&%(class_name)s::%(func)s"

        # Member-function pointers and their display names, in parallel order.
        members = ", ".join([ BIND_TEMPLATE % { 'class_name' : class_name, 'func' : x } for x in funcs ])
        names = ", ".join([ '"%s::%s"' % (class_name, x) for x in funcs ])

        registrations.append(REGISTER_TEMPLATE % { 'class_name' : class_name, 'members' : members, 'names' : names })

    registrations = "\n".join(registrations)

    final = MAIN_TEMPLATE % {
        'registrations' : registrations,
        'includes' : includes
    }

    # Fix: close the output file deterministically (the original
    # `open(...).write(...)` leaked the handle until GC).
    with open(args.output[0], "w") as out:
        out.write(final)

    return 0
||||
|
||||
# Propagate main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
|
Loading…
Reference in New Issue
Block a user