feat: implement proper changes from profiling
- math - inlining
This commit is contained in:
parent
a2dcfcf997
commit
3a4f09bef2
|
@ -1,9 +1,8 @@
|
||||||
|
#pragma once
|
||||||
#ifndef CONFIG_H
|
#ifndef CONFIG_H
|
||||||
#define CONFIG_H
|
#define CONFIG_H
|
||||||
|
|
||||||
|
|
||||||
/* This figure is derived from the needs of Quake 1 */
|
/* This figure is derived from the needs of Quake 1 */
|
||||||
#define MAX_TEXTURE_COUNT 1088
|
#define MAX_TEXTURE_COUNT 1088
|
||||||
|
|
||||||
|
|
||||||
#endif // CONFIG_H
|
#endif // CONFIG_H
|
||||||
|
|
227
GL/cygprofile.c
Normal file
227
GL/cygprofile.c
Normal file
|
@ -0,0 +1,227 @@
|
||||||
|
/* Based on the idea from Erich Styger */
|
||||||
|
/* Compiler-instrumented (__cyg_profile_func_enter/exit) call profiling for gldc on hardware */
|
||||||
|
|
||||||
|
#include "cygprofile.h"
|
||||||
|
#include <kos.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "perfctr.h"
|
||||||
|
#include "private.h"
|
||||||
|
|
||||||
|
#if CYG_FUNC_TRACE_ENABLED
|
||||||
|
|
||||||
|
#define _strcat(x, y, z) strncat(x, z, y)
|
||||||
|
|
||||||
|
#ifndef __PE_Error_H
|
||||||
|
#define __PE_Error_H
|
||||||
|
|
||||||
|
#define ERR_OK 0 /* OK */
|
||||||
|
#define ERR_SPEED 1 /* This device does not work in the active speed mode. */
|
||||||
|
#define ERR_RANGE 2 /* Parameter out of range. */
|
||||||
|
#define ERR_VALUE 3 /* Parameter of incorrect value. */
|
||||||
|
#define ERR_OVERFLOW 4 /* Timer overflow. */
|
||||||
|
#define ERR_MATH 5 /* Overflow during evaluation. */
|
||||||
|
#define ERR_ENABLED 6 /* Device is enabled. */
|
||||||
|
#define ERR_DISABLED 7 /* Device is disabled. */
|
||||||
|
#define ERR_BUSY 8 /* Device is busy. */
|
||||||
|
#define ERR_NOTAVAIL 9 /* Requested value or method not available. */
|
||||||
|
#define ERR_RXEMPTY 10 /* No data in receiver. */
|
||||||
|
#define ERR_TXFULL 11 /* Transmitter is full. */
|
||||||
|
#define ERR_BUSOFF 12 /* Bus not available. */
|
||||||
|
#define ERR_OVERRUN 13 /* Overrun error is detected. */
|
||||||
|
#define ERR_FRAMING 14 /* Framing error is detected. */
|
||||||
|
#define ERR_PARITY 15 /* Parity error is detected. */
|
||||||
|
#define ERR_NOISE 16 /* Noise error is detected. */
|
||||||
|
#define ERR_IDLE 17 /* Idle error is detected. */
|
||||||
|
#define ERR_FAULT 18 /* Fault error is detected. */
|
||||||
|
#define ERR_BREAK 19 /* Break char is received during communication. */
|
||||||
|
#define ERR_CRC 20 /* CRC error is detected. */
|
||||||
|
#define ERR_ARBITR 21 /* A node lost arbitration. This error occurs if two nodes start transmission at the same time. */
|
||||||
|
#define ERR_PROTECT 22 /* Protection error is detected. */
|
||||||
|
|
||||||
|
#endif /* __PE_Error_H */
|
||||||
|
|
||||||
|
#define CYG_RNG_BUF_NOF_ELEMS (8096 * 4)
|
||||||
|
/*!< Number of elements in the ring buffer which is used to record function calls */
|
||||||
|
#define CYG_THUMB_MASK 0xFFFFFFFF
|
||||||
|
/*!< mask applied to addresses when printing; 0xFFFFFFFF keeps every bit, so no LSB (thumb) bit is actually stripped */
|
||||||
|
|
||||||
|
/* Hashing function for two uint32_ts */
|
||||||
|
#define HASH_PAIR(x, y) (((x)*0x1f1f1f1f) ^ (y))
|
||||||
|
|
||||||
|
static bool CYG_Enabled = false; /*!< flag which enables/disables tracing */
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Element in ring buffer to store the trace information.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
//bool isEnter; /*!< TRUE for __cyg_profile_func_enter(), FALSE for __cyg_profile_func_exit() */
|
||||||
|
void *this_fn; /*!< address (with thumb bit) of the (caller) function */
|
||||||
|
void *call_site; /*!< return address to the function which called this_fn */
|
||||||
|
uint32_t counter; /* also contains isEnter as highest bit */
|
||||||
|
} CYG_RNG_ElementType;
|
||||||
|
|
||||||
|
typedef uint32_t CYG_RNG_BufSizeType; /*!< index type for ring buffer */
|
||||||
|
|
||||||
|
static CYG_RNG_ElementType CYG_RNG_buffer[CYG_RNG_BUF_NOF_ELEMS]; /*!< ring buffer */
|
||||||
|
//static CYG_RNG_BufSizeType CYG_RNG_inIdx; /*!< input index */
|
||||||
|
static CYG_RNG_BufSizeType CYG_RNG_outIdx; /*!< output index */
|
||||||
|
static CYG_RNG_BufSizeType CYG_RNG_inSize; /*!< size/number of elements in buffer */
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Stores a trace element into the ring buffer.
|
||||||
|
* \param elem Trace element to put into the buffer.
|
||||||
|
* \return Error code, ERR_OK if everything is ok.
|
||||||
|
*/
|
||||||
|
__attribute__((no_instrument_function)) static uint8_t CYG_RNG_Put(CYG_RNG_ElementType *elem) {
|
||||||
|
uint8_t res = ERR_OK;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (CYG_RNG_inSize == CYG_RNG_BUF_NOF_ELEMS)
|
||||||
|
{
|
||||||
|
res = ERR_TXFULL;
|
||||||
|
CYG_RNG_inSize--;
|
||||||
|
CYG_PrintCallTrace();
|
||||||
|
//CYG_RNG_inIdx = 0;
|
||||||
|
CYG_RNG_outIdx = 0;
|
||||||
|
CYG_RNG_inSize = 0;
|
||||||
|
return CYG_RNG_Put(elem);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//CYG_RNG_buffer[CYG_RNG_inIdx] = *elem;
|
||||||
|
|
||||||
|
/*
|
||||||
|
CYG_RNG_inIdx++;
|
||||||
|
if (CYG_RNG_inIdx == CYG_RNG_BUF_NOF_ELEMS)
|
||||||
|
{
|
||||||
|
CYG_RNG_inIdx = 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
CYG_RNG_inSize++;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
CYG_RNG_ElementType *possible = &CYG_RNG_buffer[HASH_PAIR((uint32_t)elem->call_site, (uint32_t)elem->this_fn) % CYG_RNG_BUF_NOF_ELEMS];
|
||||||
|
if (possible->counter /*& 0x0FFFFFFF*/ == 0) {
|
||||||
|
*possible = *elem;
|
||||||
|
} else {
|
||||||
|
possible->counter++;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Gets a trace element from the ring buffer.
|
||||||
|
* \param elem Pointer where to store the trace element.
|
||||||
|
* \return Error code, ERR_OK if everything is ok.
|
||||||
|
*/
|
||||||
|
__attribute__((no_instrument_function)) static uint8_t CYG_RNG_Get(CYG_RNG_ElementType *elemP) {
|
||||||
|
uint8_t res = ERR_OK;
|
||||||
|
|
||||||
|
if (CYG_RNG_inSize == 0) {
|
||||||
|
res = ERR_RXEMPTY;
|
||||||
|
} else {
|
||||||
|
*elemP = CYG_RNG_buffer[CYG_RNG_outIdx];
|
||||||
|
CYG_RNG_inSize--;
|
||||||
|
CYG_RNG_outIdx++;
|
||||||
|
if (CYG_RNG_outIdx == CYG_RNG_BUF_NOF_ELEMS) {
|
||||||
|
CYG_RNG_outIdx = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t currentTime[2];
|
||||||
|
static uint32_t lastTime;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Stores a trace element into the ring buffer.
|
||||||
|
* \param this_fn Address of the caller function.
|
||||||
|
* \param call_site Return address to the function which called this_fn
|
||||||
|
* \return Error code, ERR_OK if everything is ok.
|
||||||
|
*/
|
||||||
|
__attribute__((no_instrument_function)) static void CYG_Store(void *this_fn, void *call_site) {
|
||||||
|
CYG_RNG_ElementType elem;
|
||||||
|
lastTime = currentTime[0];
|
||||||
|
PMCR_Read(1, (unsigned int *)currentTime);
|
||||||
|
//elem.isEnter = isEnter;
|
||||||
|
elem.call_site = call_site;
|
||||||
|
elem.this_fn = this_fn;
|
||||||
|
elem.counter = 1; //currentTime[0] - lastTime;
|
||||||
|
CYG_RNG_Put(&elem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Function which is called upon function enter. The function call is inserted by the compiler.
|
||||||
|
* \param this_fn Address of the caller function.
|
||||||
|
* \param call_site Return address to the function which called this_fn
|
||||||
|
*/
|
||||||
|
__attribute__((no_instrument_function)) void __cyg_profile_func_enter(void *this_fn, void *call_site) {
|
||||||
|
if (CYG_Enabled) {
|
||||||
|
CYG_Store(call_site, this_fn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
 * \brief Function-exit hook. The call to it is inserted by the compiler.
 *
 * Intentionally empty: only function entries are counted.
 *
 * \param this_fn   Address of the function being left (ignored).
 * \param call_site Address the function was called from (ignored).
 */
__attribute__((no_instrument_function)) void __cyg_profile_func_exit(void *this_fn, void *call_site) {
    (void)this_fn;
    (void)call_site;
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Dumps the trace to the console.
|
||||||
|
*/
|
||||||
|
__attribute__((no_instrument_function)) void CYG_PrintCallTrace(void) {
|
||||||
|
CYG_RNG_BufSizeType i;
|
||||||
|
char buf[40];
|
||||||
|
CYG_RNG_ElementType elem;
|
||||||
|
uint8_t res;
|
||||||
|
|
||||||
|
CYG_Enabled = false;
|
||||||
|
printf("0x%08x\n", ((unsigned int)&_etext) - BASE_ADDRESS);
|
||||||
|
//printf("Function Trace:\r\n");
|
||||||
|
CYG_RNG_outIdx = 0;
|
||||||
|
for (i = 0; i < CYG_RNG_BUF_NOF_ELEMS; i++) {
|
||||||
|
buf[0] = '\0';
|
||||||
|
res = CYG_RNG_Get(&elem);
|
||||||
|
if (res == ERR_OK && elem.call_site != NULL) {
|
||||||
|
snprintf(buf, sizeof(buf), "{ 0x%" PRIXPTR " 0x%" PRIXPTR " %u\r\n", (uintptr_t)(elem.this_fn) & CYG_THUMB_MASK, (uintptr_t)(elem.call_site) & CYG_THUMB_MASK, (unsigned int)elem.counter);
|
||||||
|
|
||||||
|
printf(buf);
|
||||||
|
} else {
|
||||||
|
//printf("ERROR getting element!\r\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//printf("Function Trace: done!\r\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((no_instrument_function)) void CYG_Init(void) {
|
||||||
|
if (CYG_Enabled) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
CYG_RNG_inSize = CYG_RNG_BUF_NOF_ELEMS;
|
||||||
|
CYG_RNG_outIdx = 0;
|
||||||
|
CYG_Enabled = true;
|
||||||
|
currentTime[0] = currentTime[1] = 0;
|
||||||
|
lastTime = 0;
|
||||||
|
memset(CYG_RNG_buffer, 0, sizeof(CYG_RNG_buffer));
|
||||||
|
PMCR_Init(1, PMCR_ELAPSED_TIME_MODE, PMCR_COUNT_CPU_CYCLES);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((no_instrument_function)) void CYG_Deinit(void) {
|
||||||
|
CYG_RNG_inSize = CYG_RNG_BUF_NOF_ELEMS;
|
||||||
|
CYG_RNG_outIdx = 0;
|
||||||
|
CYG_Enabled = false;
|
||||||
|
memset(CYG_RNG_buffer, 0, sizeof(CYG_RNG_buffer));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* Tracing is compiled out: the public entry points remain as no-ops. */
void CYG_PrintCallTrace(void) {
}

void CYG_Init(void) {
}

void CYG_Deinit(void) {
}
|
||||||
|
|
||||||
|
#endif
|
33
GL/cygprofile.h
Normal file
33
GL/cygprofile.h
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#pragma once
|
||||||
|
#ifndef CYGPROFILE_H_
|
||||||
|
#define CYGPROFILE_H_
|
||||||
|
|
||||||
|
/* Based on the idea from Erich Styger */
|
||||||
|
/* Compiler-instrumented (__cyg_profile_func_enter/exit) call profiling for gldc on hardware */
|
||||||
|
|
||||||
|
#define NO_INSTRUMENT inline __attribute__((no_instrument_function))
|
||||||
|
#define INLINE_DEBUG NO_INSTRUMENT __attribute__((always_inline))
|
||||||
|
#define INLINE_ALWAYS static NO_INSTRUMENT __attribute__((always_inline))
|
||||||
|
|
||||||
|
extern char _etext;
|
||||||
|
#define BASE_ADDRESS 0x8c010000
|
||||||
|
|
||||||
|
/*!< 1: Trace enabled, 0: trace disabled.
 *   Defaults to 1; define it before including this header (e.g. on the
 *   compiler command line) to override the default. */
#ifndef CYG_FUNC_TRACE_ENABLED
#define CYG_FUNC_TRACE_ENABLED (1)
#endif
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Print the call trace to the terminal.
|
||||||
|
*/
|
||||||
|
void CYG_PrintCallTrace(void);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Driver Initialization.
|
||||||
|
*/
|
||||||
|
void CYG_Init(void);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Driver De-Initialization.
|
||||||
|
*/
|
||||||
|
void CYG_Deinit(void);
|
||||||
|
|
||||||
|
#endif /* CYGPROFILE_H_ */
|
34
GL/draw.c
34
GL/draw.c
|
@ -56,7 +56,7 @@ void _glInitAttributePointers() {
|
||||||
NORMAL_POINTER.size = 3;
|
NORMAL_POINTER.size = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint byte_size(GLenum type) {
|
static INLINE_DEBUG GLuint byte_size(GLenum type) {
|
||||||
switch(type) {
|
switch(type) {
|
||||||
case GL_BYTE: return sizeof(GLbyte);
|
case GL_BYTE: return sizeof(GLbyte);
|
||||||
case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
|
case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
|
||||||
|
@ -513,7 +513,7 @@ PVRHeader* _glSubmissionTargetHeader(SubmissionTarget* target) {
|
||||||
return aligned_vector_at(&target->output->vector, target->header_offset);
|
return aligned_vector_at(&target->output->vector, target->header_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
Vertex* _glSubmissionTargetStart(SubmissionTarget* target) {
|
INLINE_DEBUG Vertex* _glSubmissionTargetStart(SubmissionTarget* target) {
|
||||||
assert(target->start_offset < target->output->vector.size);
|
assert(target->start_offset < target->output->vector.size);
|
||||||
return aligned_vector_at(&target->output->vector, target->start_offset);
|
return aligned_vector_at(&target->output->vector, target->start_offset);
|
||||||
}
|
}
|
||||||
|
@ -1006,6 +1006,7 @@ static void mat_transform_normal3(const float* xyz, const float* xyzOut, const u
|
||||||
|
|
||||||
static void light(SubmissionTarget* target) {
|
static void light(SubmissionTarget* target) {
|
||||||
|
|
||||||
|
#if 0
|
||||||
typedef struct {
|
typedef struct {
|
||||||
float xyz[3];
|
float xyz[3];
|
||||||
float n[3];
|
float n[3];
|
||||||
|
@ -1057,6 +1058,35 @@ static void light(SubmissionTarget* target) {
|
||||||
vertex->bgra[G8IDX] = (GLubyte) (255.0f * fminf(total[1], 1.0f));
|
vertex->bgra[G8IDX] = (GLubyte) (255.0f * fminf(total[1], 1.0f));
|
||||||
vertex->bgra[B8IDX] = (GLubyte) (255.0f * fminf(total[2], 1.0f));
|
vertex->bgra[B8IDX] = (GLubyte) (255.0f * fminf(total[2], 1.0f));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(!_glIsLightingEnabled()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AlignedVector* eye_space_data = NULL;
|
||||||
|
|
||||||
|
if(!eye_space_data) {
|
||||||
|
eye_space_data = (AlignedVector*) malloc(sizeof(AlignedVector));
|
||||||
|
aligned_vector_init(eye_space_data, sizeof(EyeSpaceData));
|
||||||
|
}
|
||||||
|
|
||||||
|
aligned_vector_resize(eye_space_data, target->count);
|
||||||
|
|
||||||
|
/* Perform lighting calculations and manipulate the colour */
|
||||||
|
Vertex* vertex = _glSubmissionTargetStart(target);
|
||||||
|
VertexExtra* extra = aligned_vector_at(target->extras, 0);
|
||||||
|
EyeSpaceData* eye_space = (EyeSpaceData*) eye_space_data->data;
|
||||||
|
|
||||||
|
_glMatrixLoadModelView();
|
||||||
|
mat_transform3(vertex->xyz, eye_space->xyz, target->count, sizeof(Vertex), sizeof(EyeSpaceData));
|
||||||
|
|
||||||
|
_glMatrixLoadNormal();
|
||||||
|
mat_transform_normal3(extra->nxyz, eye_space->n, target->count, sizeof(VertexExtra), sizeof(EyeSpaceData));
|
||||||
|
|
||||||
|
EyeSpaceData* ES = aligned_vector_at(eye_space_data, 0);
|
||||||
|
_glPerformLighting(vertex, ES, target->count);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void divide(SubmissionTarget* target) {
|
static void divide(SubmissionTarget* target) {
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "private.h"
|
#include "private.h"
|
||||||
|
#include "config.h"
|
||||||
#include "../include/glkos.h"
|
#include "../include/glkos.h"
|
||||||
#include "../include/glext.h"
|
#include "../include/glext.h"
|
||||||
|
|
||||||
|
@ -94,62 +95,62 @@ void APIENTRY glFramebufferTexture2DEXT(GLenum target, GLenum attachment, GLenum
|
||||||
ACTIVE_FRAMEBUFFER->texture_id = texture;
|
ACTIVE_FRAMEBUFFER->texture_id = texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint A1555(GLuint v) {
|
static INLINE_DEBUG GLuint A1555(GLuint v) {
|
||||||
const GLuint MASK = (1 << 15);
|
const GLuint MASK = (1 << 15);
|
||||||
return (v & MASK) >> 15;
|
return (v & MASK) >> 15;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint R1555(GLuint v) {
|
static INLINE_DEBUG GLuint R1555(GLuint v) {
|
||||||
const GLuint MASK = (31 << 10);
|
const GLuint MASK = (31 << 10);
|
||||||
return (v & MASK) >> 10;
|
return (v & MASK) >> 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint G1555(GLuint v) {
|
static INLINE_DEBUG GLuint G1555(GLuint v) {
|
||||||
const GLuint MASK = (31 << 5);
|
const GLuint MASK = (31 << 5);
|
||||||
return (v & MASK) >> 5;
|
return (v & MASK) >> 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint B1555(GLuint v) {
|
static INLINE_DEBUG GLuint B1555(GLuint v) {
|
||||||
const GLuint MASK = (31 << 0);
|
const GLuint MASK = (31 << 0);
|
||||||
return (v & MASK) >> 0;
|
return (v & MASK) >> 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint A4444(GLuint v) {
|
static INLINE_DEBUG GLuint A4444(GLuint v) {
|
||||||
const GLuint MASK = (0xF << 12);
|
const GLuint MASK = (0xF << 12);
|
||||||
return (v & MASK) >> 12;
|
return (v & MASK) >> 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint R4444(GLuint v) {
|
static INLINE_DEBUG GLuint R4444(GLuint v) {
|
||||||
const GLuint MASK = (0xF << 8);
|
const GLuint MASK = (0xF << 8);
|
||||||
return (v & MASK) >> 8;
|
return (v & MASK) >> 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint G4444(GLuint v) {
|
static INLINE_DEBUG GLuint G4444(GLuint v) {
|
||||||
const GLuint MASK = (0xF << 4);
|
const GLuint MASK = (0xF << 4);
|
||||||
return (v & MASK) >> 4;
|
return (v & MASK) >> 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint B4444(GLuint v) {
|
static INLINE_DEBUG GLuint B4444(GLuint v) {
|
||||||
const GLuint MASK = (0xF << 0);
|
const GLuint MASK = (0xF << 0);
|
||||||
return (v & MASK) >> 0;
|
return (v & MASK) >> 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint R565(GLuint v) {
|
static INLINE_DEBUG GLuint R565(GLuint v) {
|
||||||
const GLuint MASK = (31 << 11);
|
const GLuint MASK = (31 << 11);
|
||||||
return (v & MASK) >> 11;
|
return (v & MASK) >> 11;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint G565(GLuint v) {
|
static INLINE_DEBUG GLuint G565(GLuint v) {
|
||||||
const GLuint MASK = (63 << 5);
|
const GLuint MASK = (63 << 5);
|
||||||
return (v & MASK) >> 5;
|
return (v & MASK) >> 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline GLuint B565(GLuint v) {
|
static INLINE_DEBUG GLuint B565(GLuint v) {
|
||||||
const GLuint MASK = (31 << 0);
|
const GLuint MASK = (31 << 0);
|
||||||
return (v & MASK) >> 0;
|
return (v & MASK) >> 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLboolean _glCalculateAverageTexel(const GLubyte* src, const GLuint srcWidth, const GLuint pvrFormat, GLubyte* dest) {
|
static NO_INSTRUMENT GLboolean _glCalculateAverageTexel(const GLubyte* src, const GLuint srcWidth, const GLuint pvrFormat, GLubyte* dest) {
|
||||||
GLushort* s1 = ((GLushort*) src);
|
GLushort* s1 = ((GLushort*) src);
|
||||||
GLushort* s2 = ((GLushort*) src) + 1;
|
GLushort* s2 = ((GLushort*) src) + 1;
|
||||||
GLushort* s3 = ((GLushort*) src) + srcWidth;
|
GLushort* s3 = ((GLushort*) src) + srcWidth;
|
||||||
|
|
|
@ -19,3 +19,7 @@
|
||||||
#include "matrix.c"
|
#include "matrix.c"
|
||||||
#include "state.c"
|
#include "state.c"
|
||||||
#include "texture.c"
|
#include "texture.c"
|
||||||
|
|
||||||
|
#include "../containers/stack.c"
|
||||||
|
#include "../containers/aligned_vector.c"
|
||||||
|
#include "../containers/named_array.c"
|
233
GL/lighting.c
233
GL/lighting.c
|
@ -281,98 +281,143 @@ static inline float FPOW(float b, float p) {
|
||||||
return FEXP(FLOG(b) * p);
|
return FEXP(FLOG(b) * p);
|
||||||
}
|
}
|
||||||
|
|
||||||
void _glCalculateLightingContribution(const GLint light, const GLfloat* pos, const GLfloat* normal, uint8_t* bgra, GLfloat* colour) __attribute__((optimize("fast-math")));
|
#define LIGHT_COMPONENT(C) { \
|
||||||
void _glCalculateLightingContribution(const GLint light, const GLfloat* pos, const GLfloat* normal, uint8_t* bgra, GLfloat* colour) {
|
const GLfloat* acm = &MA[C]; \
|
||||||
LightSource* l = &LIGHTS[light];
|
const GLfloat* dcm = &MD[C]; \
|
||||||
|
const GLfloat* scm = &MS[C]; \
|
||||||
struct vec3f L = {
|
const GLfloat* scli = &light->specular[C]; \
|
||||||
l->position[0],
|
const GLfloat* dcli = &light->diffuse[C]; \
|
||||||
l->position[1],
|
const GLfloat* acli = &light->ambient[C]; \
|
||||||
l->position[2]
|
const GLfloat* srm = &MATERIAL.exponent; \
|
||||||
};
|
const GLfloat fi = (LdotN == 0) ? 0 : 1; \
|
||||||
|
GLfloat component = (*acm * *acli); \
|
||||||
if(!l->is_directional) {
|
component += (LdotN * *dcm * *dcli); \
|
||||||
L.x -= pos[0];
|
component += (FPOW((fi * NdotH), *srm) * *scm * *scli); \
|
||||||
L.y -= pos[1];
|
component *= att; \
|
||||||
L.z -= pos[2];
|
component *= spot; \
|
||||||
}
|
final[C] += component; \
|
||||||
|
|
||||||
struct vec3f N = {
|
|
||||||
normal[0],
|
|
||||||
normal[1],
|
|
||||||
normal[2]
|
|
||||||
};
|
|
||||||
|
|
||||||
struct vec3f V = {
|
|
||||||
pos[0],
|
|
||||||
pos[1],
|
|
||||||
pos[2]
|
|
||||||
};
|
|
||||||
|
|
||||||
GLfloat d;
|
|
||||||
vec3f_length(L.x, L.y, L.z, d);
|
|
||||||
|
|
||||||
GLfloat oneOverL = 1.0f / d;
|
|
||||||
|
|
||||||
L.x *= oneOverL;
|
|
||||||
L.y *= oneOverL;
|
|
||||||
L.z *= oneOverL;
|
|
||||||
|
|
||||||
vec3f_normalize(V.x, V.y, V.z);
|
|
||||||
|
|
||||||
GLfloat NdotL, VdotN;
|
|
||||||
vec3f_dot(N.x, N.y, N.z, L.x, L.y, L.z, NdotL);
|
|
||||||
vec3f_dot(V.x, V.y, V.z, N.x, N.y, N.z, VdotN);
|
|
||||||
|
|
||||||
GLfloat VdotR = VdotN - NdotL;
|
|
||||||
GLfloat specularPower = FPOW(VdotR > 0 ? VdotR : 0, MATERIAL.exponent);
|
|
||||||
|
|
||||||
GLboolean colorMaterial = _glIsColorMaterialEnabled();
|
|
||||||
|
|
||||||
GLfloat mD [] = {
|
|
||||||
(colorMaterial && isDiffuseColorMaterial()) ? ((GLfloat)bgra[R8IDX]) / 255.0f : MATERIAL.diffuse[0],
|
|
||||||
(colorMaterial && isDiffuseColorMaterial()) ? ((GLfloat)bgra[G8IDX]) / 255.0f : MATERIAL.diffuse[1],
|
|
||||||
(colorMaterial && isDiffuseColorMaterial()) ? ((GLfloat)bgra[B8IDX]) / 255.0f : MATERIAL.diffuse[2],
|
|
||||||
(colorMaterial && isDiffuseColorMaterial()) ? ((GLfloat)bgra[A8IDX]) / 255.0f : MATERIAL.diffuse[3]
|
|
||||||
};
|
|
||||||
|
|
||||||
GLfloat mA [] = {
|
|
||||||
(colorMaterial && isAmbientColorMaterial()) ? ((GLfloat)bgra[R8IDX]) / 255.0f : MATERIAL.ambient[0],
|
|
||||||
(colorMaterial && isAmbientColorMaterial()) ? ((GLfloat)bgra[G8IDX]) / 255.0f : MATERIAL.ambient[1],
|
|
||||||
(colorMaterial && isAmbientColorMaterial()) ? ((GLfloat)bgra[B8IDX]) / 255.0f : MATERIAL.ambient[2],
|
|
||||||
(colorMaterial && isAmbientColorMaterial()) ? ((GLfloat)bgra[A8IDX]) / 255.0f : MATERIAL.ambient[3]
|
|
||||||
};
|
|
||||||
|
|
||||||
GLfloat mS [] = {
|
|
||||||
(colorMaterial && isSpecularColorMaterial()) ? ((GLfloat)bgra[R8IDX]) / 255.0f : MATERIAL.specular[0],
|
|
||||||
(colorMaterial && isSpecularColorMaterial()) ? ((GLfloat)bgra[G8IDX]) / 255.0f : MATERIAL.specular[1],
|
|
||||||
(colorMaterial && isSpecularColorMaterial()) ? ((GLfloat)bgra[B8IDX]) / 255.0f : MATERIAL.specular[2],
|
|
||||||
(colorMaterial && isSpecularColorMaterial()) ? ((GLfloat)bgra[A8IDX]) / 255.0f : MATERIAL.specular[3]
|
|
||||||
};
|
|
||||||
|
|
||||||
colour[0] = l->ambient[0] * mA[0];
|
|
||||||
colour[1] = l->ambient[1] * mA[1];
|
|
||||||
colour[2] = l->ambient[2] * mA[2];
|
|
||||||
colour[3] = mD[3];
|
|
||||||
|
|
||||||
if(NdotL >= 0) {
|
|
||||||
colour[0] += (l->diffuse[0] * mD[0] * NdotL + l->specular[0] * mS[0] * specularPower);
|
|
||||||
colour[1] += (l->diffuse[1] * mD[1] * NdotL + l->specular[1] * mS[1] * specularPower);
|
|
||||||
colour[2] += (l->diffuse[2] * mD[2] * NdotL + l->specular[2] * mS[2] * specularPower);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!l->is_directional) {
|
|
||||||
GLfloat att = (
|
|
||||||
1.0f / (l->constant_attenuation + (l->linear_attenuation * d) + (l->quadratic_attenuation * d * d))
|
|
||||||
);
|
|
||||||
|
|
||||||
colour[0] *= att;
|
|
||||||
colour[1] *= att;
|
|
||||||
colour[2] *= att;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(colour[0] > 1.0f) colour[0] = 1.0f;
|
|
||||||
if(colour[1] > 1.0f) colour[1] = 1.0f;
|
|
||||||
if(colour[2] > 1.0f) colour[2] = 1.0f;
|
|
||||||
if(colour[3] > 1.0f) colour[3] = 1.0f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Dot product of (x1, y1, z1) and (x2, y2, z2), with negative results
 * replaced by 0. The components are taken by pointer. */
static inline float vec3_dot_limited(
        const float* x1, const float* y1, const float* z1,
        const float* x2, const float* y2, const float* z2) {

    float dot;
    vec3f_dot(*x1, *y1, *z1, *x2, *y2, *z2, dot);

    if(dot < 0) {
        return 0;
    }
    return dot;
}
|
||||||
|
|
||||||
|
/* Lights `count` vertices in place.
 *
 * For each vertex the colour starts as scene ambient * material ambient plus
 * material emissive (alpha is the material diffuse alpha), then every enabled
 * light adds its ambient, diffuse and specular terms through LIGHT_COMPONENT.
 * The result is scaled to 0..255, capped at 255 and written to vertex->bgra.
 *
 * When colour material is enabled, the incoming vertex colour replaces
 * whichever of the material diffuse / ambient / specular colours the
 * colour-material mode selects.
 *
 * vertices: vertices to light; bgra is read (colour material) and overwritten.
 * es:       per-vertex position (xyz) and normal (n), one entry per vertex.
 * count:    number of entries in both arrays.
 *
 * A light whose position[3] is 0 is treated as directional: its position is
 * used directly as the direction towards the light and it is not attenuated.
 * For all other lights the direction runs from the vertex to the light.
 */
void _glPerformLighting(Vertex* vertices, const EyeSpaceData* es, const int32_t count) {
    int8_t i;
    int32_t j;

    const LightSource* light = NULL;

    /* Colour-material state is queried once for the whole batch */
    const GLboolean colorMaterial = _glIsColorMaterialEnabled();
    const GLboolean isDiffuseCM = isDiffuseColorMaterial();
    const GLboolean isAmbientCM = isAmbientColorMaterial();
    const GLboolean isSpecularCM = isSpecularColorMaterial();

    static GLfloat CM[4];

    /* So the DC has 16 floating point registers, that means
     * we need to limit the number of floats as much as possible
     * to give the compiler a good enough chance to do the right
     * thing */

    Vertex* vertex = vertices;
    const EyeSpaceData* data = es;

    static const float ONE_OVER_255 = 1.0f / 255.0f;

    for(j = 0; j < count; ++j, ++vertex, ++data) {
        /* When GL_COLOR_MATERIAL is on, we need to pull out
         * the passed in diffuse and use it */
        const GLfloat* MD = MATERIAL.diffuse;
        const GLfloat* MA = MATERIAL.ambient;
        const GLfloat* MS = MATERIAL.specular;

        if(colorMaterial) {
            CM[0] = ((GLfloat) vertex->bgra[R8IDX]) * ONE_OVER_255;
            CM[1] = ((GLfloat) vertex->bgra[G8IDX]) * ONE_OVER_255;
            CM[2] = ((GLfloat) vertex->bgra[B8IDX]) * ONE_OVER_255;
            CM[3] = ((GLfloat) vertex->bgra[A8IDX]) * ONE_OVER_255;

            MD = (isDiffuseCM) ? CM : MATERIAL.diffuse;
            MA = (isAmbientCM) ? CM : MATERIAL.ambient;
            MS = (isSpecularCM) ? CM : MATERIAL.specular;
        }

        float final[4];

        /* Initial, non-light related values */
        final[0] = (SCENE_AMBIENT[0] * MA[0]) + MATERIAL.emissive[0];
        final[1] = (SCENE_AMBIENT[1] * MA[1]) + MATERIAL.emissive[1];
        final[2] = (SCENE_AMBIENT[2] * MA[2]) + MATERIAL.emissive[2];
        final[3] = MD[3];

        /* V: normalized negated vertex position */
        float Vx, Vy, Vz;
        Vx = -data->xyz[0];
        Vy = -data->xyz[1];
        Vz = -data->xyz[2];
        vec3f_normalize(Vx, Vy, Vz);

        for(i = 0; i < MAX_LIGHTS; ++i) {
            if(!_glIsLightEnabled(i)) continue;

            /* Calc light specific parameters */
            light = &LIGHTS[i];

            float Lx, Ly, Lz, D;
            float Hx, Hy, Hz;
            const float* Nx = &data->n[0];
            const float* Ny = &data->n[1];
            const float* Nz = &data->n[2];

            /* position[3] == 0 marks a directional light: its position is
             * already the direction to the light, so the vertex position
             * must not be subtracted from it */
            const GLboolean directional = (light->position[3] == 0.0f);

            Lx = light->position[0];
            Ly = light->position[1];
            Lz = light->position[2];

            if(!directional) {
                Lx -= data->xyz[0];
                Ly -= data->xyz[1];
                Lz -= data->xyz[2];
            }

            vec3f_length(Lx, Ly, Lz, D);

            {
                /* Normalize L - scoping ensures Llen is temporary */
                const float Llen = 1.0f / D;
                Lx *= Llen;
                Ly *= Llen;
                Lz *= Llen;
            }

            /* H: normalized sum of L and V, used for the specular term */
            Hx = (Lx + Vx);
            Hy = (Ly + Vy);
            Hz = (Lz + Vz);
            vec3f_normalize(Hx, Hy, Hz);

            const float LdotN = vec3_dot_limited(
                &Lx, &Ly, &Lz,
                Nx, Ny, Nz
            );

            const float NdotH = vec3_dot_limited(
                Nx, Ny, Nz,
                &Hx, &Hy, &Hz
            );

            /* Directional lights are not attenuated */
            const float att = (directional) ? 1.0f :
                1.0f / (light->constant_attenuation + (light->linear_attenuation * D) + (light->quadratic_attenuation * D * D));

            /* Fixed at 1: no spotlight falloff is applied here */
            const float spot = 1.0f;

            /* LIGHT_COMPONENT reads MA, MD, MS, light, LdotN, NdotH, att and
             * spot by name and accumulates into final[] */
            LIGHT_COMPONENT(0);
            LIGHT_COMPONENT(1);
            LIGHT_COMPONENT(2);
        }

        vertex->bgra[R8IDX] = (GLubyte)(fminf(final[0] * 255.0f, 255.0f));
        vertex->bgra[G8IDX] = (GLubyte)(fminf(final[1] * 255.0f, 255.0f));
        vertex->bgra[B8IDX] = (GLubyte)(fminf(final[2] * 255.0f, 255.0f));
        vertex->bgra[A8IDX] = (GLubyte)(fminf(final[3] * 255.0f, 255.0f));
    }
}
|
||||||
|
|
||||||
|
|
109
GL/matrix.c
109
GL/matrix.c
|
@ -476,84 +476,57 @@ void APIENTRY glDepthRange(GLclampf n, GLclampf f) {
|
||||||
DEPTH_RANGE_MULTIPLIER_H = (n + f) / 2.0f;
|
DEPTH_RANGE_MULTIPLIER_H = (n + f) / 2.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "sh4_math.h"
|
||||||
|
|
||||||
/* Vector Cross Product - Used by glhLookAtf2 */
|
/* Vector Cross Product - Used by glhLookAtf2 */
|
||||||
static inline void vec3f_cross(const GLfloat* v1, const GLfloat* v2, GLfloat* result) {
|
static inline void vec3f_cross(GLfloat* v1, GLfloat* v2, GLfloat* result) {
|
||||||
result[0] = v1[1] * v2[2] - v1[2] * v2[1];
|
result[0] = (v1[1] * v2[2]) - (v1[2] * v2[1]);
|
||||||
result[1] = v1[2] * v2[0] - v1[0] * v2[2];
|
result[1] = (v1[2] * v2[0]) - (v1[0] * v2[2]);
|
||||||
result[2] = v1[0] * v2[1] - v1[1] * v2[0];
|
result[2] = (v1[0] * v2[1]) - (v1[1] * v2[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* glhLookAtf2 adapted from http://www.opengl.org/wiki/GluLookAt_code */
|
|
||||||
void glhLookAtf2(const GLfloat* eyePosition3D,
|
|
||||||
const GLfloat* center3D,
|
|
||||||
const GLfloat* upVector3D) {
|
|
||||||
|
|
||||||
/* Look-At Matrix */
|
static inline void vec3f_normalize_sh4(float *v){
|
||||||
static Matrix4x4 MatrixLookAt __attribute__((aligned(32))) = {
|
float length, ilength;
|
||||||
1.0f, 0.0f, 0.0f, 0.0f,
|
|
||||||
0.0f, 1.0f, 0.0f, 0.0f,
|
|
||||||
0.0f, 0.0f, 1.0f, 0.0f,
|
|
||||||
0.0f, 0.0f, 0.0f, 1.0f
|
|
||||||
};
|
|
||||||
|
|
||||||
GLfloat forward[3];
|
ilength = MATH_fsrra(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
|
||||||
GLfloat side[3];
|
length = MATH_Invert(ilength);
|
||||||
GLfloat up[3];
|
if (length)
|
||||||
|
{
|
||||||
vec3f_sub_normalize(center3D[0], center3D[1], center3D[2],
|
v[0] *= ilength;
|
||||||
eyePosition3D[0], eyePosition3D[1], eyePosition3D[2],
|
v[1] *= ilength;
|
||||||
forward[0], forward[1], forward[2]);
|
v[2] *= ilength;
|
||||||
|
}
|
||||||
//Side = forward x up
|
|
||||||
vec3f_cross(forward, upVector3D, side);
|
|
||||||
vec3f_normalize(side[0], side[1], side[2]);
|
|
||||||
|
|
||||||
//Recompute up as: up = side x forward
|
|
||||||
vec3f_cross(side, forward, up);
|
|
||||||
|
|
||||||
MatrixLookAt[M0] = side[0];
|
|
||||||
MatrixLookAt[M4] = side[1];
|
|
||||||
MatrixLookAt[M8] = side[2];
|
|
||||||
MatrixLookAt[M12] = 0;
|
|
||||||
|
|
||||||
MatrixLookAt[M1] = up[0];
|
|
||||||
MatrixLookAt[M5] = up[1];
|
|
||||||
MatrixLookAt[M9] = up[2];
|
|
||||||
MatrixLookAt[M13] = 0;
|
|
||||||
|
|
||||||
MatrixLookAt[M2] = -forward[0];
|
|
||||||
MatrixLookAt[M6] = -forward[1];
|
|
||||||
MatrixLookAt[M10] = -forward[2];
|
|
||||||
MatrixLookAt[M14] = 0;
|
|
||||||
|
|
||||||
MatrixLookAt[M3] = MatrixLookAt[11] = MatrixLookAt[15] = 0;
|
|
||||||
MatrixLookAt[M15] = 1;
|
|
||||||
|
|
||||||
static Matrix4x4 trn __attribute__((aligned(32))) = {
|
|
||||||
1.0f, 0.0f, 0.0f, 0.0f,
|
|
||||||
0.0f, 1.0f, 0.0f, 0.0f,
|
|
||||||
0.0f, 0.0f, 1.0f, 0.0f,
|
|
||||||
0.0f, 0.0f, 0.0f, 1.0f
|
|
||||||
};
|
|
||||||
|
|
||||||
trn[M12] = -eyePosition3D[0];
|
|
||||||
trn[M13] = -eyePosition3D[1];
|
|
||||||
trn[M14] = -eyePosition3D[2];
|
|
||||||
|
|
||||||
// Does not modify internal Modelview matrix
|
|
||||||
upload_matrix(&MatrixLookAt);
|
|
||||||
multiply_matrix(&trn);
|
|
||||||
multiply_matrix(stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)));
|
|
||||||
download_matrix(stack_top(MATRIX_STACKS + (GL_MODELVIEW & 0xF)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void gluLookAt(GLfloat eyex, GLfloat eyey, GLfloat eyez, GLfloat centerx,
               GLfloat centery, GLfloat centerz, GLfloat upx, GLfloat upy,
               GLfloat upz) {
    /* Builds a viewing rotation from the eye position, the point being
     * looked at and an up hint, multiplies it onto the current matrix with
     * glMultMatrixf(), then translates by the negated eye position. */
    GLfloat forward[3] = { centerx - eyex, centery - eyey, centerz - eyez };
    GLfloat up[3] = { upx, upy, upz };
    GLfloat side[3];

    vec3f_normalize_sh4(forward);

    /* side = forward x up */
    vec3f_cross(forward, up, side);
    vec3f_normalize_sh4(side);

    /* Recompute up as side x forward, overwriting the caller's hint */
    vec3f_cross(side, forward, up);

    /* Elements are listed in storage order 0..15: side, up and -forward
     * occupy indices {0,4,8}, {1,5,9} and {2,6,10} respectively. */
    const GLfloat rot[16] = {
        side[0], up[0], -forward[0], 0.0f,
        side[1], up[1], -forward[1], 0.0f,
        side[2], up[2], -forward[2], 0.0f,
        0.0f,    0.0f,  0.0f,        1.0f
    };

    glMultMatrixf(rot);
    glTranslatef(-eyex, -eyey, -eyez);
}
|
||||||
|
|
||||||
void _glApplyRenderMatrix() {
|
void _glApplyRenderMatrix() {
|
||||||
|
|
247
GL/perfctr.c
Normal file
247
GL/perfctr.c
Normal file
|
@ -0,0 +1,247 @@
|
||||||
|
// ---- perfctr.c - SH7091 Performance Counter Module Code ----
|
||||||
|
//
|
||||||
|
// This file is part of the DreamHAL project, a hardware abstraction library
|
||||||
|
// primarily intended for use on the SH7091 found in hardware such as the SEGA
|
||||||
|
// Dreamcast game console.
|
||||||
|
//
|
||||||
|
// The performance counter module is hereby released into the public domain in
|
||||||
|
// the hope that it may prove useful. Now go profile some code and hit 60 fps! :)
|
||||||
|
//
|
||||||
|
// --Moopthehedgehog
|
||||||
|
|
||||||
|
// See perfctr.h for more of my notes and documentation on these counters.
|
||||||
|
#include "perfctr.h"
|
||||||
|
#include "cygprofile.h"
|
||||||
|
#if CYG_FUNC_TRACE_ENABLED
|
||||||
|
|
||||||
|
// Bitmask of the counters this module has started: bit 0 (0x1) is counter 1,
// bit 1 (0x2) is counter 2, so 3 means both are running and 0 means neither.
static unsigned char pmcr_enabled = 0;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Initialize performance counters. It's just a clear -> enable.
|
||||||
|
// It's good practice to clear a counter before starting it for the first time.
|
||||||
|
//
|
||||||
|
// Also: Disabling and re-enabling the counters doesn't reset them; the clearing
|
||||||
|
// needs to happen while a counter is disabled to reset it.
|
||||||
|
//
|
||||||
|
// You can disable and re-enable with a different mode without explicitly
|
||||||
|
// clearing and have it keep going, continuing from where it left off.
|
||||||
|
//
|
||||||
|
|
||||||
|
__attribute__((no_instrument_function)) void PMCR_Init(int which, unsigned short mode, unsigned char count_type) // Will do nothing if perfcounter is already running!
|
||||||
|
{
|
||||||
|
// Don't do anything if being asked to enable an already-enabled counter
|
||||||
|
if( (which == 1) && ((!pmcr_enabled) || (pmcr_enabled == 2)) )
|
||||||
|
{
|
||||||
|
// counter 1
|
||||||
|
PMCR_Enable(1, mode, count_type, PMCR_RESET_COUNTER);
|
||||||
|
}
|
||||||
|
else if( (which == 2) && ((!pmcr_enabled) || (pmcr_enabled == 1)) )
|
||||||
|
{
|
||||||
|
// counter 2
|
||||||
|
PMCR_Enable(2, mode, count_type, PMCR_RESET_COUNTER);
|
||||||
|
}
|
||||||
|
else if( (which == 3) && (!pmcr_enabled) )
|
||||||
|
{
|
||||||
|
// Both
|
||||||
|
PMCR_Enable(3, mode, count_type, PMCR_RESET_COUNTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enable "undocumented" performance counters (well, they were undocumented at one point. They're documented now!)
|
||||||
|
__attribute__((no_instrument_function)) void PMCR_Enable(int which, unsigned short mode, unsigned char count_type, unsigned char reset_count) // Will do nothing if perfcounter is already running!
|
||||||
|
{
|
||||||
|
// Don't do anything if count_type or reset_count are invalid
|
||||||
|
if((count_type | reset_count) > 1)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build config from parameters
|
||||||
|
unsigned short pmcr_ctrl = PMCR_RUN_COUNTER | (reset_count << PMCR_RESET_COUNTER_SHIFT) | (count_type << PMCR_CLOCK_TYPE_SHIFT) | mode;
|
||||||
|
|
||||||
|
// Don't do anything if being asked to enable an already-enabled counter
|
||||||
|
if( (which == 1) && ((!pmcr_enabled) || (pmcr_enabled == 2)) )
|
||||||
|
{
|
||||||
|
// counter 1
|
||||||
|
*((volatile unsigned short*)PMCR1_CTRL_REG) = pmcr_ctrl;
|
||||||
|
|
||||||
|
pmcr_enabled += 1;
|
||||||
|
}
|
||||||
|
else if( (which == 2) && ((!pmcr_enabled) || (pmcr_enabled == 1)) )
|
||||||
|
{
|
||||||
|
// counter 2
|
||||||
|
*((volatile unsigned short*)PMCR2_CTRL_REG) = pmcr_ctrl;
|
||||||
|
|
||||||
|
pmcr_enabled += 2;
|
||||||
|
}
|
||||||
|
else if( (which == 3) && (!pmcr_enabled) )
|
||||||
|
{
|
||||||
|
// Both
|
||||||
|
*((volatile unsigned short*)PMCR1_CTRL_REG) = pmcr_ctrl;
|
||||||
|
*((volatile unsigned short*)PMCR2_CTRL_REG) = pmcr_ctrl;
|
||||||
|
|
||||||
|
pmcr_enabled = 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For reference:
|
||||||
|
// #define PMCTR1H_REG 0xFF100004
|
||||||
|
// #define PMCTR1L_REG 0xFF100008
|
||||||
|
|
||||||
|
// #define PMCTR2H_REG 0xFF10000C
|
||||||
|
// #define PMCTR2L_REG 0xFF100010
|
||||||
|
|
||||||
|
static const unsigned int pmcr1_regh = PMCTR1H_REG;
|
||||||
|
static const unsigned int pmcr1_regl = PMCTR1L_REG;
|
||||||
|
|
||||||
|
static const unsigned int pmcr2_regh = PMCTR2H_REG;
|
||||||
|
static const unsigned int pmcr2_regl = PMCTR2L_REG;
|
||||||
|
|
||||||
|
// Sorry, can only read one counter at a time!
|
||||||
|
// out_array should be an array consisting of 2x unsigned ints.
|
||||||
|
__attribute__((no_instrument_function)) void PMCR_Read(int which, volatile unsigned int *out_array)
|
||||||
|
{
|
||||||
|
// if pmcr is not enabled, this function will just return 0
|
||||||
|
|
||||||
|
// little endian (big endian would need to flip [0] and [1])
|
||||||
|
|
||||||
|
// Note: These reads really do need to be done in assembly: unfortunately it
|
||||||
|
// appears that using C causes GCC to insert a branch right smack in between
|
||||||
|
// the high and low reads of perf counter 2 (with a nop, so it's literally
|
||||||
|
// delaying the reads by several cycles!), which is totally insane. Doing it
|
||||||
|
// the assembly way ensures that nothing ridiculous like that happens. It's
|
||||||
|
// also portable between versions of GCC that do put the nonsensical branch in.
|
||||||
|
//
|
||||||
|
// One thing that would be nice is if SH4 had the movi20s instruction to make
|
||||||
|
// absolute addresses in 3 cycles, but only the SH2A has that... :(
|
||||||
|
if( (which == 1) && (pmcr_enabled & 0x1) )
|
||||||
|
{
|
||||||
|
// counter 1
|
||||||
|
// out_array[1] = *((volatile unsigned int*)PMCTR1H_REG) & 0xffff;
|
||||||
|
// out_array[0] = *((volatile unsigned int*)PMCTR1L_REG);
|
||||||
|
asm volatile("mov.l %[reg1h],r1\n\t" // load counter address (high)
|
||||||
|
"mov.l %[reg1l],r2\n\t" // load counter address (low)
|
||||||
|
"mov.l @r1,r1\n\t" // read counter (high)
|
||||||
|
"mov.l @r2,r2\n\t" // read counter (low)
|
||||||
|
"extu.w r1,r1\n\t" // zero-extend high, aka high & 0xffff
|
||||||
|
"mov.l r1,%[outh]\n\t" // get data to memory
|
||||||
|
"mov.l r2,%[outl]\n\t" // get data to memory
|
||||||
|
: [outh] "=m" (out_array[1]), [outl] "=m" (out_array[0])
|
||||||
|
: [reg1h] "m" (pmcr1_regh), [reg1l] "m" (pmcr1_regl) // SH4 can't mov an immediate longword into a register...
|
||||||
|
: "r1", "r2"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else if( (which == 2) && (pmcr_enabled & 0x2) )
|
||||||
|
{
|
||||||
|
// counter 2
|
||||||
|
// out_array[1] = *((volatile unsigned int*)PMCTR2H_REG) & 0xffff;
|
||||||
|
// out_array[0] = *((volatile unsigned int*)PMCTR2L_REG);
|
||||||
|
asm volatile("mov.l %[reg2h],r1\n\t" // load counter address (high)
|
||||||
|
"mov.l %[reg2l],r2\n\t" // load counter address (low)
|
||||||
|
"mov.l @r1,r1\n\t" // read counter (high)
|
||||||
|
"mov.l @r2,r2\n\t" // read counter (low)
|
||||||
|
"extu.w r1,r1\n\t" // zero-extend high, aka high & 0xffff
|
||||||
|
"mov.l r1,%[outh]\n\t" // get data to memory
|
||||||
|
"mov.l r2,%[outl]\n\t" // get data to memory
|
||||||
|
: [outh] "=m" (out_array[1]), [outl] "=m" (out_array[0])
|
||||||
|
: [reg2h] "m" (pmcr2_regh), [reg2l] "m" (pmcr2_regl) // SH4 can't mov an immediate longword into a register...
|
||||||
|
: "r1", "r2"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else if(!pmcr_enabled)
|
||||||
|
{
|
||||||
|
out_array[1] = 0;
|
||||||
|
out_array[0] = 0;
|
||||||
|
}
|
||||||
|
else // Invalid
|
||||||
|
{
|
||||||
|
out_array[1] = 0xffff;
|
||||||
|
out_array[0] = 0xffffffff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset counter to 0 and start it again
|
||||||
|
// NOTE: It does not appear to be possible to clear a counter while it is running.
|
||||||
|
__attribute__((no_instrument_function)) void PMCR_Restart(int which, unsigned short mode, unsigned char count_type)
|
||||||
|
{
|
||||||
|
if( (which == 1) && (pmcr_enabled & 0x1) )
|
||||||
|
{
|
||||||
|
// counter 1
|
||||||
|
PMCR_Stop(1);
|
||||||
|
PMCR_Enable(1, mode, count_type, PMCR_RESET_COUNTER);
|
||||||
|
}
|
||||||
|
else if( (which == 2) && (pmcr_enabled & 0x2) )
|
||||||
|
{
|
||||||
|
// counter 2
|
||||||
|
PMCR_Stop(2);
|
||||||
|
PMCR_Enable(2, mode, count_type, PMCR_RESET_COUNTER);
|
||||||
|
}
|
||||||
|
else if( (which == 3) && (pmcr_enabled == 3) )
|
||||||
|
{
|
||||||
|
// Both
|
||||||
|
PMCR_Stop(3);
|
||||||
|
PMCR_Enable(3, mode, count_type, PMCR_RESET_COUNTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clearing only works when the counter is disabled. Otherwise, stopping the
|
||||||
|
// counter via setting the 0x2000 bit holds the data in the data registers,
|
||||||
|
// whereas disabling without setting that bit reads back as all 0 (but doesn't
|
||||||
|
// clear the counters for next start). This function just stops a running
|
||||||
|
// counter and does nothing if the counter is already stopped or disabled, as
|
||||||
|
// clearing is handled by PMCR_Enable().
|
||||||
|
__attribute__((no_instrument_function)) void PMCR_Stop(int which)
|
||||||
|
{
|
||||||
|
if( (which == 1) && (pmcr_enabled & 0x1) )
|
||||||
|
{
|
||||||
|
// counter 1
|
||||||
|
*((volatile unsigned short*)PMCR1_CTRL_REG) = PMCR_STOP_COUNTER;
|
||||||
|
|
||||||
|
pmcr_enabled &= 0x2;
|
||||||
|
}
|
||||||
|
else if( (which == 2) && (pmcr_enabled & 0x2) )
|
||||||
|
{
|
||||||
|
// counter 2
|
||||||
|
*((volatile unsigned short*)PMCR2_CTRL_REG) = PMCR_STOP_COUNTER;
|
||||||
|
|
||||||
|
pmcr_enabled &= 0x1;
|
||||||
|
}
|
||||||
|
else if( (which == 3) && (pmcr_enabled == 3) )
|
||||||
|
{
|
||||||
|
// Both
|
||||||
|
*((volatile unsigned short*)PMCR1_CTRL_REG) = PMCR_STOP_COUNTER;
|
||||||
|
*((volatile unsigned short*)PMCR2_CTRL_REG) = PMCR_STOP_COUNTER;
|
||||||
|
|
||||||
|
pmcr_enabled = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that disabling does NOT clear the counter.
|
||||||
|
// It may appear that way because reading a disabled counter returns 0, but re-
|
||||||
|
// enabling without first clearing will simply continue where it left off.
|
||||||
|
__attribute__((no_instrument_function)) void PMCR_Disable(int which)
|
||||||
|
{
|
||||||
|
if(which == 1)
|
||||||
|
{
|
||||||
|
// counter 1
|
||||||
|
*((volatile unsigned short*)PMCR1_CTRL_REG) = PMCR_DISABLE_COUNTER;
|
||||||
|
|
||||||
|
pmcr_enabled &= 0x2;
|
||||||
|
}
|
||||||
|
else if(which == 2)
|
||||||
|
{
|
||||||
|
// counter 2
|
||||||
|
*((volatile unsigned short*)PMCR2_CTRL_REG) = PMCR_DISABLE_COUNTER;
|
||||||
|
|
||||||
|
pmcr_enabled &= 0x1;
|
||||||
|
}
|
||||||
|
else if(which == 3)
|
||||||
|
{
|
||||||
|
// Both
|
||||||
|
*((volatile unsigned short*)PMCR1_CTRL_REG) = PMCR_DISABLE_COUNTER;
|
||||||
|
*((volatile unsigned short*)PMCR2_CTRL_REG) = PMCR_DISABLE_COUNTER;
|
||||||
|
|
||||||
|
pmcr_enabled = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
316
GL/perfctr.h
Normal file
316
GL/perfctr.h
Normal file
|
@ -0,0 +1,316 @@
|
||||||
|
// ---- perfctr.h - SH7091 Performance Counter Module Header ----
|
||||||
|
//
|
||||||
|
// This file is part of the DreamHAL project, a hardware abstraction library
|
||||||
|
// primarily intended for use on the SH7091 found in hardware such as the SEGA
|
||||||
|
// Dreamcast game console.
|
||||||
|
//
|
||||||
|
// The performance counter module is hereby released into the public domain in
|
||||||
|
// the hope that it may prove useful. Now go profile some code and hit 60 fps! :)
|
||||||
|
//
|
||||||
|
// --Moopthehedgehog
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef __PERFCTR_H__
|
||||||
|
#define __PERFCTR_H__
|
||||||
|
|
||||||
|
//
|
||||||
|
// -- General SH4 Performance Counter Notes --
|
||||||
|
//
|
||||||
|
// There are 2 performance counters that can measure elapsed time. They are each
|
||||||
|
// 48-bit counters. They are part of the so-called "ASE" subsystem, which you can
|
||||||
|
// read about in chapter 13 of the "SuperH™ (SH) 32-bit RISC series SH-4, ST40
|
||||||
|
// system architecture, volume 1: system":
|
||||||
|
// https://www.st.com/content/ccc/resource/technical/document/user_manual/36/75/05/ac/e8/7e/42/2d/CD00147163.pdf/files/CD00147163.pdf/jcr:content/translations/en.CD00147163.pdf
|
||||||
|
//
|
||||||
|
// They can count cycles, so that's 199.5MHz (not 200MHz!!) a.k.a. roughly 5 ns
|
||||||
|
// increments. At 5 ns increments, a 48-bit cycle counter can run continuously
|
||||||
|
// for 16.33 days. It's actually 16 days, 7 hours, 55 minutes, and 2 seconds,
|
||||||
|
// depending on how close the bus clock is to 99.75MHz. There is also a second
|
||||||
|
// mode that counts cycles according to a ratio between the CPU frequency and
|
||||||
|
// the system bus clock, and it increments the counter by 12 every bus cycle.
|
||||||
|
// This second mode is detailed in the description for PMCR_CLOCK_TYPE in this
|
||||||
|
// file, and it is recommended for use when the CPU frequency is not a runtime
|
||||||
|
// constant.
|
||||||
|
//
|
||||||
|
// Side note: The counters don't have an overflow interrupt or overflow bit.
|
||||||
|
// (I did actually run one to 48-bit overflow in elapsed time mode using the
|
||||||
|
// ratio method to check this. They don't appear to sign-extend the upper 16
|
||||||
|
// bits in elapsed time mode, either.)
|
||||||
|
//
|
||||||
|
// The two counters are functionally identical. I would recommend using the
|
||||||
|
// PMCR_Init() function to start one (or both) up the first time.
|
||||||
|
//
|
||||||
|
// -- Configuration Address Info --
|
||||||
|
//
|
||||||
|
// Addresses for these counters can be easily seen here, in lxdream's source code:
|
||||||
|
// https://github.com/lutris/lxdream/blob/master/src/sh4/sh4mmio.h
|
||||||
|
//
|
||||||
|
// They are also on display in the Linux kernel, but at the time of writing appear
|
||||||
|
// to be set incorrectly (the clock mode at bit 0x100 is never set or cleared,
|
||||||
|
// for example, so they're at the mercy of whatever the hardware defaults are):
|
||||||
|
// http://git.lpclinux.com/cgit/linux-2.6.28.2-lpc313x/plain/arch/sh/oprofile/op_model_sh7750.c
|
||||||
|
// https://github.com/torvalds/linux/blob/master/arch/sh/kernel/cpu/sh4/perf_event.c
|
||||||
|
// ...It also appears as though they may not be handling bus ratio mode correctly,
|
||||||
|
// which appears to be the default mode on the Dreamcast in all my tests.
|
||||||
|
//
|
||||||
|
// You can also find these addresses by ripping a copy of Virtua Fighter 3 that
|
||||||
|
// you own for Dreamcast and looking at the raw byte code (or a raw disassembly)
|
||||||
|
// of its main program binary. It would appear as though they were timing a loop
|
||||||
|
// with the low half of perf counter 1 in elapsed time mode. Definitely seems
|
||||||
|
// like a good thing to do when targeting 60fps! Shenmue Disc 4 also uses the
|
||||||
|
// same configuration, but what's being timed is not as clear.
|
||||||
|
//
|
||||||
|
// Another place you can actually find both control addresses 0xFF00008x and all
|
||||||
|
// data addresses 0xFF10000x is in binaries of ancient, freely available versions
|
||||||
|
// of CodeScape. Literally all you need to do is open an SH7750-related DLL in a
|
||||||
|
// hex editor and do a search to find the control register addresses, and the
|
||||||
|
// data addresses are equally plain to see in any relevant performance profiling
|
||||||
|
// firmware. There's no effort or decryption required to find them whatsoever;
|
||||||
|
// all you need is an old trial version and a hex editor.
|
||||||
|
//
|
||||||
|
// However, something even better than all of that is if you search for "SH4
|
||||||
|
// 0xFF000084" (without quotes) online you'll find an old forum where some logs
|
||||||
|
// were posted of the terminal/command prompt output from some STMicro JTAG tool,
|
||||||
|
// which not only has the address registers but also clearly characterizes their
|
||||||
|
// size as 16-bit:
|
||||||
|
// https://www.multimediaforum.de/threads/36260834-alice-hsn-3800tw-usb-jtag-ft4232h/page2
|
||||||
|
//
|
||||||
|
// -- Event Mode Info --
|
||||||
|
//
|
||||||
|
// Specific information on each counter mode can be found in the document titled
|
||||||
|
// "SuperH™ Family E10A-USB Emulator: Additional Document for User’s Manual:
|
||||||
|
// Supplementary Information on Using the SH7750R Renesas Microcomputer Development Environment System"
|
||||||
|
// which is available on Renesas's website, in the "Documents" section of the
|
||||||
|
// E10A-USB product page:
|
||||||
|
// https://www.renesas.com/us/en/products/software-tools/tools/emulator/e10a-usb.html
|
||||||
|
// At the time of writing (12/2019), the E10A-USB adapter is still available
|
||||||
|
// for purchase, and it is priced around $1200 (USD).
|
||||||
|
//
|
||||||
|
// Appendix C of the "ST40 Micro Toolset Manual" also has these modes documented:
|
||||||
|
// https://www.st.com/content/ccc/resource/technical/document/user_manual/c5/98/11/89/50/68/41/66/CD17379953.pdf/files/CD17379953.pdf/jcr:content/translations/en.CD17379953.pdf
|
||||||
|
//
|
||||||
|
// See here for the hexadecimal values corresponding to each mode (pg. 370):
|
||||||
|
// http://www.macmadigan.com/BusaECU/Renesas%20documents/Hitachi_codescape_CS40_light_userguides.pdf
|
||||||
|
// You can also find the same "Counter Description Table" in user's guide PDFs
|
||||||
|
// bundled in ancient demo versions of CodeScape 3 from 2000 (e.g.
|
||||||
|
// CSDemo_272.exe), which can still be found in the Internet Archive.
|
||||||
|
// http://web.archive.org/web/*/http://codescape.com/dl/CSDemo/*
|
||||||
|
//
|
||||||
|
// See here for a support document on Lauterbach's SH2, SH3, and SH4 debugger,
|
||||||
|
// which contains units for each mode (e.g. which measure time and which just
|
||||||
|
// count): https://www.lauterbach.com/frames.html?home.html (It's in Downloads
|
||||||
|
// -> Trace32 Help System -> it's the file called "SH2, SH3 and SH4 Debugger"
|
||||||
|
// with the filename debugger_sh4.pdf).
|
||||||
|
//
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- Performance Counter Registers ---
|
||||||
|
//
|
||||||
|
|
||||||
|
// These registers are 16 bits only and configure the performance counters
|
||||||
|
#define PMCR1_CTRL_REG 0xFF000084
|
||||||
|
#define PMCR2_CTRL_REG 0xFF000088
|
||||||
|
|
||||||
|
// These registers are 32 bits each and hold the high and low parts of each counter
|
||||||
|
#define PMCTR1H_REG 0xFF100004
|
||||||
|
#define PMCTR1L_REG 0xFF100008
|
||||||
|
|
||||||
|
#define PMCTR2H_REG 0xFF10000C
|
||||||
|
#define PMCTR2L_REG 0xFF100010
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- Performance Counter Configuration Flags ---
|
||||||
|
//
|
||||||
|
|
||||||
|
// These bits' functions are currently unknown, but they may simply be reserved.
|
||||||
|
// It's possible that there's a [maybe expired?] patent that details the
|
||||||
|
// configuration registers, though I haven't been able to find one. Places to
|
||||||
|
// check would be Google Patents and the Japanese Patent Office--maybe someone
|
||||||
|
// else can find something?
|
||||||
|
//
|
||||||
|
// Some notes:
|
||||||
|
// Writing 1 to all of these bits reads back as 0, so it looks like they aren't
|
||||||
|
// config bits. It's possible they are write-only like the stop bit, though,
|
||||||
|
// or that they're just reserved-write-0-only. It appears that they are always
|
||||||
|
// written with zeros in software that uses them, so that's confirmed safe to do.
|
||||||
|
//
|
||||||
|
// Also, after running counter 1 to overflow, it appears there's no overflow bit
|
||||||
|
// (maybe the designers thought 48-bits would be so much to count to that they
|
||||||
|
// didn't bother implementing one?). The upper 16-bits of the counter high
|
||||||
|
// register are also not sign-extension bits. They may be a hidden config area,
|
||||||
|
// but probably not because big endian mode would swap the byte order.
|
||||||
|
#define PMCR_UNKNOWN_BIT_0040 0x0040
|
||||||
|
#define PMCR_UNKNOWN_BIT_0080 0x0080
|
||||||
|
#define PMCR_UNKNOWN_BIT_0200 0x0200
|
||||||
|
#define PMCR_UNKNOWN_BIT_0400 0x0400
|
||||||
|
#define PMCR_UNKNOWN_BIT_0800 0x0800
|
||||||
|
#define PMCR_UNKNOWN_BIT_1000 0x1000
|
||||||
|
|
||||||
|
// PMCR_MODE_CLEAR_INVERTED just clears the event mode if it's inverted with
|
||||||
|
// '~', and event modes are listed below.
|
||||||
|
#define PMCR_MODE_CLEAR_INVERTED 0x003f
|
||||||
|
|
||||||
|
// PMCR_CLOCK_TYPE sets the counters to count clock cycles or CPU/bus ratio mode
|
||||||
|
// cycles (where T = C x B / 24 and T is time, C is count, and B is time
|
||||||
|
// of one bus cycle). Note: B = 1/99753008 or so, but it may vary, as mine is
|
||||||
|
// actually 1/99749010-ish; the target frequency is probably meant to be 99.75MHz.
|
||||||
|
//
|
||||||
|
// See the ST40 or Renesas SH7750R documents described in the above "Event Mode
|
||||||
|
// Info" section for more details about that formula.
|
||||||
|
//
|
||||||
|
// Set PMCR_CLOCK_TYPE to 0 for CPU cycle counting, where 1 count = 1 cycle, or
|
||||||
|
// set it to 1 to use the above formula. Renesas documentation recommends using
|
||||||
|
// the ratio version (set the bit to 1) when user programs alter CPU clock
|
||||||
|
// frequencies. This header has some definitions later on to help with this.
|
||||||
|
#define PMCR_CLOCK_TYPE 0x0100
|
||||||
|
#define PMCR_CLOCK_TYPE_SHIFT 8
|
||||||
|
|
||||||
|
// PMCR_STOP_COUNTER is write-only, as it always reads back as 0. It does what
|
||||||
|
// the name suggests: when this bit is written to, the counter stops. However,
|
||||||
|
// if written to while the counter is disabled or stopped, the counter's high
|
||||||
|
// and low registers are reset to 0.
|
||||||
|
//
|
||||||
|
// Using PMCR_STOP_COUNTER to stop the counter has the effect of holding the
|
||||||
|
// data in the data registers while stopped, unlike PMCR_DISABLE_COUNTER, and
|
||||||
|
// this bit needs to be written to again (e.g. on next start) in order to
|
||||||
|
// actually clear the counter data for another run. If not explicitly cleared,
|
||||||
|
// the counter will continue from where it left off before being stopped.
|
||||||
|
#define PMCR_STOP_COUNTER 0x2000
|
||||||
|
#define PMCR_RESET_COUNTER_SHIFT 13
|
||||||
|
|
||||||
|
// Bits 0xC000 both need to be set to 1 for the counters to actually begin
|
||||||
|
// counting. I have seen that the Linux kernel actually separates them out into
|
||||||
|
// two separate labelled bits (PMEN and PMST) for some reason, however they do
|
||||||
|
// not appear to do anything separately. Perhaps this is a two-bit mode where
|
||||||
|
// 1-1 is run, 1-0 and 0-1 are ???, and 0-0 is off.
|
||||||
|
#define PMCR_RUN_COUNTER 0xC000
|
||||||
|
#define PMCR_RUN_SHIFT 14
|
||||||
|
// Interestingly, the output here writes 0x6000 to the counter config registers,
|
||||||
|
// which would be the "PMST" bit and the "RESET" bit:
|
||||||
|
// https://www.multimediaforum.de/threads/36260834-alice-hsn-3800tw-usb-jtag-ft4232h/page2
|
||||||
|
|
||||||
|
// To disable a counter, just write 0 to its config register. This will not
|
||||||
|
// reset the counter to 0, as that requires an explicit clear via setting the
|
||||||
|
// PMCR_STOP_COUNTER bit. What's odd is that a disabled counter's data
|
||||||
|
// registers read back as all 0, but re-enabling it without a clear will
|
||||||
|
// continue from the last value before disabling.
|
||||||
|
#define PMCR_DISABLE_COUNTER 0x0000
|
||||||
|
|
||||||
|
// These definitions merely separate out the two PMCR_RUN_COUNTER bits, and
|
||||||
|
// they are included here for documentation purposes.
|
||||||
|
|
||||||
|
// PMST may mean PMCR START. It's consistently used to enable the counter.
|
||||||
|
// I'm just calling it PMST here for lack of a better name, since this is what
|
||||||
|
// the Linux kernel and lxdream call it. It could also have something to do with
|
||||||
|
// a mode specific to STMicroelectronics.
|
||||||
|
#define PMCR_PMST_BIT 0x4000
|
||||||
|
#define PMCR_PMST_SHIFT 14
|
||||||
|
|
||||||
|
// Likewise PMEN may mean PMCR ENABLE
|
||||||
|
#define PMCR_PMEN_BIT 0x8000
|
||||||
|
#define PMCR_PMEN_SHIFT 15
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- Performance Counter Event Code Definitions ---
|
||||||
|
//
|
||||||
|
// Interestingly enough, it so happens that the SEGA Dreamcast's CPU seems to
|
||||||
|
// contain the same performance counter functionality as SH4 debug adapters for
|
||||||
|
// the SH7750R. Awesome!
|
||||||
|
//
|
||||||
|
|
||||||
|
// MODE DEFINITION VALUE MEASUREMENT TYPE & NOTES
|
||||||
|
#define PMCR_INIT_NO_MODE 0x00 // None; Just here to be complete
|
||||||
|
#define PMCR_OPERAND_READ_ACCESS_MODE 0x01 // Quantity; With cache
|
||||||
|
#define PMCR_OPERAND_WRITE_ACCESS_MODE 0x02 // Quantity; With cache
|
||||||
|
#define PMCR_UTLB_MISS_MODE 0x03 // Quantity
|
||||||
|
#define PMCR_OPERAND_CACHE_READ_MISS_MODE 0x04 // Quantity
|
||||||
|
#define PMCR_OPERAND_CACHE_WRITE_MISS_MODE 0x05 // Quantity
|
||||||
|
#define PMCR_INSTRUCTION_FETCH_MODE 0x06 // Quantity; With cache
|
||||||
|
#define PMCR_INSTRUCTION_TLB_MISS_MODE 0x07 // Quantity
|
||||||
|
#define PMCR_INSTRUCTION_CACHE_MISS_MODE 0x08 // Quantity
|
||||||
|
#define PMCR_ALL_OPERAND_ACCESS_MODE 0x09 // Quantity
|
||||||
|
#define PMCR_ALL_INSTRUCTION_FETCH_MODE 0x0a // Quantity
|
||||||
|
#define PMCR_ON_CHIP_RAM_OPERAND_ACCESS_MODE 0x0b // Quantity
|
||||||
|
// No 0x0c
|
||||||
|
#define PMCR_ON_CHIP_IO_ACCESS_MODE 0x0d // Quantity
|
||||||
|
#define PMCR_OPERAND_ACCESS_MODE 0x0e // Quantity; With cache, counts both reads and writes
|
||||||
|
#define PMCR_OPERAND_CACHE_MISS_MODE 0x0f // Quantity
|
||||||
|
#define PMCR_BRANCH_ISSUED_MODE 0x10 // Quantity; Not the same as branch taken!
|
||||||
|
#define PMCR_BRANCH_TAKEN_MODE 0x11 // Quantity
|
||||||
|
#define PMCR_SUBROUTINE_ISSUED_MODE 0x12 // Quantity; Issued a BSR, BSRF, JSR, JSR/N
|
||||||
|
#define PMCR_INSTRUCTION_ISSUED_MODE 0x13 // Quantity
|
||||||
|
#define PMCR_PARALLEL_INSTRUCTION_ISSUED_MODE 0x14 // Quantity
|
||||||
|
#define PMCR_FPU_INSTRUCTION_ISSUED_MODE 0x15 // Quantity
|
||||||
|
#define PMCR_INTERRUPT_COUNTER_MODE 0x16 // Quantity
|
||||||
|
#define PMCR_NMI_COUNTER_MODE 0x17 // Quantity
|
||||||
|
#define PMCR_TRAPA_INSTRUCTION_COUNTER_MODE 0x18 // Quantity
|
||||||
|
#define PMCR_UBC_A_MATCH_MODE 0x19 // Quantity
|
||||||
|
#define PMCR_UBC_B_MATCH_MODE 0x1a // Quantity
|
||||||
|
// No 0x1b-0x20
|
||||||
|
#define PMCR_INSTRUCTION_CACHE_FILL_MODE 0x21 // Cycles
|
||||||
|
#define PMCR_OPERAND_CACHE_FILL_MODE 0x22 // Cycles
|
||||||
|
#define PMCR_ELAPSED_TIME_MODE 0x23 // Cycles; For 200MHz CPU: 5ns per count in 1 cycle = 1 count mode, or around 417.715ps per count (increments by 12) in CPU/bus ratio mode
|
||||||
|
#define PMCR_PIPELINE_FREEZE_BY_ICACHE_MISS_MODE 0x24 // Cycles
|
||||||
|
#define PMCR_PIPELINE_FREEZE_BY_DCACHE_MISS_MODE 0x25 // Cycles
|
||||||
|
// No 0x26
|
||||||
|
#define PMCR_PIPELINE_FREEZE_BY_BRANCH_MODE 0x27 // Cycles
|
||||||
|
#define PMCR_PIPELINE_FREEZE_BY_CPU_REGISTER_MODE 0x28 // Cycles
|
||||||
|
#define PMCR_PIPELINE_FREEZE_BY_FPU_MODE 0x29 // Cycles
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- Performance Counter Support Definitions ---
|
||||||
|
//
|
||||||
|
|
||||||
|
// This definition can be passed as the init/enable/restart functions'
|
||||||
|
// count_type parameter to use the 1 cycle = 1 count mode. This is how the
|
||||||
|
// counter can be made to run for 16.3 days.
|
||||||
|
#define PMCR_COUNT_CPU_CYCLES 0
|
||||||
|
// Likewise this uses the CPU/bus ratio method
|
||||||
|
#define PMCR_COUNT_RATIO_CYCLES 1
|
||||||
|
|
||||||
|
// These definitions are for the enable function and specify whether to reset
|
||||||
|
// a counter to 0 or to continue from where it left off
|
||||||
|
#define PMCR_CONTINUE_COUNTER 0
|
||||||
|
#define PMCR_RESET_COUNTER 1
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- Performance Counter Miscellaneous Definitions ---
|
||||||
|
//
|
||||||
|
// For convenience; assume stock bus clock of 99.75MHz
|
||||||
|
// (Bus clock is the external CPU clock, not the peripheral bus clock)
|
||||||
|
//
|
||||||
|
|
||||||
|
#define PMCR_SH4_CPU_FREQUENCY 199500000
|
||||||
|
#define PMCR_CPU_CYCLES_MAX_SECONDS 1410902
|
||||||
|
#define PMCR_SH4_BUS_FREQUENCY 99750000
|
||||||
|
#define PMCR_SH4_BUS_FREQUENCY_SCALED 2394000000 // 99.75MHz x 24
|
||||||
|
#define PMCR_BUS_RATIO_MAX_SECONDS 117575
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- Performance Counter Functions ---
|
||||||
|
//
|
||||||
|
// See perfctr.c file for more details about each function and some more usage notes.
|
||||||
|
//
|
||||||
|
// Note: PMCR_Init() and PMCR_Enable() will do nothing if the perf counter is already running!
|
||||||
|
//
|
||||||
|
|
||||||
|
// Clear counter and enable
|
||||||
|
void PMCR_Init(int which, unsigned short mode, unsigned char count_type);
|
||||||
|
|
||||||
|
// Enable one or both of these "undocumented" performance counters.
|
||||||
|
void PMCR_Enable(int which, unsigned short mode, unsigned char count_type, unsigned char reset_counter);
|
||||||
|
|
||||||
|
// Stop, clear, and re-enable with new mode (or same mode)
|
||||||
|
void PMCR_Restart(int which, unsigned short mode, unsigned char count_type);
|
||||||
|
|
||||||
|
// Read a counter
|
||||||
|
// out_array is specifically uint32 out_array[2] -- 48-bit value needs a 64-bit storage unit
|
||||||
|
void PMCR_Read(int which, volatile unsigned int *out_array);
|
||||||
|
|
||||||
|
// Stop counter(s) (without clearing)
|
||||||
|
void PMCR_Stop(int which);
|
||||||
|
|
||||||
|
// Disable counter(s) (without clearing)
|
||||||
|
void PMCR_Disable(int which);
|
||||||
|
|
||||||
|
#endif /* __PERFCTR_H__ */
|
|
@ -6,6 +6,7 @@
|
||||||
#include "../include/gl.h"
|
#include "../include/gl.h"
|
||||||
#include "../containers/aligned_vector.h"
|
#include "../containers/aligned_vector.h"
|
||||||
#include "../containers/named_array.h"
|
#include "../containers/named_array.h"
|
||||||
|
#include "cygprofile.h"
|
||||||
|
|
||||||
extern void* memcpy4 (void *dest, const void *src, size_t count);
|
extern void* memcpy4 (void *dest, const void *src, size_t count);
|
||||||
|
|
||||||
|
@ -249,6 +250,11 @@ typedef struct {
|
||||||
GLint size;
|
GLint size;
|
||||||
} AttribPointer;
|
} AttribPointer;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
float xyz[3];
|
||||||
|
float n[3];
|
||||||
|
} EyeSpaceData;
|
||||||
|
|
||||||
GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func);
|
GLboolean _glCheckValidEnum(GLint param, GLint* values, const char* func);
|
||||||
|
|
||||||
GLuint* _glGetEnabledAttributes();
|
GLuint* _glGetEnabledAttributes();
|
||||||
|
@ -280,7 +286,7 @@ GLuint _glGetMipmapLevelCount(TextureObject* obj);
|
||||||
GLboolean _glIsLightingEnabled();
|
GLboolean _glIsLightingEnabled();
|
||||||
GLboolean _glIsLightEnabled(GLubyte light);
|
GLboolean _glIsLightEnabled(GLubyte light);
|
||||||
GLboolean _glIsColorMaterialEnabled();
|
GLboolean _glIsColorMaterialEnabled();
|
||||||
void _glCalculateLightingContribution(const GLint light, const GLfloat* pos, const GLfloat* normal, uint8_t* bgra, GLfloat* colour);
|
void _glPerformLighting(Vertex* vertices, const EyeSpaceData* es, const int32_t count);
|
||||||
|
|
||||||
unsigned char _glIsClippingEnabled();
|
unsigned char _glIsClippingEnabled();
|
||||||
void _glEnableClipping(unsigned char v);
|
void _glEnableClipping(unsigned char v);
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
#include "profiler.h"
|
#include "profiler.h"
|
||||||
#include "../containers/aligned_vector.h"
|
#include "../containers/aligned_vector.h"
|
||||||
|
|
||||||
|
#if PROFILING_COMPILED
|
||||||
|
|
||||||
#define MAX_PATH 256
|
#define MAX_PATH 256
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -141,3 +143,4 @@ void profiler_print_stats() {
|
||||||
fprintf(stderr, "%-60s%-20f%-20f%" PRIu64 "\n", result->name, (double)avg, (double)ms, result->total_calls);
|
fprintf(stderr, "%-60s%-20f%-20f%" PRIu64 "\n", result->name, (double)avg, (double)ms, result->total_calls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -7,12 +7,26 @@ typedef struct {
|
||||||
uint64_t start_time_in_us;
|
uint64_t start_time_in_us;
|
||||||
} Profiler;
|
} Profiler;
|
||||||
|
|
||||||
|
#define PROFILING_COMPILED 0
|
||||||
|
|
||||||
|
#if PROFILING_COMPILED
|
||||||
Profiler* profiler_push(const char* name);
|
Profiler* profiler_push(const char* name);
|
||||||
void profiler_checkpoint(const char* name);
|
void _profiler_checkpoint(const char* name);
|
||||||
void profiler_pop();
|
void _profiler_pop();
|
||||||
|
|
||||||
void profiler_print_stats();
|
void _profiler_print_stats();
|
||||||
|
|
||||||
void profiler_enable();
|
void _profiler_enable();
|
||||||
void profiler_disable();
|
void _profiler_disable();
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define profiler_push(name);
|
||||||
|
#define profiler_checkpoint(name);
|
||||||
|
#define profiler_pop();
|
||||||
|
|
||||||
|
#define profiler_print_stats();
|
||||||
|
|
||||||
|
#define profiler_enable();
|
||||||
|
#define profiler_disable();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
1448
GL/sh4_math.h
Normal file
1448
GL/sh4_math.h
Normal file
File diff suppressed because it is too large
Load Diff
20
GL/texture.c
20
GL/texture.c
|
@ -743,11 +743,11 @@ GLint _cleanInternalFormat(GLint internalFormat) {
|
||||||
|
|
||||||
typedef void (*TextureConversionFunc)(const GLubyte*, GLubyte*);
|
typedef void (*TextureConversionFunc)(const GLubyte*, GLubyte*);
|
||||||
|
|
||||||
static inline void _rgba8888_to_argb4444(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgba8888_to_argb4444(const GLubyte* source, GLubyte* dest) {
|
||||||
*((GLushort*) dest) = (source[3] & 0xF0) << 8 | (source[0] & 0xF0) << 4 | (source[1] & 0xF0) | (source[2] & 0xF0) >> 4;
|
*((GLushort*) dest) = (source[3] & 0xF0) << 8 | (source[0] & 0xF0) << 4 | (source[1] & 0xF0) | (source[2] & 0xF0) >> 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgba8888_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgba8888_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
||||||
/* Noop */
|
/* Noop */
|
||||||
GLubyte* dst = (GLubyte*) dest;
|
GLubyte* dst = (GLubyte*) dest;
|
||||||
dst[0] = source[0];
|
dst[0] = source[0];
|
||||||
|
@ -756,11 +756,11 @@ static inline void _rgba8888_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
||||||
dst[3] = source[3];
|
dst[3] = source[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgba8888_to_rgb565(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgba8888_to_rgb565(const GLubyte* source, GLubyte* dest) {
|
||||||
*((GLushort*) dest) = ((source[0] & 0b11111000) << 8) | ((source[1] & 0b11111100) << 3) | (source[2] >> 3);
|
*((GLushort*) dest) = ((source[0] & 0b11111000) << 8) | ((source[1] & 0b11111100) << 3) | (source[2] >> 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgb888_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgb888_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
||||||
/* Noop */
|
/* Noop */
|
||||||
GLubyte* dst = (GLubyte*) dest;
|
GLubyte* dst = (GLubyte*) dest;
|
||||||
dst[0] = source[0];
|
dst[0] = source[0];
|
||||||
|
@ -769,24 +769,24 @@ static inline void _rgb888_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
||||||
dst[3] = 255;
|
dst[3] = 255;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgb888_to_rgb565(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgb888_to_rgb565(const GLubyte* source, GLubyte* dest) {
|
||||||
*((GLushort*) dest) = ((source[0] & 0b11111000) << 8) | ((source[1] & 0b11111100) << 3) | (source[2] >> 3);
|
*((GLushort*) dest) = ((source[0] & 0b11111000) << 8) | ((source[1] & 0b11111100) << 3) | (source[2] >> 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgba8888_to_a000(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgba8888_to_a000(const GLubyte* source, GLubyte* dest) {
|
||||||
*((GLushort*) dest) = ((source[3] & 0b11111000) << 8);
|
*((GLushort*) dest) = ((source[3] & 0b11111000) << 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _r8_to_rgb565(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _r8_to_rgb565(const GLubyte* source, GLubyte* dest) {
|
||||||
*((GLushort*) dest) = (source[0] & 0b11111000) << 8;
|
*((GLushort*) dest) = (source[0] & 0b11111000) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgba4444_to_argb4444(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgba4444_to_argb4444(const GLubyte* source, GLubyte* dest) {
|
||||||
GLushort* src = (GLushort*) source;
|
GLushort* src = (GLushort*) source;
|
||||||
*((GLushort*) dest) = ((*src & 0x000F) << 12) | *src >> 4;
|
*((GLushort*) dest) = ((*src & 0x000F) << 12) | *src >> 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _rgba4444_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _rgba4444_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
||||||
GLushort src = *((GLushort*) source);
|
GLushort src = *((GLushort*) source);
|
||||||
GLubyte* dst = (GLubyte*) dest;
|
GLubyte* dst = (GLubyte*) dest;
|
||||||
|
|
||||||
|
@ -796,7 +796,7 @@ static inline void _rgba4444_to_rgba8888(const GLubyte* source, GLubyte* dest) {
|
||||||
dst[3] = ((src & 0x000F)) * 2;
|
dst[3] = ((src & 0x000F)) * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _i8_to_i8(const GLubyte* source, GLubyte* dest) {
|
static INLINE_DEBUG void _i8_to_i8(const GLubyte* source, GLubyte* dest) {
|
||||||
/* For indexes */
|
/* For indexes */
|
||||||
GLubyte* dst = (GLubyte*) dest;
|
GLubyte* dst = (GLubyte*) dest;
|
||||||
*dst = *source;
|
*dst = *source;
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <dc/sq.h>
|
||||||
|
#include <kos/string.h>
|
||||||
|
|
||||||
#if defined(__APPLE__) || defined(__WIN32__)
|
#if defined(__APPLE__) || defined(__WIN32__)
|
||||||
/* Linux + Kos define this, OSX does not, so just use malloc there */
|
/* Linux + Kos define this, OSX does not, so just use malloc there */
|
||||||
|
@ -25,7 +27,7 @@ void aligned_vector_init(AlignedVector* vector, unsigned int element_size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline unsigned int round_to_chunk_size(unsigned int val) {
|
static INLINE_DEBUG unsigned int round_to_chunk_size(unsigned int val) {
|
||||||
const unsigned int n = val;
|
const unsigned int n = val;
|
||||||
const unsigned int m = ALIGNED_VECTOR_CHUNK_SIZE;
|
const unsigned int m = ALIGNED_VECTOR_CHUNK_SIZE;
|
||||||
|
|
||||||
|
@ -107,33 +109,12 @@ void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_co
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
|
|
||||||
#if 0
|
|
||||||
if(index >= vector->size){
|
|
||||||
char msg[60];
|
|
||||||
sprintf(msg, "Vector OOB: %d %d wanted %d\n", vector->capacity, vector->size, index);
|
|
||||||
//aligned_vector_resize(vector, index);
|
|
||||||
assert_msg(index < vector->size, msg);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
assert(index < vector->size);
|
|
||||||
return &vector->data[index * vector->element_size];
|
|
||||||
}
|
|
||||||
|
|
||||||
void* aligned_vector_back(AlignedVector* vector) {
|
|
||||||
return aligned_vector_at(vector, vector->size - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
|
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count) {
|
||||||
const unsigned int current = vector->size;
|
const unsigned int current = vector->size;
|
||||||
aligned_vector_resize(vector, vector->size + additional_count);
|
aligned_vector_resize(vector, vector->size + additional_count);
|
||||||
return aligned_vector_at(vector, current);
|
return aligned_vector_at(vector, current);
|
||||||
}
|
}
|
||||||
|
|
||||||
void aligned_vector_clear(AlignedVector* vector) {
|
|
||||||
vector->size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
|
void aligned_vector_shrink_to_fit(AlignedVector* vector) {
|
||||||
if(vector->size == 0) {
|
if(vector->size == 0) {
|
||||||
free(vector->data);
|
free(vector->data);
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "../GL/cygprofile.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned int size;
|
unsigned int size;
|
||||||
unsigned int capacity;
|
unsigned int capacity;
|
||||||
|
@ -18,12 +20,27 @@ void aligned_vector_init(AlignedVector* vector, unsigned int element_size);
|
||||||
void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
|
void aligned_vector_reserve(AlignedVector* vector, unsigned int element_count);
|
||||||
void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
|
void* aligned_vector_push_back(AlignedVector* vector, const void* objs, unsigned int count);
|
||||||
void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
|
void* aligned_vector_resize(AlignedVector* vector, const unsigned int element_count);
|
||||||
void* aligned_vector_at(const AlignedVector* vector, const unsigned int index);
|
INLINE_ALWAYS void* aligned_vector_at(const AlignedVector* vector, const unsigned int index) {
|
||||||
|
#if 0
|
||||||
|
if(index >= vector->size){
|
||||||
|
char msg[60];
|
||||||
|
sprintf(msg, "Vector OOB: %d %d wanted %d\n", vector->capacity, vector->size, index);
|
||||||
|
//aligned_vector_resize(vector, index);
|
||||||
|
assert_msg(index < vector->size, msg);
|
||||||
|
}
|
||||||
|
assert(index < vector->size); /* Check here */
|
||||||
|
#endif
|
||||||
|
return &vector->data[index * vector->element_size];
|
||||||
|
}
|
||||||
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
|
void* aligned_vector_extend(AlignedVector* vector, const unsigned int additional_count);
|
||||||
void aligned_vector_clear(AlignedVector* vector);
|
INLINE_ALWAYS void aligned_vector_clear(AlignedVector* vector){
|
||||||
|
vector->size = 0;
|
||||||
|
}
|
||||||
void aligned_vector_shrink_to_fit(AlignedVector* vector);
|
void aligned_vector_shrink_to_fit(AlignedVector* vector);
|
||||||
void aligned_vector_cleanup(AlignedVector* vector);
|
void aligned_vector_cleanup(AlignedVector* vector);
|
||||||
void* aligned_vector_back(AlignedVector* vector);
|
INLINE_ALWAYS void* aligned_vector_back(AlignedVector* vector){
|
||||||
|
return aligned_vector_at(vector, vector->size - 1);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,13 +44,6 @@ void named_array_init(NamedArray* array, unsigned int element_size, unsigned int
|
||||||
memset(array->elements, 0, element_size * max_elements);
|
memset(array->elements, 0, element_size * max_elements);
|
||||||
}
|
}
|
||||||
|
|
||||||
char named_array_used(NamedArray* array, unsigned int id) {
|
|
||||||
unsigned int i = id / 8;
|
|
||||||
unsigned int j = id % 8;
|
|
||||||
|
|
||||||
unsigned char v = array->used_markers[i] & (unsigned char) (1 << j);
|
|
||||||
return !!(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
void* named_array_alloc(NamedArray* array, unsigned int* new_id) {
|
void* named_array_alloc(NamedArray* array, unsigned int* new_id) {
|
||||||
unsigned int i = 0, j = 0;
|
unsigned int i = 0, j = 0;
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "../GL/cygprofile.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned int element_size;
|
unsigned int element_size;
|
||||||
unsigned int max_element_count;
|
unsigned int max_element_count;
|
||||||
|
@ -14,7 +16,13 @@ typedef struct {
|
||||||
} NamedArray;
|
} NamedArray;
|
||||||
|
|
||||||
void named_array_init(NamedArray* array, unsigned int element_size, unsigned int max_elements);
|
void named_array_init(NamedArray* array, unsigned int element_size, unsigned int max_elements);
|
||||||
char named_array_used(NamedArray* array, unsigned int id);
|
INLINE_ALWAYS char named_array_used(NamedArray* array, unsigned int id) {
|
||||||
|
const unsigned int i = id / 8;
|
||||||
|
const unsigned int j = id % 8;
|
||||||
|
|
||||||
|
unsigned char v = array->used_markers[i] & (unsigned char) (1 << j);
|
||||||
|
return !!(v);
|
||||||
|
}
|
||||||
|
|
||||||
void* named_array_alloc(NamedArray* array, unsigned int* new_id);
|
void* named_array_alloc(NamedArray* array, unsigned int* new_id);
|
||||||
void* named_array_reserve(NamedArray* array, unsigned int id);
|
void* named_array_reserve(NamedArray* array, unsigned int id);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user