Merge branch 'dreamroq' into 'master'

Dreamroq

See merge request simulant/GLdc!106
This commit is contained in:
lerabot 2023-09-26 18:51:35 +00:00
commit b32504bec4
16 changed files with 2654 additions and 0 deletions

View File

@ -210,6 +210,8 @@ gen_sample(polymark samples/polymark/main.c)
gen_sample(cubes samples/cubes/main.cpp)
gen_sample(zclip_test tests/zclip/main.cpp)
gen_sample(dreamroq samples/dreamroq/main.c samples/dreamroq/dreamroqlib.c samples/dreamroq/libdcmc/snd_stream.c samples/dreamroq/libdcmc/snddrv.c samples/dreamroq/libdcmc/timer.c samples/dreamroq/profiler.c)
if(PLATFORM_DREAMCAST)
gen_sample(trimark samples/trimark/main.c)
gen_sample(quadmark samples/quadmark/main.c samples/profiler.c)

View File

@ -2062,3 +2062,18 @@ GLAPI void APIENTRY glGetTexImage(GLenum tex, GLint lod, GLenum format, GLenum t
_GL_UNUSED(type);
_GL_UNUSED(img);
}
GLAPI void glKosCopyTexture(void* data, GLuint bytes) {
TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE];
FASTCPY(active->data, data, bytes);
/*
//Set PVR DMA register
*((volatile int *)0xA05F6888) = 1;
//Convert read/write area pointer to DMA write only area pointer
void *dmaareaptr = ((uintptr_t)active->data & 0xffffff) | 0x11000000;
sq_cpy(dmaareaptr, data, bytes);
*/
}

View File

@ -113,6 +113,8 @@ GLAPI void APIENTRY glKosShutdown();
* by default textures use shared palette 0.
*/
/* Copies `bytes` bytes from `src` into the data buffer of the texture that is
 * bound to the currently active texture unit. */
GLAPI void APIENTRY glKosCopyTexture(void *src, GLuint bytes);
#define GL_SHARED_TEXTURE_PALETTE_0_KOS 0xEEFC
#define GL_SHARED_TEXTURE_PALETTE_1_KOS 0xEEFD

View File

@ -0,0 +1,64 @@
Dreamroq is covered under the same terms as the KallistiOS license which
is copied below:
Most of the code of KallistiOS proper is currently covered under the KOS
License, which are the terms of the *new* BSD license with our names
inserted as the copyright holders and the "advertising clause" removed
entirely. In all files that state that they are part of the KallistiOS
operating system, you can assume that the following text is inserted in
the header:
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the KOS License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* KOS License (README.KOS) for more details.
*
* You should have received a copy of the KOS License along with this
* program; if not, please visit Cryptic Allusion DCDev at:
*
* http://dcdev.allusion.net/
*
The text of that license follows. In layman's terms, all it really
says is that you have to give credit where credit is due (both in
derived source files and binary compilations; a credit in the
documentation is ok) and there is no warranty.
Dan Potter
All of the documentation and software included in the KallistiOS Releases
is copyrighted (c)2000-2002 by Dan Potter and others (as noted in each file).
Copyright 2000, 2001, 2002
Dan Potter and others (as noted in each file). All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Cryptic Allusion nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

View File

@ -0,0 +1,572 @@
/*
* Dreamroq by Mike Melanson
* Updated by Josh Pearson to add audio support
*
* This is the main playback engine.
*/
/*
Name:Ian micheal
Date: 15/08/23 08:16
Description: kos filesystem api port
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dc/fmath_base.h>
#include <kos/fs.h> // Include the KOS filesystem header
#include "dreamroqlib.h"
/* Chunk identifiers, as read from the 16-bit id field of a chunk header. */
#define RoQ_INFO 0x1001
#define RoQ_QUAD_CODEBOOK 0x1002
#define RoQ_QUAD_VQ 0x1011
#define RoQ_SOUND_MONO 0x1020
#define RoQ_SOUND_STEREO 0x1021
#define RoQ_SIGNATURE 0x1084
/* A chunk header is: 16-bit id, 32-bit payload size, 16-bit argument. */
#define CHUNK_HEADER_SIZE 8
/* Little-endian readers for a pointer to unsigned bytes.  The argument is
 * parenthesised so that expressions such as LE_16(p + 1) work, and LE_32
 * widens each byte to uint32_t before shifting so that a top byte >= 0x80 is
 * never shifted into the sign bit of an int.  Both evaluate `buf` more than
 * once, so do not pass an expression with side effects. */
#define LE_16(buf) ((buf)[0] | ((buf)[1] << 8))
#define LE_32(buf) ((uint32_t)(buf)[0] | ((uint32_t)(buf)[1] << 8) | \
                    ((uint32_t)(buf)[2] << 16) | ((uint32_t)(buf)[3] << 24))
/* Size of the chunk read buffer and of the decoded PCM buffer. */
#define MAX_BUF_SIZE (64 * 1024)
/* Number of entries in each of the 2x2 and 4x4 codebooks. */
#define ROQ_CODEBOOK_SIZE 256
/* Audio decode state.  The trailing declarator also defines the single
 * file-level instance `roq_audio` that dreamroq_play() fills in for every
 * sound chunk before handing pcm_sample to the audio callback. */
struct roq_audio
{
int pcm_samples; /* set to chunk_size * 2 for each sound chunk */
int channels; /* 1 for RoQ_SOUND_MONO chunks, 2 for RoQ_SOUND_STEREO chunks */
int position; /* not referenced by the decoder code in this file */
short snd_sqr_arr[260]; /* delta table: [i] = i*i and [i + 128] = -(i*i) for i in 0..127 */
unsigned char pcm_sample[MAX_BUF_SIZE]; /* decoded 16-bit samples, low byte stored first */
}roq_audio;
/* Video decode state, filled in from the RoQ_INFO chunk and then updated by
 * every codebook and VQ chunk. */
typedef struct
{
int width; /* picture width in pixels, read from the RoQ_INFO chunk */
int height; /* picture height in pixels, read from the RoQ_INFO chunk */
int mb_width; /* width / 16: number of 16x16 macroblocks per row */
int mb_height; /* height / 16: number of macroblock rows */
int mb_count; /* mb_width * mb_height */
int current_frame; /* index (0 or 1) into frame[] of the most recently decoded frame */
unsigned short *frame[2] __attribute__(( aligned(32))); /* two frame buffers of stride * texture_height 16-bit pixels each */
int stride; /* pixels per buffer row: 8 doubled until it is >= width */
int texture_height; /* buffer rows: 8 doubled until it is >= height */
unsigned short cb2x2[ROQ_CODEBOOK_SIZE][4]; /* 2x2 pixel vectors, already converted to RGB565 */
unsigned short cb4x4[ROQ_CODEBOOK_SIZE][16]; /* 4x4 pixel vectors, each assembled from four 2x2 vectors */
} roq_state;
/*
 * Decode a RoQ_QUAD_CODEBOOK chunk into state->cb2x2 and state->cb4x4.
 *
 * buf   - chunk payload: the 2x2 vectors (4 luma bytes, then one U and one V
 *         byte each) followed by the 4x4 vectors (four 2x2 indices each)
 * size  - payload size in bytes
 * arg   - chunk argument: high byte = number of 2x2 vectors, low byte =
 *         number of 4x4 vectors; a zero count means 256 (for the 4x4 count
 *         only when the payload is larger than the 2x2 vectors alone)
 * state - decoder state whose codebooks are overwritten
 *
 * Returns ROQ_SUCCESS, or ROQ_BAD_CODEBOOK when the counts do not add up to
 * exactly `size` bytes (nothing is written in that case).
 */
static int roq_unpack_quad_codebook(unsigned char *buf, int size, int arg,
    roq_state *state)
{
    int luma[4];
    int cb, cr;
    int scaled;
    int red, green, blue;
    int entry, k;
    int n2x2 = (arg >> 8) & 0xFF;
    int n4x4 = arg & 0xFF;

    if (n2x2 == 0)
        n2x2 = ROQ_CODEBOOK_SIZE;

    /* a zero 4x4 count only means "256" when bytes remain after the 2x2 part */
    if (n4x4 == 0 && n2x2 * 6 < size)
        n4x4 = ROQ_CODEBOOK_SIZE;

    /* the payload must be consumed exactly */
    if (n2x2 * 6 + n4x4 * 4 != size)
        return ROQ_BAD_CODEBOOK;

    /* 2x2 vectors: four luma samples sharing one chroma pair */
    for (entry = 0; entry < n2x2; entry++)
    {
        luma[0] = buf[0];
        luma[1] = buf[1];
        luma[2] = buf[2];
        luma[3] = buf[3];
        cb = buf[4];
        cr = buf[5];
        buf += 6;

        for (k = 0; k < 4; k++)
        {
            /* YUV -> RGB, then scale down to 5/6/5 bits per component */
            scaled = (luma[k] - 16) * 1.164;
            red = (scaled + 1.596 * (cr - 128)) / 8;
            green = (scaled - 0.813 * (cr - 128) - 0.391 * (cb - 128)) / 4;
            blue = (scaled + 2.018 * (cb - 128)) / 8;

            red = (red < 0) ? 0 : ((red > 31) ? 31 : red);
            green = (green < 0) ? 0 : ((green > 63) ? 63 : green);
            blue = (blue < 0) ? 0 : ((blue > 31) ? 31 : blue);

            state->cb2x2[entry][k] = (red << 11) | (green << 5) | blue;
        }
    }

    /* 4x4 vectors: each is a 2x2 arrangement of 2x2 vectors */
    for (entry = 0; entry < n4x4; entry++)
    {
        unsigned short *vec = state->cb4x4[entry];

        for (k = 0; k < 4; k++)
        {
            const unsigned short *quad = state->cb2x2[*buf++];
            unsigned short *dst = vec + (k / 2) * 8 + (k % 2) * 2;

            dst[0] = quad[0];
            dst[1] = quad[1];
            dst[4] = quad[2];
            dst[5] = quad[3];
        }
    }

    return ROQ_SUCCESS;
}
/*
 * Bytestream helpers for roq_unpack_vq().  Both macros operate on locals of
 * the expanding function: buf, size, index and status, plus (for GET_MODE)
 * mode, mode_lo, mode_hi, mode_set and mode_count.
 *
 * They are wrapped in do { } while (0) so that each behaves as one statement,
 * including inside an unbraced if/else.
 */

/* Read the next payload byte into x; past the end of the payload, store 0
 * and set status to ROQ_BAD_VQ_STREAM instead. */
#define GET_BYTE(x) \
    do { \
        if (index >= size) { \
            status = ROQ_BAD_VQ_STREAM; \
            (x) = 0; \
        } else { \
            (x) = buf[index++]; \
        } \
    } while (0)

/* Fetch the next 2-bit coding mode into `mode`.  Modes are packed eight to a
 * little-endian 16-bit word and consumed from the most significant pair
 * down; a new word is read whenever the current one is used up. */
#define GET_MODE() \
    do { \
        if (!mode_count) { \
            GET_BYTE(mode_lo); \
            GET_BYTE(mode_hi); \
            mode_set = (mode_hi << 8) | mode_lo; \
            mode_count = 16; \
        } \
        mode_count -= 2; \
        mode = (mode_set >> mode_count) & 0x03; \
    } while (0)
/*
 * Return non-zero when an n-by-n pixel block whose top-left pixel sits at
 * pixel index `start` lies completely inside a frame buffer that holds
 * `frame_pixels` pixels laid out with `stride` pixels per row.
 */
static int roq_block_in_frame(int start, int n, int stride, int frame_pixels)
{
    return start >= 0 && start + (n - 1) * stride + (n - 1) < frame_pixels;
}

/*
 * Decode one RoQ_QUAD_VQ chunk into the frame buffer that was not written by
 * the previous call, and make it the current frame.
 *
 * buf   - chunk payload
 * size  - payload size in bytes
 * arg   - chunk argument: high byte = mean horizontal motion, low byte =
 *         mean vertical motion, both signed 8-bit values
 * state - decoder state; state->current_frame is toggled, and the codebooks
 *         and the previous frame are read
 *
 * Each 16x16 macroblock is split into four 8x8 blocks.  Every 8x8 block has
 * a 2-bit mode: 0 = leave as is, 1 = copy from the previous frame with a
 * motion vector, 2 = 4x4 codebook vector scaled up to 8x8, 3 = split into
 * four 4x4 subblocks, each with its own mode of the same kind (where mode 3
 * means four 2x2 codebook vectors).
 *
 * A motion vector whose source block falls outside the frame buffer is
 * ignored (the destination block is left untouched) instead of reading
 * outside the allocation.
 *
 * Returns ROQ_SUCCESS, or ROQ_BAD_VQ_STREAM when the payload ends early or
 * more than two bytes are left over after the last macroblock.
 */
static int roq_unpack_vq(unsigned char *buf, int size, unsigned int arg,
    roq_state *state)
{
    int status = ROQ_SUCCESS;
    int mb_x, mb_y;
    int block;      /* 8x8 blocks */
    int subblock;   /* 4x4 blocks */
    int stride = state->stride;
    int frame_pixels = stride * state->texture_height;
    int i;

    /* frame and pixel management */
    unsigned short *this_frame;
    unsigned short *last_frame;
    int line_offset;
    int mb_offset;
    int block_offset;
    int subblock_offset;
    int source_offset;
    unsigned short *this_ptr;
    unsigned int *this_ptr32;
    unsigned short *last_ptr;
    unsigned short *vector16;
    unsigned int *vector32;
    int stride32 = stride / 2;

    /* bytestream management (these names are used by GET_BYTE / GET_MODE) */
    int index = 0;
    int mode_set = 0;
    int mode, mode_lo, mode_hi;
    int mode_count = 0;

    /* vectors */
    int mx, my;
    int motion_x, motion_y;
    unsigned char data_byte;

    /* the mean motion bytes are signed: sign-extend them from 8 bits */
    mx = (arg >> 8) & 0xFF;
    if (mx > 127)
        mx -= 256;
    my = arg & 0xFF;
    if (my > 127)
        my -= 256;

    /* swap the roles of the two frame buffers */
    if (state->current_frame == 1)
    {
        state->current_frame = 0;
        this_frame = state->frame[0];
        last_frame = state->frame[1];
    }
    else
    {
        state->current_frame = 1;
        this_frame = state->frame[1];
        last_frame = state->frame[0];
    }

    for (mb_y = 0; mb_y < state->mb_height && status == ROQ_SUCCESS; mb_y++)
    {
        line_offset = mb_y * 16 * stride;
        for (mb_x = 0; mb_x < state->mb_width && status == ROQ_SUCCESS; mb_x++)
        {
            mb_offset = line_offset + mb_x * 16;
            for (block = 0; block < 4 && status == ROQ_SUCCESS; block++)
            {
                block_offset = mb_offset + (block / 2 * 8 * stride) + (block % 2 * 8);

                /* each 8x8 block gets a mode */
                GET_MODE();
                switch (mode)
                {
                case 0:  /* MOT: skip */
                    break;

                case 1:  /* FCC: motion compensation */
                    /* this needs to be done 16 bits at a time due to
                     * data alignment issues on the SH-4 */
                    GET_BYTE(data_byte);
                    motion_x = 8 - (data_byte >> 4) - mx;
                    motion_y = 8 - (data_byte & 0xF) - my;
                    source_offset = block_offset + (motion_y * stride) + motion_x;
                    if (!roq_block_in_frame(source_offset, 8, stride, frame_pixels))
                        break;
                    last_ptr = last_frame + source_offset;
                    this_ptr = this_frame + block_offset;
                    for (i = 0; i < 8; i++)
                    {
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;
                        *this_ptr++ = *last_ptr++;

                        last_ptr += stride - 8;
                        this_ptr += stride - 8;
                    }
                    break;

                case 2:  /* SLD: upsample 4x4 vector */
                    GET_BYTE(data_byte);
                    vector16 = state->cb4x4[data_byte];
                    for (i = 0; i < 4*4; i++)
                    {
                        /* every codebook pixel becomes a 2x2 square */
                        this_ptr = this_frame + block_offset +
                            (i / 4 * 2 * stride) + (i % 4 * 2);
                        this_ptr[0] = *vector16;
                        this_ptr[1] = *vector16;
                        this_ptr[stride+0] = *vector16;
                        this_ptr[stride+1] = *vector16;
                        vector16++;
                    }
                    break;

                case 3:  /* CCC: subdivide into 4 subblocks */
                    for (subblock = 0; subblock < 4; subblock++)
                    {
                        subblock_offset = block_offset +
                            (subblock / 2 * 4 * stride) + (subblock % 2 * 4);

                        GET_MODE();
                        switch (mode)
                        {
                        case 0:  /* MOT: skip */
                            break;

                        case 1:  /* FCC: motion compensation */
                            GET_BYTE(data_byte);
                            motion_x = 8 - (data_byte >> 4) - mx;
                            motion_y = 8 - (data_byte & 0xF) - my;
                            source_offset = subblock_offset +
                                (motion_y * stride) + motion_x;
                            if (!roq_block_in_frame(source_offset, 4, stride, frame_pixels))
                                break;
                            last_ptr = last_frame + source_offset;
                            this_ptr = this_frame + subblock_offset;
                            for (i = 0; i < 4; i++)
                            {
                                *this_ptr++ = *last_ptr++;
                                *this_ptr++ = *last_ptr++;
                                *this_ptr++ = *last_ptr++;
                                *this_ptr++ = *last_ptr++;

                                last_ptr += stride - 4;
                                this_ptr += stride - 4;
                            }
                            break;

                        case 2:  /* SLD: use 4x4 vector from codebook */
                            GET_BYTE(data_byte);
                            /* copied two pixels (32 bits) at a time */
                            vector32 = (unsigned int*)state->cb4x4[data_byte];
                            this_ptr32 = (unsigned int*)this_frame;
                            this_ptr32 += subblock_offset / 2;
                            for (i = 0; i < 4; i++)
                            {
                                *this_ptr32++ = *vector32++;
                                *this_ptr32++ = *vector32++;

                                this_ptr32 += stride32 - 2;
                            }
                            break;

                        case 3:  /* CCC: four 2x2 vectors, row-major */
                            for (i = 0; i < 4; i++)
                            {
                                GET_BYTE(data_byte);
                                vector16 = state->cb2x2[data_byte];
                                this_ptr = this_frame + subblock_offset +
                                    (i / 2 * 2 * stride) + (i % 2 * 2);
                                this_ptr[0] = vector16[0];
                                this_ptr[1] = vector16[1];
                                this_ptr[stride+0] = vector16[2];
                                this_ptr[stride+1] = vector16[3];
                            }
                            break;
                        }
                    }
                    break;
                }
            }
        }
    }

    /* sanity check to see if the stream was fully consumed */
    if (status == ROQ_SUCCESS && index < size-2)
    {
        status = ROQ_BAD_VQ_STREAM;
    }

    return status;
}
/*
    Name: Ian micheal
    Copyright:
    Author:
    Date: 15/08/23 19:24
    Description: ported from the C stdio file API to the KOS filesystem API,
    because of this info from TapamN: one issue you might run into is slow
    file access over ethernet.  Using the C library stdio.h functions (fread,
    fwrite) can be much slower than using the KOS filesystem calls directly
    (fs_read, fs_write) when reading/writing large blocks.  With stdio, you
    get something like tens of KB/sec, while with KOS you can get over
    1 MB/sec.  Stdio might be faster when performing many very small
    operations.  dcload-serial doesn't have this issue.
*/
/*
 * Play a RoQ file.
 *
 * filename  - path handed to fs_open()
 * loop      - non-zero: seek back to the first chunk when the end of the
 *             file is reached instead of returning
 * render_cb - called with the freshly decoded frame after every VQ chunk
 *             (may be NULL)
 * audio_cb  - called with the decoded PCM data after every sound chunk
 *             (may be NULL)
 * quit_cb   - polled before every chunk; a non-zero return stops playback
 *             (may be NULL)
 *
 * Returns ROQ_FILE_OPEN_FAILURE or ROQ_FILE_READ_FAILURE when the file
 * cannot be opened or read, ROQ_CHUNK_TOO_LARGE when a chunk does not fit
 * the internal buffers, an ROQ_INVALID_* / ROQ_NO_MEMORY code when the
 * RoQ_INFO chunk cannot be honoured, ROQ_BAD_VQ_STREAM when a video chunk
 * arrives before RoQ_INFO, and otherwise the status of the last chunk that
 * was processed (decoder result, or the callback's return value when a
 * callback ran).
 *
 * NOTE(review): as before, a non-zero status from a codebook/VQ decode or
 * from a callback does not by itself end playback; only the failures listed
 * above do.
 */
int dreamroq_play(char *filename, int loop, render_callback render_cb,
    audio_callback audio_cb, quit_callback quit_cb)
{
    file_t f;
    ssize_t file_ret;
    int framerate;
    int chunk_id;
    unsigned int chunk_size;
    unsigned int chunk_arg;
    roq_state state;
    int status = ROQ_SUCCESS;
    int initialized = 0;
    unsigned char read_buffer[MAX_BUF_SIZE];
    int i, snd_left, snd_right;

    /* so that the cleanup at the end is safe even if no RoQ_INFO chunk was
     * ever processed */
    state.frame[0] = NULL;
    state.frame[1] = NULL;

    f = fs_open(filename, O_RDONLY);
    if (f < 0)
        return ROQ_FILE_OPEN_FAILURE;

    /* the file starts with a signature chunk header whose argument field
     * holds the frame rate */
    file_ret = fs_read(f, read_buffer, CHUNK_HEADER_SIZE);
    if (file_ret != CHUNK_HEADER_SIZE)
    {
        fs_close(f);
        printf("\nROQ_FILE_READ_FAILURE\n\n");
        return ROQ_FILE_READ_FAILURE;
    }
    framerate = LE_16(&read_buffer[6]);
    printf("RoQ file plays at %d frames/sec\n", framerate);

    /* Initialize Audio SQRT Look-Up Table */
    for (i = 0; i < 128; i++)
    {
        roq_audio.snd_sqr_arr[i] = i * i;
        roq_audio.snd_sqr_arr[i + 128] = -(i * i);
    }

    while (1)
    {
        if (quit_cb && quit_cb())
            break;

        file_ret = fs_read(f, read_buffer, CHUNK_HEADER_SIZE);
#ifdef FPSGRAPH
        printf("r\n");
#endif
        if (file_ret < 0)
        {
            status = ROQ_FILE_READ_FAILURE;
            break;
        }
        if (file_ret < CHUNK_HEADER_SIZE)
        {
            /* end of file (possibly with a few stray trailing bytes):
             * restart after the signature header when looping */
            if (!loop)
                break;
            fs_seek(f, CHUNK_HEADER_SIZE, SEEK_SET);
            continue;
        }

        chunk_id = LE_16(&read_buffer[0]);
        chunk_size = LE_32(&read_buffer[2]);
        chunk_arg = LE_16(&read_buffer[6]);

        if (chunk_size > MAX_BUF_SIZE)
        {
            status = ROQ_CHUNK_TOO_LARGE;
            break;
        }

        file_ret = fs_read(f, read_buffer, chunk_size);
        if (file_ret < 0 || (unsigned int)file_ret != chunk_size)
        {
            status = ROQ_FILE_READ_FAILURE;
            break;
        }

        switch (chunk_id)
        {
        case RoQ_INFO:
            /* only honour the first one; it is seen again when looping */
            if (initialized)
                continue;

            /* width and height are the first four payload bytes */
            if (chunk_size < 4)
            {
                status = ROQ_INVALID_PIC_SIZE;
                goto done;
            }
            state.width = LE_16(&read_buffer[0]);
            state.height = LE_16(&read_buffer[2]);

            /* width and height each need to be divisible by 16 */
            if ((state.width & 0xF) || (state.height & 0xF))
            {
                status = ROQ_INVALID_PIC_SIZE;
                goto done;
            }
            if (state.width < 8 || state.width > 1024 ||
                state.height < 8 || state.height > 1024)
            {
                status = ROQ_INVALID_DIMENSION;
                goto done;
            }

            state.mb_width = state.width / 16;
            state.mb_height = state.height / 16;
            state.mb_count = state.mb_width * state.mb_height;

            /* the frame buffers use power-of-two dimensions */
            state.stride = 8;
            while (state.stride < state.width)
                state.stride <<= 1;
            state.texture_height = 8;
            while (state.texture_height < state.height)
                state.texture_height <<= 1;

            printf("  RoQ_INFO: dimensions = %dx%d, %dx%d; %d mbs, texture = %dx%d\n",
                state.width, state.height, state.mb_width, state.mb_height,
                state.mb_count, state.stride, state.texture_height);

            /* calloc so that both frames start out zeroed */
            state.frame[0] = calloc((size_t)state.texture_height * state.stride,
                sizeof(unsigned short));
            state.frame[1] = calloc((size_t)state.texture_height * state.stride,
                sizeof(unsigned short));
            state.current_frame = 0;
            if (!state.frame[0] || !state.frame[1])
            {
                /* both are released (free(NULL) is a no-op) at `done` */
                status = ROQ_NO_MEMORY;
                goto done;
            }

            /* set this flag so that this code is not executed again when
             * looping */
            initialized = 1;
            break;

        case RoQ_QUAD_CODEBOOK:
            status = roq_unpack_quad_codebook(read_buffer, chunk_size,
                chunk_arg, &state);
            break;

        case RoQ_QUAD_VQ:
            /* without RoQ_INFO there are no frame buffers to decode into */
            if (!initialized)
            {
                status = ROQ_BAD_VQ_STREAM;
                goto done;
            }
            status = roq_unpack_vq(read_buffer, chunk_size,
                chunk_arg, &state);
            if (render_cb)
                status = render_cb(state.frame[state.current_frame],
                    state.width, state.height, state.stride, state.texture_height);
            break;

        case RoQ_SOUND_MONO:
            /* every input byte expands to one 16-bit sample */
            if (chunk_size > sizeof(roq_audio.pcm_sample) / 2)
            {
                status = ROQ_CHUNK_TOO_LARGE;
                goto done;
            }
            roq_audio.channels = 1;
            roq_audio.pcm_samples = chunk_size*2;
            snd_left = chunk_arg;
            for (i = 0; i < chunk_size; i++)
            {
                snd_left += roq_audio.snd_sqr_arr[read_buffer[i]];
                roq_audio.pcm_sample[i * 2] = snd_left & 0xff;
                roq_audio.pcm_sample[i * 2 + 1] = (snd_left & 0xff00) >> 8;
            }
            if (audio_cb)
                status = audio_cb( roq_audio.pcm_sample, roq_audio.pcm_samples,
                    roq_audio.channels );
            break;

        case RoQ_SOUND_STEREO:
            /* every input byte expands to one 16-bit sample */
            if (chunk_size > sizeof(roq_audio.pcm_sample) / 2)
            {
                status = ROQ_CHUNK_TOO_LARGE;
                goto done;
            }
            roq_audio.channels = 2;
            roq_audio.pcm_samples = chunk_size*2;
            /* the argument carries the high bytes of both predictors */
            snd_left = (chunk_arg & 0xFF00);
            snd_right = (chunk_arg & 0xFF) << 8;
            for (i = 0; i < chunk_size; i += 2)
            {
                snd_left += roq_audio.snd_sqr_arr[read_buffer[i]];
                snd_right += roq_audio.snd_sqr_arr[read_buffer[i+1]];
                roq_audio.pcm_sample[i * 2] = snd_left & 0xff;
                roq_audio.pcm_sample[i * 2 + 1] = (snd_left & 0xff00) >> 8;
                roq_audio.pcm_sample[i * 2 + 2] = snd_right & 0xff;
                roq_audio.pcm_sample[i * 2 + 3] = (snd_right & 0xff00) >> 8;
            }
            if (audio_cb)
                status = audio_cb( roq_audio.pcm_sample, roq_audio.pcm_samples,
                    roq_audio.channels );
            break;

        default:
            /* unknown chunks are skipped */
            break;
        }
    }

done:
    free(state.frame[0]);
    free(state.frame[1]);
    fs_close(f);

    return status;
}

View File

@ -0,0 +1,37 @@
/*
* Dreamroq by Mike Melanson
*
* This is the header file to be included in the programs wishing to
* use the Dreamroq playback engine.
*/
#ifndef NEWROQ_H
#define NEWROQ_H
/* Status codes returned by dreamroq_play(). */
#define ROQ_SUCCESS 0
#define ROQ_FILE_OPEN_FAILURE 1 /* the input file could not be opened */
#define ROQ_FILE_READ_FAILURE 2 /* a header or chunk payload could not be read in full */
#define ROQ_CHUNK_TOO_LARGE 3 /* a chunk is larger than the library's read buffer */
#define ROQ_BAD_CODEBOOK 4 /* codebook chunk size does not match its vector counts */
#define ROQ_INVALID_PIC_SIZE 5 /* width or height is not a multiple of 16 */
#define ROQ_NO_MEMORY 6 /* the frame buffers could not be allocated */
#define ROQ_BAD_VQ_STREAM 7 /* a video chunk ended early or had data left over */
#define ROQ_INVALID_DIMENSION 8 /* width or height is outside 8..1024 */
#define ROQ_RENDER_PROBLEM 9 /* not produced by the library itself; presumably for render callbacks to return - confirm */
#define ROQ_CLIENT_PROBLEM 10 /* not produced by the library itself; presumably for client callbacks to return - confirm */
/* The library calls this function when it has a frame ready for display.
 * buf holds 16-bit pixels with `stride` pixels per row and `texture_height`
 * rows allocated; the picture itself is width x height.  The return value
 * becomes the library's current status. */
typedef int (*render_callback)(unsigned short *buf, int width, int height,
int stride, int texture_height);
/* The library calls this function when it has pcm samples ready for output.
 * `samples` is passed as twice the size of the compressed sound chunk, i.e.
 * the number of bytes written to buf; `channels` is 1 or 2.  The return
 * value becomes the library's current status. */
typedef int (*audio_callback)(unsigned char *buf, int samples, int channels);
/* The library calls this function to ask whether it should quit playback.
 * Return non-zero if it's time to quit. */
typedef int (*quit_callback)();
/* Plays the RoQ file `filename`, looping at the end of the file when `loop`
 * is non-zero.  Any of the three callbacks may be NULL.  Returns one of the
 * ROQ_* status codes above. */
int dreamroq_play(char *filename, int loop, render_callback render_cb,
audio_callback audio_cb, quit_callback quit_cb);
#endif /* NEWROQ_H */

View File

@ -0,0 +1,138 @@
/* KallistiOS ##version##
aica_cmd_iface.h
(c)2000-2002 Dan Potter
Definitions for the SH-4/AICA interface. This file is meant to be
included from both the ARM and SH-4 sides of the fence.
*/
#ifndef __ARM_AICA_CMD_IFACE_H
#define __ARM_AICA_CMD_IFACE_H
/* $Id: aica_cmd_iface.h,v 1.3 2002/06/13 05:52:35 bardtx Exp $ */
/* Fallback integer typedefs, used only when __ARCH_TYPES_H has not been
 * defined by an earlier include (presumably the KallistiOS arch types
 * header).  uint8 must be a single byte: it is the element type of the
 * command payload and of the buffer declared by AICA_CMDSTR_CHANNEL. */
#ifndef __ARCH_TYPES_H
typedef unsigned char uint8;
typedef unsigned long uint32;
#endif
/* Command queue; one of these for passing data from the SH-4 to the
AICA, and another for the other direction. If a command is written
to the queue and it is longer than the amount of space between the
head point and the queue size, the command will wrap around to
the beginning (i.e., queue commands _can_ be split up).
Every field is a uint32, so both sides see the same six-word layout. */
typedef struct aica_queue {
uint32 head; /* Insertion point offset (in bytes) */
uint32 tail; /* Removal point offset (in bytes) */
uint32 size; /* Queue size (in bytes) */
uint32 valid; /* 1 if the queue structs are valid */
uint32 process_ok; /* 1 if it's ok to process the data */
uint32 data; /* Pointer to queue data buffer (stored as a uint32, not as a C pointer type) */
} aica_queue_t;
/* Command queue struct for commanding the AICA from the SH-4.
This is the fixed header of a command; the command-specific payload follows
it directly in the same buffer and is reached through cmd_data (see
AICA_CMDSTR_CHANNEL, which places an aica_channel_t there). */
typedef struct aica_cmd {
uint32 size; /* Command data size in dwords */
uint32 cmd; /* Command ID */
uint32 timestamp; /* When to execute the command (0 == now) */
uint32 cmd_id; /* Command ID, for cmd/response pairs, or channel id */
uint32 misc[4]; /* Misc Parameters / Padding */
uint8 cmd_data[0]; /* Command data (zero-length array: adds nothing to sizeof) */
} aica_cmd_t;
/* Maximum command size -- 256 dwords */
#define AICA_CMD_MAX_SIZE 256
/* This is the cmd_data for AICA_CMD_CHAN. Make this 16 dwords long
for two aica bus queues.
(11 named uint32 fields plus 5 words of padding = 16.) */
typedef struct aica_channel {
uint32 cmd; /* Command ID (an AICA_CH_CMD_* value, optionally OR'd with start/update flags) */
uint32 base; /* Sample base in RAM */
uint32 type; /* (8/16bit/ADPCM) -- one of the AICA_SM_* values */
uint32 length; /* Sample length */
uint32 loop; /* Sample looping */
uint32 loopstart; /* Sample loop start */
uint32 loopend; /* Sample loop end */
uint32 freq; /* Frequency */
uint32 vol; /* Volume 0-255 */
uint32 pan; /* Pan 0-255 */
uint32 pos; /* Sample playback pos */
uint32 pad[5]; /* Padding */
} aica_channel_t;
/* Declare an aica_cmd_t big enough to hold an aica_channel_t
using temp name T, aica_cmd_t name CMDR, and aica_channel_t name CHANR.
The macro expands to three declarations in the enclosing scope: the byte
buffer T, CMDR pointing at its start and CHANR pointing at the payload that
follows the command header. The buffer is not initialised, so any field the
caller does not set holds an indeterminate value.
NOTE(review): T is a plain uint8 array that is then accessed through uint32
fields; nothing here requests 4-byte alignment for it -- confirm the
compiler provides it on the target. */
#define AICA_CMDSTR_CHANNEL(T, CMDR, CHANR) \
uint8 T[sizeof(aica_cmd_t) + sizeof(aica_channel_t)]; \
aica_cmd_t * CMDR = (aica_cmd_t *)T; \
aica_channel_t * CHANR = (aica_channel_t *)(CMDR->cmd_data);
/* Size in dwords of a command built with AICA_CMDSTR_CHANNEL; this is the
value to store in aica_cmd_t.size for such a command. */
#define AICA_CMDSTR_CHANNEL_SIZE ((sizeof(aica_cmd_t) + sizeof(aica_channel_t))/4)
/* Command values (for aica_cmd_t) -- stored in aica_cmd_t.cmd */
#define AICA_CMD_NONE 0x00000000 /* No command (dummy packet) */
#define AICA_CMD_PING 0x00000001 /* Check for signs of life */
#define AICA_CMD_CHAN 0x00000002 /* Perform a wavetable action */
#define AICA_CMD_SYNC_CLOCK 0x00000003 /* Reset the millisecond clock */
/* Response values (for aica_cmd_t) */
#define AICA_RESP_NONE 0x00000000
#define AICA_RESP_PONG 0x00000001 /* Response to CMD_PING */
#define AICA_RESP_DBGPRINT 0x00000002 /* Entire payload is a null-terminated string */
/* Command values (for aica_channel_t commands) -- the low four bits of
aica_channel_t.cmd, as selected by AICA_CH_CMD_MASK */
#define AICA_CH_CMD_MASK 0x0000000f
#define AICA_CH_CMD_NONE 0x00000000
#define AICA_CH_CMD_START 0x00000001
#define AICA_CH_CMD_STOP 0x00000002
#define AICA_CH_CMD_UPDATE 0x00000003
/* Start values -- flags OR'd into aica_channel_t.cmd together with
AICA_CH_CMD_START */
#define AICA_CH_START_MASK 0x00300000
#define AICA_CH_START_DELAY 0x00100000 /* Set params, but delay key-on */
#define AICA_CH_START_SYNC 0x00200000 /* Set key-on for all selected channels */
/* Update values -- flags within AICA_CH_UPDATE_MASK; presumably OR'd into
aica_channel_t.cmd together with AICA_CH_CMD_UPDATE (confirm) */
#define AICA_CH_UPDATE_MASK 0x000ff000
#define AICA_CH_UPDATE_SET_FREQ 0x00001000 /* frequency */
#define AICA_CH_UPDATE_SET_VOL 0x00002000 /* volume */
#define AICA_CH_UPDATE_SET_PAN 0x00004000 /* panning */
/* Sample types -- values for aica_channel_t.type */
#define AICA_SM_8BIT 1
#define AICA_SM_16BIT 0
#define AICA_SM_ADPCM 2
/* This is where our SH-4/AICA comm variables go... */
/* 0x000000 - 0x010000 are reserved for the program */
/* Location of the SH-4 to AICA queue; commands from here will be
periodically processed by the AICA and then removed from the queue. */
#define AICA_MEM_CMD_QUEUE 0x010000 /* 32K */
/* Location of the AICA to SH-4 queue; commands from here will be
periodically processed by the SH-4 and then removed from the queue. */
#define AICA_MEM_RESP_QUEUE 0x018000 /* 32K */
/* This is the channel base, which holds status structs for all the
channels. This is READ-ONLY from the SH-4 side. */
#define AICA_MEM_CHANNELS 0x020000 /* 64 * 16*4 = 4K */
/* The clock value (in milliseconds) */
#define AICA_MEM_CLOCK 0x021000 /* 4 bytes */
/* 0x021004 - 0x030000 are reserved for future expansion */
/* Open ram for sample data */
#define AICA_RAM_START 0x030000
#define AICA_RAM_END 0x200000
/* Quick access to the AICA channels: the offset of channel x's status
struct, i.e. AICA_MEM_CHANNELS plus x whole aica_channel_t structs */
#define AICA_CHANNEL(x) (AICA_MEM_CHANNELS + (x) * sizeof(aica_channel_t))
#endif /* __ARM_AICA_CMD_IFACE_H */

View File

@ -0,0 +1,19 @@
/*
**
** This File is a part of Dreamcast Media Center
** (C) Josh "PH3NOM" Pearson 2011
**
*/
/* 'Public' function prototypes */
#ifndef DCTIMER_H
#define DCTIMER_H
/* Returns hardware time in milliseconds */
int dc_get_time();
/* Regulate the Video Frame Rate.
NOTE(review): the implementation is not part of this header; presumably
AVI_video_rate is the target frame rate, AVI_delay a start offset and
frameCounter the index of the frame about to be shown -- confirm against
the definition. */
void frame_delay( float AVI_video_rate, float AVI_delay, int frameCounter );
#endif

View File

@ -0,0 +1,531 @@
/*
**
** Josh 'PH3NOM' Pearson 2011
** Notes: Had to modify the requested samples by soundstream_poll
** for easy integration with libROQ
*/
/* KallistiOS ##version##
snd_stream.c
Copyright (c)2000,2001,2002,2003,2004 Dan Potter
Copyright (c)2002 Florian Schulze
SH-4 support routines for SPU streaming sound driver
*/
/* Missing headers Ian micheal 2020*/
/*
Name: Ian micheal
Copyright:
Author: Ian micheal
Date: 12/08/23 05:17
Description: kos 2.0 up port threading fix and wrappers and all warnings fixed
*/
#include <kos.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/queue.h>
#include <arch/timer.h>
#include <dc/g2bus.h>
#include <dc/spu.h>
#include <dc/sound/sound.h>
#include <dc/sound/stream.h>
#include <dc/sound/sfxmgr.h>
#include "arm/aica_cmd_iface.h"
/*
This module uses a nice circularly queued data stream in SPU RAM, which is
looped by a program running in the SPU itself.
Basically the poll routine checks to see if a certain minimum amount of
data is available to the SPU to be played, and if not, we ask the user
routine for more sound data and load it up. That's about it.
This version is capable of playing back N streams at once, with the limit
being available CPU time and channels.
*/
/* One entry in a stream's filter chain: a callback plus the opaque pointer
that is handed back to it on every call. */
typedef struct filter {
TAILQ_ENTRY(filter) lent; /* linkage within strchan_t.filters */
snd_stream_filter_t func; /* filter callback */
void * data; /* caller-supplied pointer passed to func */
} filter_t;
/* Each of these represents an active streaming channel */
typedef struct strchan {
// Which AICA channels are we using? (one per output side: the start
// command pans ch[0] to 0 and ch[1] to 255)
int ch[2];
// The last write position in the playing buffer
int last_write_pos; // = 0
int curbuffer; // = 0
// The buffer size allocated for this stream.
int buffer_size; // = 0x10000
// Stream data location in AICA RAM: [0] is the start of the allocation,
// [1] lies buffer_size bytes after it
uint32 spu_ram_sch[2];
// "Get data" callback; we'll call this any time we want to get
// another buffer of output data.
snd_stream_callback_t get_data;
// Our list of filter callback functions for this stream
TAILQ_HEAD(filterlist, filter) filters;
// Stereo/mono flag (non-zero = stereo)
int stereo;
// Playback frequency
int frequency;
/* Stream queueing is where we get everything ready to go but don't
actually start it playing until the signal (for music sync, etc) */
int queueing;
/* Have we been initialized yet? (and reserved a buffer, etc) */
volatile int initted;
} strchan_t;
// Our stream structs; a slot is free while its initted flag is zero
static strchan_t streams[SND_STREAM_MAX] = { { { 0 } } };
// Separation buffers (for stereo); allocated by snd_stream_init() with
// SND_STREAM_BUFFER_MAX/2 bytes each and shared by all streams
int16 * sep_buffer[2] = { NULL, NULL };
/* the address of the sound ram from the SH4 side */
#define SPU_RAM_BASE 0xa0800000
// Check an incoming handle: it must index the streams array and refer to
// an allocated stream. Uses assert(), so it compiles to nothing when
// NDEBUG is defined.
#define CHECK_HND(x) do { \
assert( (x) >= 0 && (x) < SND_STREAM_MAX ); \
assert( streams[(x)].initted ); \
} while(0)
/* Install `cb` as the "get data" callback of stream `hnd`.  The handle must
 * refer to an allocated stream. */
void snd_stream_set_callback(snd_stream_hnd_t hnd, snd_stream_callback_t cb) {
    strchan_t *stream;

    CHECK_HND(hnd);

    stream = &streams[hnd];
    stream->get_data = cb;
}
/* Append a filter to the end of stream `hnd`'s filter chain.
 *
 * filtfunc - callback run on every buffer fetched for the stream
 * obj      - opaque pointer handed back to filtfunc on each call
 *
 * If the list node cannot be allocated the filter is not added and an error
 * is logged. */
void snd_stream_filter_add(snd_stream_hnd_t hnd, snd_stream_filter_t filtfunc, void * obj) {
    filter_t *f;

    CHECK_HND(hnd);

    f = malloc(sizeof *f);
    if (!f) {
        dbglog(DBG_ERROR, "snd_stream_filter_add(): out of memory, filter not added\n");
        return;
    }

    f->func = filtfunc;
    f->data = obj;
    TAILQ_INSERT_TAIL(&streams[hnd].filters, f, lent);
}
/* Remove the first filter of stream `hnd` that was registered with exactly
 * this callback and this `obj` pointer, and free its list node.  Does
 * nothing when no such filter exists. */
void snd_stream_filter_remove(snd_stream_hnd_t hnd, snd_stream_filter_t filtfunc, void * obj) {
    filter_t *cur;

    CHECK_HND(hnd);

    for (cur = TAILQ_FIRST(&streams[hnd].filters); cur; cur = TAILQ_NEXT(cur, lent)) {
        if (cur->func != filtfunc || cur->data != obj)
            continue;

        /* returning right away keeps us from touching the freed node */
        TAILQ_REMOVE(&streams[hnd].filters, cur, lent);
        free(cur);
        return;
    }
}
/* Run every filter registered on stream `hnd`, in list order, over the
 * buffer.  Each filter receives the stream's frequency and channel count
 * (2 when the stereo flag is set, otherwise 1) and gets `buffer` and
 * `samplecnt` by pointer. */
static void process_filters(snd_stream_hnd_t hnd, void **buffer, int *samplecnt) {
    filter_t *cur;

    for (cur = TAILQ_FIRST(&streams[hnd].filters); cur; cur = TAILQ_NEXT(cur, lent)) {
        int channels = streams[hnd].stereo ? 2 : 1;

        cur->func(hnd, cur->data, streams[hnd].frequency, channels, buffer, samplecnt);
    }
}
/* Performs stereo seperation for the two channels; this routine
has been optimized for the SH-4. */
static void sep_data(void *buffer, int len, int stereo) {
register int16 *bufsrc, *bufdst;
register int x, y, cnt;
if (stereo) {
bufsrc = (int16*)buffer;
bufdst = sep_buffer[0];
x = 0; y = 0; cnt = len / 2;
do {
*bufdst = *bufsrc;
bufdst++; bufsrc+=2; cnt--;
} while (cnt > 0);
bufsrc = (int16*)buffer; bufsrc++;
bufdst = sep_buffer[1];
x = 1; y = 0; cnt = len / 2;
do {
*bufdst = *bufsrc;
bufdst++; bufsrc+=2; cnt--;
x+=2; y++;
} while (cnt > 0);
} else {
memcpy(sep_buffer[0], buffer, len);
memcpy(sep_buffer[1], buffer, len);
}
}
/* Fetch one block of data from the stream's callback, split it into the
 * separation buffers and upload it to half `half` (0 or 1) of both channel
 * buffers in sound RAM.  When the callback returns NULL that half is filled
 * with silence instead. */
static void prefill_half_buffer(snd_stream_hnd_t hnd, int half) {
    strchan_t *s = &streams[hnd];
    int half_size = s->buffer_size / 2;
    /* a stereo stream needs twice the bytes to fill the same span per channel */
    int request = s->stereo ? s->buffer_size : half_size;
    int got = 0;
    void *buf;

    buf = s->get_data(hnd, request, &got);
    if (buf)
        process_filters(hnd, &buf, &got);

    /* XXX Note: This will not work if the full data size is less than
       buffer_size or buffer_size/2 (`got` is not taken into account). */
    if (buf) {
        sep_data(buf, half_size, s->stereo);
    } else {
        memset(sep_buffer[0], 0, half_size);
        memset(sep_buffer[1], 0, half_size);
    }

    spu_memload(s->spu_ram_sch[0] + half_size * half,
                (uint8*)sep_buffer[0], half_size);
    spu_memload(s->spu_ram_sch[1] + half_size * half,
                (uint8*)sep_buffer[1], half_size);
}

/* Prefill buffers -- do this before calling start().
 * Loads both halves of the stream's sound RAM buffers from the "get data"
 * callback and resets the write position so that playback begins with
 * buffer 0.  Does nothing when no callback is set. */
void snd_stream_prefill(snd_stream_hnd_t hnd) {
    CHECK_HND(hnd);

    if (!streams[hnd].get_data) return;

    prefill_half_buffer(hnd, 0);
    prefill_half_buffer(hnd, 1);

    /* Start with playing on buffer 0 */
    streams[hnd].last_write_pos = 0;
    streams[hnd].curbuffer = 0;
}
/* Initialize stream system */
int snd_stream_init() {
/* Create stereo seperation buffers */
if (!sep_buffer[0]) {
sep_buffer[0] = memalign(32, (SND_STREAM_BUFFER_MAX/2));
sep_buffer[1] = memalign(32, (SND_STREAM_BUFFER_MAX/2));
}
/* Finish loading the stream driver */
if (snd_init() < 0) {
dbglog(DBG_ERROR, "snd_stream_init(): snd_init() failed, giving up\n");
return -1;
}
return 0;
}
/* Allocate a stream.
 *
 * cb      - "get data" callback for the stream (stored as given)
 * bufsize - per-channel buffer size in bytes; must be positive and no larger
 *           than SND_STREAM_BUFFER_MAX, because the shared separation
 *           buffers hold SND_STREAM_BUFFER_MAX/2 bytes each and receive
 *           bufsize/2 bytes at a time
 *
 * Reserves a free slot, 2 * bufsize bytes of sound RAM and two sound effect
 * channels.  Returns the new handle, or SND_STREAM_INVALID when bufsize is
 * out of range, every slot is in use or sound RAM is exhausted. */
snd_stream_hnd_t snd_stream_alloc(snd_stream_callback_t cb, int bufsize) {
    int i, old;
    snd_stream_hnd_t hnd;
    uint32 spu_ram;

    if (bufsize <= 0 || bufsize > SND_STREAM_BUFFER_MAX)
        return SND_STREAM_INVALID;

    // Get an unused handle (with interrupts off so two callers cannot
    // claim the same slot)
    hnd = -1;
    old = irq_disable();
    for (i=0; i<SND_STREAM_MAX; i++) {
        if (!streams[i].initted) {
            hnd = i;
            break;
        }
    }
    if (hnd != -1)
        streams[hnd].initted = 1;
    irq_restore(old);
    if (hnd == -1)
        return SND_STREAM_INVALID;

    // Allocate stream buffers: one block, split evenly between the channels
    spu_ram = snd_mem_malloc(bufsize*2);
    if (!spu_ram) {
        dbglog(DBG_ERROR, "snd_stream_alloc(): out of sound RAM\n");
        streams[hnd].initted = 0;   // give the slot back
        return SND_STREAM_INVALID;
    }
    streams[hnd].spu_ram_sch[0] = spu_ram;
    streams[hnd].spu_ram_sch[1] = spu_ram + bufsize;

    streams[hnd].buffer_size = bufsize;

    /* Start off with queueing disabled */
    streams[hnd].queueing = 0;

    /* Setup the callback */
    snd_stream_set_callback(hnd, cb);

    /* Initialize our filter chain list */
    TAILQ_INIT(&streams[hnd].filters);

    // And channels
    streams[hnd].ch[0] = snd_sfx_chn_alloc();
    streams[hnd].ch[1] = snd_sfx_chn_alloc();
    printf("snd_stream: alloc'd channels %d/%d\n", streams[hnd].ch[0], streams[hnd].ch[1]);

    return hnd;
}
/* Reuse an already allocated stream with a new "get data" callback:
 * queueing is switched off again and `cb` replaces the old callback.
 * Returns the handle it was given. */
int snd_stream_reinit(snd_stream_hnd_t hnd, snd_stream_callback_t cb) {
    strchan_t *stream;

    CHECK_HND(hnd);

    stream = &streams[hnd];
    stream->queueing = 0;

    snd_stream_set_callback(hnd, cb);

    return hnd;
}
/* Release everything held by stream `hnd`: its two sound effect channels,
 * all filter list nodes and its sound RAM, sending stop commands for the
 * stream on the way.  The slot is zeroed afterwards, which marks it free. */
void snd_stream_destroy(snd_stream_hnd_t hnd) {
    filter_t *cur;
    filter_t *next;

    CHECK_HND(hnd);

    if (!streams[hnd].initted)
        return;

    snd_sfx_chn_free(streams[hnd].ch[0]);
    snd_sfx_chn_free(streams[hnd].ch[1]);

    /* free the filter chain, remembering each successor before the node
     * that links to it goes away */
    for (cur = TAILQ_FIRST(&streams[hnd].filters); cur != NULL; cur = next) {
        next = TAILQ_NEXT(cur, lent);
        free(cur);
    }
    TAILQ_INIT(&streams[hnd].filters);

    snd_stream_stop(hnd);
    snd_mem_free(streams[hnd].spu_ram_sch[0]);

    memset(&streams[hnd], 0, sizeof(streams[0]));
}
/* Shut everything down and free mem: destroys every stream that is still
 * allocated, then releases the shared separation buffers. */
void snd_stream_shutdown() {
    int slot = 0;

    /* Stop and destroy all active streams */
    while (slot < SND_STREAM_MAX) {
        if (streams[slot].initted)
            snd_stream_destroy(slot);
        slot++;
    }

    /* Free global buffers */
    if (sep_buffer[0] != NULL) {
        free(sep_buffer[0]);
        free(sep_buffer[1]);
        sep_buffer[0] = NULL;
        sep_buffer[1] = NULL;
    }
}
/* Enable stream queueing: with the flag set, snd_stream_start() prepares
 * the channels but does not yet let the commands be processed. */
void snd_stream_queue_enable(snd_stream_hnd_t hnd) {
    strchan_t *stream;

    CHECK_HND(hnd);

    stream = &streams[hnd];
    stream->queueing = 1;
}
/* Disable stream queueing: snd_stream_start() lets its commands be
 * processed right away again. */
void snd_stream_queue_disable(snd_stream_hnd_t hnd) {
    strchan_t *stream;

    CHECK_HND(hnd);

    stream = &streams[hnd];
    stream->queueing = 0;
}
/* Start streaming (or if queueing is enabled, just get ready).
 *
 * hnd  - stream handle
 * freq - playback frequency
 * st   - non-zero for stereo source data, zero for mono
 *
 * Prefills both buffer halves, then queues three commands: a delayed start
 * for each of the stream's two channels (panned 0 and 255, both looping over
 * buffer_size/2) and one synchronised start that keys both on together.
 * Does nothing when the stream has no "get data" callback. */
void snd_stream_start(snd_stream_hnd_t hnd, uint32 freq, int st) {
    AICA_CMDSTR_CHANNEL(tmp, cmd, chan);

    CHECK_HND(hnd);

    if (!streams[hnd].get_data) return;

    /* The packet is sent as a whole, so give the fields that are not set
       below (misc, pos, pad) a defined value. */
    memset(tmp, 0, sizeof(tmp));

    streams[hnd].stereo = st;
    streams[hnd].frequency = freq;

    /* Make sure these are sync'd (and/or delayed) */
    snd_sh4_to_aica_stop();

    /* Prefill buffers */
    snd_stream_prefill(hnd);

    /* Channel 0 */
    cmd->cmd = AICA_CMD_CHAN;
    cmd->timestamp = 0;
    cmd->size = AICA_CMDSTR_CHANNEL_SIZE;
    cmd->cmd_id = streams[hnd].ch[0];
    chan->cmd = AICA_CH_CMD_START | AICA_CH_START_DELAY;
    chan->base = streams[hnd].spu_ram_sch[0];
    chan->type = AICA_SM_16BIT;
    chan->length = (streams[hnd].buffer_size/2);
    chan->loop = 1;
    chan->loopstart = 0;
    chan->loopend = (streams[hnd].buffer_size/2);
    chan->freq = freq;
    chan->vol = 255;
    chan->pan = 0;
    snd_sh4_to_aica(tmp, cmd->size);

    /* Channel 1: same parameters, other buffer, opposite pan */
    cmd->cmd_id = streams[hnd].ch[1];
    chan->base = streams[hnd].spu_ram_sch[1];
    chan->pan = 255;
    snd_sh4_to_aica(tmp, cmd->size);

    /* Start both channels simultaneously (cmd_id is a channel bit mask here) */
    cmd->cmd_id = (1 << streams[hnd].ch[0]) |
        (1 << streams[hnd].ch[1]);
    chan->cmd = AICA_CH_CMD_START | AICA_CH_START_SYNC;
    snd_sh4_to_aica(tmp, cmd->size);

    /* Process the changes */
    if (!streams[hnd].queueing)
        snd_sh4_to_aica_start();
}
/* Release the commands prepared by snd_stream_start() when the stream
 * was started in queued mode. */
void snd_stream_queue_go(snd_stream_hnd_t hnd) {
    CHECK_HND(hnd);

    snd_sh4_to_aica_start();
}
/* Stop streaming: send a STOP command to each of the stream's two
 * channels. Returns immediately when the stream has no data callback. */
void snd_stream_stop(snd_stream_hnd_t hnd) {
    AICA_CMDSTR_CHANNEL(tmp, cmd, chan);

    CHECK_HND(hnd);

    if (!streams[hnd].get_data) {
        return;
    }

    /* Channel 0 */
    cmd->cmd = AICA_CMD_CHAN;
    cmd->timestamp = 0;
    cmd->size = AICA_CMDSTR_CHANNEL_SIZE;
    cmd->cmd_id = streams[hnd].ch[0];
    chan->cmd = AICA_CH_CMD_STOP;
    snd_sh4_to_aica(tmp, cmd->size);

    /* Channel 1: identical command, only the channel id changes */
    cmd->cmd_id = streams[hnd].ch[1];
    snd_sh4_to_aica(tmp, AICA_CMDSTR_CHANNEL_SIZE);
}
/* The DMA will chain to this to start the second DMA. */
/* static uint32 dmadest, dmacnt;
static void dma_chain(ptr_t data) {
spu_dma_transfer(sep_buffer[1], dmadest, dmacnt, 0, NULL, 0);
} */
/* Poll streamer to load more data if necessary.
 *
 * Reads the current position of both channels, works out how many samples
 * can be written behind the play position without wrapping, and when that
 * amount is exactly 2048 asks the stream's get_data callback for that much
 * audio, splits it with sep_data() and uploads both halves to SPU RAM.
 *
 * Returns  0 when nothing was needed or a block was uploaded,
 *         -1 when the stream has no callback or channel 0 reports a
 *            position outside the buffer,
 *         -3 when the callback returned NULL (the target region is zeroed).
 */
int snd_stream_poll(snd_stream_hnd_t hnd) {
uint32 ch0pos, ch1pos;
int current_play_pos;
int needed_samples;
int got_samples;
void *data;
CHECK_HND(hnd);
if (!streams[hnd].get_data) return -1;
/* Read the position field of each channel's aica_channel_t in SPU RAM */
ch0pos = g2_read_32(SPU_RAM_BASE + AICA_CHANNEL(streams[hnd].ch[0]) + offsetof(aica_channel_t, pos));
ch1pos = g2_read_32(SPU_RAM_BASE + AICA_CHANNEL(streams[hnd].ch[1]) + offsetof(aica_channel_t, pos));
/* A position at or past buffer_size/2 is treated as an error */
if (ch0pos >= (streams[hnd].buffer_size/2)) {
dbglog(DBG_ERROR, "snd_stream_poll: chan0(%d).pos = %ld (%08lx)\n", streams[hnd].ch[0], ch0pos, ch0pos);
return -1;
}
/* Use the smaller of the two positions as the play position */
current_play_pos = (ch0pos < ch1pos)?(ch0pos):(ch1pos);
/* count just till the end of the buffer, so we don't have to
handle buffer wraps */
if (streams[hnd].last_write_pos <= current_play_pos)
needed_samples = current_play_pos - streams[hnd].last_write_pos;
else
needed_samples = (streams[hnd].buffer_size/2) - streams[hnd].last_write_pos;
/* Round down to a multiple of 2048 (clears the low 11 bits) */
needed_samples &= ~0x7ff;
/* printf("last_write_pos %6i, current_play_pos %6i, needed_samples %6i\n",last_write_pos,current_play_pos,needed_samples); */
/* Original author's note: this threshold used to be 4096 and caused a delay. */
/* NOTE(review): the test is an exact match, so a rounded value of 4096 or
   more is skipped rather than serviced - confirm this is intended. */
if (needed_samples ==2048) {
if (streams[hnd].stereo) {
/* Stereo: request 4 bytes per needed sample */
data = streams[hnd].get_data(hnd, needed_samples * 4, &got_samples);
process_filters(hnd, &data, &got_samples);
/* Short read: shrink the request, then round up to a multiple of 4 */
if (got_samples < needed_samples * 4) {
needed_samples = got_samples / 4;
if (needed_samples & 3)
needed_samples = (needed_samples + 4) & ~3;
}
} else {
/* Mono: request 2 bytes per needed sample */
data = streams[hnd].get_data(hnd, needed_samples * 2, &got_samples);
process_filters(hnd, &data, &got_samples);
/* Short read: shrink the request, then round up to an even count */
if (got_samples < needed_samples * 2) {
needed_samples = got_samples / 2;
if (needed_samples & 1)
needed_samples = (needed_samples + 2) & ~1;
}
}
if (data == NULL) {
/* No data: zero the region that would have been written, in both channel buffers */
spu_memset(streams[hnd].spu_ram_sch[0] + (streams[hnd].last_write_pos * 2), 0, needed_samples * 2);
spu_memset(streams[hnd].spu_ram_sch[1] + (streams[hnd].last_write_pos * 2), 0, needed_samples * 2);
return -3;
}
/* Split the data into sep_buffer[0] / sep_buffer[1], then upload each
   half at the byte offset last_write_pos * 2 of its channel buffer */
sep_data(data, needed_samples * 2, streams[hnd].stereo);
spu_memload(streams[hnd].spu_ram_sch[0] + (streams[hnd].last_write_pos * 2), (uint8*)sep_buffer[0], needed_samples * 2);
spu_memload(streams[hnd].spu_ram_sch[1] + (streams[hnd].last_write_pos * 2), (uint8*)sep_buffer[1], needed_samples * 2);
/* Disabled DMA variant of the two uploads above: */
// Second DMA will get started by the chain handler
/* dcache_flush_range(sep_buffer[0], needed_samples*2);
dcache_flush_range(sep_buffer[1], needed_samples*2);
dmadest = spu_ram_sch2 + (last_write_pos * 2);
dmacnt = needed_samples * 2;
spu_dma_transfer(sep_buffer[0], spu_ram_sch1 + (last_write_pos * 2), needed_samples * 2,
0, dma_chain, 0); */
/* Advance the write position, wrapping at buffer_size/2 */
streams[hnd].last_write_pos += needed_samples;
if (streams[hnd].last_write_pos >= (streams[hnd].buffer_size/2))
streams[hnd].last_write_pos -= (streams[hnd].buffer_size/2);
}
return 0;
}
/* Set the volume on both channels of a stream.
 *
 * One UPDATE/SET_VOL channel command is built and sent twice, once per
 * channel id. vol is written to the command unchanged.
 */
void snd_stream_volume(snd_stream_hnd_t hnd, int vol) {
    AICA_CMDSTR_CHANNEL(tmp, cmd, chan);

    CHECK_HND(hnd);

    /* Command for channel 0 */
    cmd->cmd = AICA_CMD_CHAN;
    cmd->timestamp = 0;
    cmd->size = AICA_CMDSTR_CHANNEL_SIZE;
    cmd->cmd_id = streams[hnd].ch[0];
    chan->cmd = AICA_CH_CMD_UPDATE | AICA_CH_UPDATE_SET_VOL;
    chan->vol = vol;
    snd_sh4_to_aica(tmp, cmd->size);

    /* Same command again for channel 1 */
    cmd->cmd_id = streams[hnd].ch[1];
    snd_sh4_to_aica(tmp, cmd->size);
}

View File

@ -0,0 +1,162 @@
/*
**
** (C) Josh 'PH3NOM' Pearson 2011
**
*/
/*
** To anyone looking at this code:
**
** This driver runs in its own thread on the sh4.
**
** When the AICA driver requests more samples,
** it will signal sndbuf_status=SNDDRV_STATUS_NEEDBUF
** and assign the number of requested samples to snddrv.pcm_needed.
**
** The decoders need to check sndbuf_status,
** when more samples are requested by the driver
** the decoders will loop
** decoding into pcm_buffer until pcm_bytes==snddrv.pcm_needed
** at that point the decoder signals sndbuf_status=SNDDRV_STATUS_HAVEBUF
**
*/
/*
Name: Ian micheal
Copyright:
Author: Ian micheal
Date: 12/08/23 05:17
Description: kos 2.0 up port threading fix and wrappers and all warnings fixed
*/
#include <kos/thread.h>
#include <dc/sound/stream.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "snddrv.h"
snd_stream_hnd_t shnd;
kthread_t * snddrv_thd;
static int snddrv_vol = 255;
struct snddrv snddrv;
struct snddrv_song_info snd_sinfo;
/* Raise the driver volume by 10 and push it to the stream.
 * No change is made once the volume is above 245.
 * Returns the volume now in effect. */
int snddrv_volume_up() {
    if (snddrv_vol > 245) {
        return snddrv_vol;
    }

    snddrv_vol += 10;
    snd_stream_volume(shnd, snddrv_vol);
    return snddrv_vol;
}
/* Lower the driver volume by 10 and push it to the stream.
 * No change is made once the volume is below 10.
 * Returns the volume now in effect. */
int snddrv_volume_down() {
    if (snddrv_vol < 10) {
        return snddrv_vol;
    }

    snddrv_vol -= 10;
    snd_stream_volume(shnd, snddrv_vol);
    return snddrv_vol;
}
/* Exit the Sound Driver.
 *
 * If the driver is active, requests shutdown (drv_status = DONE,
 * buf_status = BUFEND) and yields until the driver thread reports
 * SNDDRV_STATUS_NULL. The PCM buffer and the bookkeeping fields are then
 * cleared.
 *
 * Returns snddrv.drv_status after the reset.
 */
int snddrv_exit() {
    if (snddrv.drv_status != SNDDRV_STATUS_NULL) {
        snddrv.drv_status = SNDDRV_STATUS_DONE;
        snddrv.buf_status = SNDDRV_STATUS_BUFEND;

        /* Yield until the driver thread acknowledges the request */
        while (snddrv.drv_status != SNDDRV_STATUS_NULL)
            thd_pass();

        printf("SNDDRV: Exited\n");
    }

    /* pcm_buffer is an array of unsigned int, so its size in bytes must
       come from sizeof; a bare element count would clear only part of it */
    memset(snddrv.pcm_buffer, 0, sizeof(snddrv.pcm_buffer));
    snddrv.pcm_bytes = 0;
    snddrv.pcm_needed = 0;

    SNDDRV_FREE_STRUCT();

    return snddrv.drv_status;
}
/* Stream data callback: signal how many bytes are needed, then wait for
 * the decoder to produce them.
 *
 * hnd     stream handle (not used here)
 * len     number of bytes requested
 * actual  receives len; the full amount is always reported
 *
 * Returns the start of snddrv.pcm_buffer.
 */
static void *snddrv_callback(snd_stream_hnd_t hnd, int len, int * actual) {
    /* Publish the request for the decoder side */
    snddrv.pcm_needed = len;
    snddrv.buf_status = SNDDRV_STATUS_NEEDBUF;

    /* Yield until the buffer is flagged as filled or the end is signalled */
    while (!(snddrv.buf_status == SNDDRV_STATUS_HAVEBUF ||
             snddrv.buf_status == SNDDRV_STATUS_BUFEND)) {
        thd_pass();
    }

    /* Hand the buffer over and reset the fill counter */
    snddrv.pcm_ptr = snddrv.pcm_buffer;
    snddrv.pcm_bytes = 0;
    *actual = len;

    return snddrv.pcm_ptr;
}
static int snddrv_thread() {
printf("SNDDRV: Rate - %i, Channels - %i\n", snddrv.rate, snddrv.channels);
shnd = snd_stream_alloc(snddrv_callback, SND_STREAM_BUFFER_MAX/4);
snd_stream_start(shnd, snddrv.rate, snddrv.channels-1);
snddrv.drv_status = SNDDRV_STATUS_STREAMING;
while( snddrv.drv_status != SNDDRV_STATUS_DONE && snddrv.drv_status != SNDDRV_STATUS_ERROR ) {
snd_stream_poll(shnd);
thd_sleep(20);
}
snddrv.drv_status = SNDDRV_STATUS_NULL;
snd_stream_destroy(shnd);
snd_stream_shutdown();
printf("SNDDRV: Finished\n");
return snddrv.drv_status;
}
/* Adapter with the signature passed to thd_create(): runs snddrv_thread()
 * and returns its int result packed into the void pointer. */
static void *snddrv_thread_wrapper(void *arg)
{
    (void)arg;
    return (void *)(size_t)snddrv_thread();
}
/* Start the AICA Sound Stream Thread */
int snddrv_start( int rate, int chans ) {
snddrv.rate = rate;
snddrv.channels = chans;
if( snddrv.channels > 2) {
printf("SNDDRV: ERROR - Exceeds maximum channels\n");
return -1;
}
printf("SNDDRV: Creating Driver Thread\n");
snddrv.drv_status = SNDDRV_STATUS_INITIALIZING;
snd_stream_init();
/*libdcmc/snddrv.c:136: warning: passing arg 1 of `thd_create' from incompatible pointer type */ //Ian micheal 2020 warning
/* Use the wrapper function here */
snddrv_thd = thd_create(0, snddrv_thread_wrapper, NULL);
printf("SNDDRV: Creating Driver Thread\n");
return snddrv.drv_status;
}

View File

@ -0,0 +1,111 @@
/*
**
** This File is a part of Dreamcast Media Center
** (C) Josh "PH3NOM" Pearson 2011
**
*/
#ifndef SNDDRV_H
#define SNDDRV_H
/* Keep track of things from the Driver side */
#define SNDDRV_STATUS_NULL 0x00
#define SNDDRV_STATUS_INITIALIZING 0x01
#define SNDDRV_STATUS_READY 0x02
#define SNDDRV_STATUS_STREAMING 0x03
#define SNDDRV_STATUS_DONE 0x04
#define SNDDRV_STATUS_ERROR 0x05
/* Keep track of things from the Decoder side */
#define SNDDEC_STATUS_NULL 0x00
#define SNDDEC_STATUS_INITIALIZING 0x01
#define SNDDEC_STATUS_READY 0x02
#define SNDDEC_STATUS_STREAMING 0x03
#define SNDDEC_STATUS_PAUSING 0x04
#define SNDDEC_STATUS_PAUSED 0x05
#define SNDDEC_STATUS_RESUMING 0x06
#define SNDDEC_STATUS_DONE 0x07
#define SNDDEC_STATUS_ERROR 0x08
/* Keep track of the buffer status from both sides*/
#define SNDDRV_STATUS_NEEDBUF 0x00
#define SNDDRV_STATUS_HAVEBUF 0x01
#define SNDDRV_STATUS_BUFEND 0x02
/* This seems to be a good number for file seeking on compressed audio.
   Parenthesised so the product stays intact inside larger expressions
   such as x / SEEK_LEN. */
#define SEEK_LEN (16384*48)
extern struct snddrv snddrv;
extern struct snddrv_song_info snd_sinfo;
/* SNDDRV (C) AICA Audio Driver
 *
 * State shared between the driver thread and the decoder side. */
struct snddrv {
    int rate;                                /* sample rate given to snddrv_start() */
    int channels;                            /* channel count given to snddrv_start() */
    int pcm_bytes;                           /* bytes currently in pcm_buffer */
    int pcm_needed;                          /* bytes the driver asked for */
    volatile int drv_status;                 /* one of SNDDRV_STATUS_* (driver side) */
    volatile int dec_status;                 /* one of SNDDEC_STATUS_* (decoder side) */
    volatile int buf_status;                 /* NEEDBUF / HAVEBUF / BUFEND */
    unsigned int pcm_buffer[65536+16384];    /* PCM staging area; note: elements are unsigned int */
    unsigned int *pcm_ptr;                   /* pointer into pcm_buffer */
};
/* Reset rate, channels and the three status fields of the global snddrv
   to 0. pcm_buffer, pcm_bytes, pcm_needed and pcm_ptr are not touched.
   The expansion is a bare brace block, not a do { } while (0). */
#define SNDDRV_FREE_STRUCT() { \
snddrv.rate = snddrv.channels = snddrv.drv_status = \
snddrv.dec_status = snddrv.buf_status = 0; }
/* Metadata and playback position of the current song.
 *
 * NOTE(review): the five tag fields are arrays of 128 char pointers, not
 * 128-byte strings. SNDDRV_FREE_SINFO clears 128 bytes of each, which
 * suggests char[128] may have been intended - confirm against users. */
struct snddrv_song_info {
    char *artist[128];
    char *title[128];
    char *track[128];
    char *album[128];
    char *genre[128];
    char *fname;           /* file name */
    volatile int fpos;     /* presumably current position in the file */
    volatile float spos;   /* presumably current position in the song */
    int fsize;             /* presumably file size */
    float slen;            /* presumably song length */
};
/* Clear the tag arrays and zero the position/size fields of the global
   snd_sinfo. Each array is cleared over its full size; a fixed 128 would
   cover only part of an array of 128 pointers. fname is not touched. */
#define SNDDRV_FREE_SINFO() { \
    sq_clr( snd_sinfo.artist, sizeof(snd_sinfo.artist) ); \
    sq_clr( snd_sinfo.title, sizeof(snd_sinfo.title) ); \
    sq_clr( snd_sinfo.track, sizeof(snd_sinfo.track) ); \
    sq_clr( snd_sinfo.album, sizeof(snd_sinfo.album) ); \
    sq_clr( snd_sinfo.genre, sizeof(snd_sinfo.genre) ); \
    snd_sinfo.fpos = snd_sinfo.spos = snd_sinfo.fsize = snd_sinfo.slen = 0; }
#define min(a,b) ( (a) < (b) ? (a) : (b) )
#define MAX_CHANNELS 6 /* make this higher to support files with
more channels for LibFAAD */
/* MicroSoft channel definitions */
#define SPEAKER_FRONT_LEFT 0x1
#define SPEAKER_FRONT_RIGHT 0x2
#define SPEAKER_FRONT_CENTER 0x4
#define SPEAKER_LOW_FREQUENCY 0x8
#define SPEAKER_BACK_LEFT 0x10
#define SPEAKER_BACK_RIGHT 0x20
#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40
#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
#define SPEAKER_BACK_CENTER 0x100
#define SPEAKER_SIDE_LEFT 0x200
#define SPEAKER_SIDE_RIGHT 0x400
#define SPEAKER_TOP_CENTER 0x800
#define SPEAKER_TOP_FRONT_LEFT 0x1000
#define SPEAKER_TOP_FRONT_CENTER 0x2000
#define SPEAKER_TOP_FRONT_RIGHT 0x4000
#define SPEAKER_TOP_BACK_LEFT 0x8000
#define SPEAKER_TOP_BACK_CENTER 0x10000
#define SPEAKER_TOP_BACK_RIGHT 0x20000
#define SPEAKER_RESERVED 0x80000000
/* SNDDRV Function Prototypes */
int snddrv_start( int rate, int chans );
int snddrv_exit();
int snddrv_volume_up();
int snddrv_volume_down();
#endif

View File

@ -0,0 +1,36 @@
/*
**
** This file is a part of Dreamcast Media Center
** (C) Josh PH3NOM Pearson 2011
**
*/
#include <stdio.h>
#include <time.h>
#include <arch/timer.h>
#include <kos/thread.h>
#include "dc_timer.h"
/* Get current hardware timing using arch/timer.h */
int dc_get_time()
{
uint32 s, ms;
uint64 msec;
timer_ms_gettime(&s, &ms);
msec = (((uint64)s) * ((uint64)1000)) + ((uint64)ms);
return (int)msec;
}
/* Seconds elapsed since origin_ms, measured with dc_get_time(). */
static float dc_seconds_since( float origin_ms )
{
    return ( ( (float)dc_get_time()- origin_ms ) / 1000.0f );
}

/* Regulate the Video Frame Rate.
 *
 * AVI_video_rate  frames per second of the video
 * AVI_delay       clock value (ms) at which playback started
 * frameCounter    index of the frame about to be shown
 *
 * Yields the CPU until the wall clock has caught up with the time at
 * which frame frameCounter is due.
 */
void frame_delay( float AVI_video_rate, float AVI_delay, int frameCounter )
{
    const float due_at = frameCounter / AVI_video_rate;
    float now = dc_seconds_since( AVI_delay );

    //printf("AVI_real_time: %f, CPU_real_time %f\n", AVI_real_time, CPU_real_time );
    while ( now < due_at ) {
        now = dc_seconds_since( AVI_delay );
        thd_pass();
    }
}

495
samples/dreamroq/main.c Normal file
View File

@ -0,0 +1,495 @@
/*
* Dreamroq by Mike Melanson
* Updated by Josh Pearson to add audio support
*
* This is the sample Dreamcast player app, designed to be run under
* the KallistiOS operating system.
*/
/*
Name: Iaan micheal
Copyright:
Author: Ian micheal
Date: 12/08/23 05:17
Description: kos 2.0 up port threading fix and wrappers and all warnings fixed
Redone threading and main added benchmarking for timing acia and roq decoding audio
redone rendering order and code commented to be much easier to read.
example OUTPUT:> Wait for AICA Driver: 88 ms
OUTPUT:> Wait for RoQ Decoder: 1 ms
OUTPUT:> Copy PCM Samples: 1 ms
OUTPUT:> Inform AICA Driver: 0 ms
OUTPUT:> Wait for AICA Driver: 88 ms
OUTPUT:> Wait for RoQ Decoder: 0 ms
Before
OUTPUT:> Wait for AICA Driver: 168 ms
OUTPUT:> Wait for RoQ Decoder: 0 ms
OUTPUT:> Copy PCM Samples: 1 ms
OUTPUT:> Inform AICA Driver: 0 ms
OUTPUT:> Wait for AICA Driver: 187 ms
OUTPUT:> Wait for RoQ Decoder: 0 ms
OUTPUT:> Copy PCM Samples: 1 ms
OUTPUT:> Inform AICA Driver: 0 ms
OUTPUT:> Wait for AICA Driver: 197 ms
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <kos.h>
#include <dc/pvr.h>
#include <dc/maple.h>
#include <dc/maple/controller.h>
#include <kos/mutex.h>
#include <kos/thread.h>
#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glkos.h>
#include "dreamroqlib.h"
#include "libdcmc/dc_timer.h"
#include "libdcmc/snddrv.h"
#include <dc/sound/sound.h>
#include <stdio.h>
#include "profiler.h"
#ifdef __DREAMCAST__
extern uint8 romdisk[];
KOS_INIT_ROMDISK(romdisk);
#endif
/* Audio Global variables */
#define PCM_BUF_SIZE (1024 * 1024)
static unsigned char *pcm_buf = NULL;
static int pcm_size = 0;
#define AUDIO_THREAD_PRIO 0
kthread_t *audio_thread; // Thread handle for the audio thread
int audio_init = 0; // Flag to indicate audio initialization status
static mutex_t pcm_mut = MUTEX_INITIALIZER;
/* Video Global variables */
static pvr_ptr_t textures[2];
static int current_frame = 0;
static int graphics_initialized = 0;
static float video_delay;
GLfloat vertices[4][5];
GLuint frameTexture[2];
// Define the target frame rate
#define TARGET_FRAME_RATE 30
/* Audio feeder loop: moves decoded PCM from pcm_buf into the driver's
 * buffer each time the driver asks for more, printing how long every
 * step took. Runs while the decoder status is STREAMING and leaves
 * dec_status at SNDDEC_STATUS_NULL on exit. */
static void snd_thd()
{
    unsigned int t_begin, t_end;

    for (;;)
    {
        /* Step 1: wait until the AICA driver requests a buffer */
        t_begin = dc_get_time();
        while (snddrv.buf_status != SNDDRV_STATUS_NEEDBUF)
        {
            thd_pass();
        }
        t_end = dc_get_time();
        printf("Wait for AICA Driver: %u ms\n", t_end - t_begin);

        /* Step 2: wait until the RoQ decoder has produced enough bytes;
           give up as soon as the decoder reports DONE */
        t_begin = dc_get_time();
        while (pcm_size < snddrv.pcm_needed)
        {
            if (snddrv.dec_status == SNDDEC_STATUS_DONE)
            {
                goto done;
            }
            thd_pass();
        }
        t_end = dc_get_time();
        printf("Wait for RoQ Decoder: %u ms\n", t_end - t_begin);

        /* Step 3: hand over pcm_needed bytes and slide the rest down */
        t_begin = dc_get_time();
        mutex_lock(&pcm_mut);
        memcpy(snddrv.pcm_buffer, pcm_buf, snddrv.pcm_needed);
        pcm_size -= snddrv.pcm_needed;
        memmove(pcm_buf, pcm_buf + snddrv.pcm_needed, pcm_size);
        mutex_unlock(&pcm_mut);
        t_end = dc_get_time();
        printf("Copy PCM Samples: %u ms\n", t_end - t_begin);

        /* Step 4: tell the driver the buffer is ready */
        t_begin = dc_get_time();
        snddrv.buf_status = SNDDRV_STATUS_HAVEBUF;
        t_end = dc_get_time();
        printf("Inform AICA Driver: %u ms\n", t_end - t_begin);

        if (snddrv.dec_status != SNDDEC_STATUS_STREAMING)
        {
            break;
        }
    }

done:
    snddrv.dec_status = SNDDEC_STATUS_NULL;
}
/* Render callback using the PVR API directly.
 *
 * buf             decoded RGB565 frame, stride * texture_height pixels
 * width, height   visible size of the video
 * stride          texture width in pixels
 * texture_height  texture height in pixels
 *
 * The first call allocates two textures and precomputes the poly headers
 * and the quad. Every call uploads the frame, throttles to
 * TARGET_FRAME_RATE and draws one textured quad.
 *
 * Returns ROQ_SUCCESS, or ROQ_RENDER_PROBLEM if the textures cannot be
 * allocated.
 */
static int render_cb(unsigned short *buf, int width, int height, int stride,
                     int texture_height)
{
    pvr_poly_cxt_t cxt;
    static pvr_poly_hdr_t hdr[2];
    static pvr_vertex_t vert[4];
    float ratio;
    // screen coordinates of upper left and bottom right corners
    static int ul_x, ul_y, br_x, br_y;

    // Initialize textures, drawing coordinates, and other parameters
    if (!graphics_initialized)
    {
        textures[0] = pvr_mem_malloc(stride * texture_height * 2);
        textures[1] = pvr_mem_malloc(stride * texture_height * 2);
        if (!textures[0] || !textures[1])
        {
            // Release whichever allocation succeeded so a retry does not leak
            for (int i = 0; i < 2; i++) {
                if (textures[i]) {
                    pvr_mem_free(textures[i]);
                    textures[i] = NULL;
                }
            }
            return ROQ_RENDER_PROBLEM;
        }

        // Precompile the poly headers
        for (int i = 0; i < 2; i++) {
            pvr_poly_cxt_txr(&cxt, PVR_LIST_OP_POLY, PVR_TXRFMT_RGB565 | PVR_TXRFMT_NONTWIDDLED,
                             stride, texture_height, textures[i], PVR_FILTER_NONE);
            pvr_poly_compile(&hdr[i], &cxt);
        }

        // Calculate drawing coordinates: scale to 640 wide, centre vertically
        ratio = 640.0 / width;
        ul_x = 0;
        br_x = (int)(ratio * stride);
        ul_y = (int)((480 - ratio * height) / 2);
        br_y = ul_y + (int)(ratio * texture_height);

        // Set common vertex properties
        for (int i = 0; i < 4; i++) {
            vert[i].z = 1.0f;
            vert[i].argb = PVR_PACK_COLOR(1.0f, 1.0f, 1.0f, 1.0f);
            vert[i].oargb = 0;
            vert[i].flags = (i < 3) ? PVR_CMD_VERTEX : PVR_CMD_VERTEX_EOL;
        }

        // Initialize vertex coordinates and UV coordinates
        vert[0].x = ul_x;
        vert[0].y = ul_y;
        vert[0].u = 0.0;
        vert[0].v = 0.0;
        vert[1].x = br_x;
        vert[1].y = ul_y;
        vert[1].u = 1.0;
        vert[1].v = 0.0;
        vert[2].x = ul_x;
        vert[2].y = br_y;
        vert[2].u = 0.0;
        vert[2].v = 1.0;
        vert[3].x = br_x;
        vert[3].y = br_y;
        vert[3].u = 1.0;
        vert[3].v = 1.0;

        // Get the current hardware timing
        video_delay = (float)dc_get_time();
        graphics_initialized = 1;
    }

    // Send the video frame as a texture over to video RAM
    pvr_txr_load(buf, textures[current_frame], stride * texture_height * 2);

    // Calculate the elapsed time since the last frame
    unsigned int current_time = dc_get_time();
    unsigned int elapsed_time = current_time - (unsigned int)video_delay;
    unsigned int target_frame_time = 1000 / TARGET_FRAME_RATE;

    // If the elapsed time is less than the target frame time, introduce a delay
    if (elapsed_time < target_frame_time) {
        unsigned int delay_time = target_frame_time - elapsed_time;
        thd_sleep(delay_time);
        // The frame really starts after the sleep. Without this the sleep
        // would be counted as elapsed time on the next frame and shorten
        // its delay.
        current_time += delay_time;
    }

    // Update the hardware timing for the current frame
    video_delay = (float)current_time;

    pvr_wait_ready();
    pvr_scene_begin();
    pvr_list_begin(PVR_LIST_OP_POLY);

    // Render the frame using precompiled headers and vertices
    pvr_prim(&hdr[current_frame], sizeof(pvr_poly_hdr_t));
    for (int i = 0; i < 4; i++) {
        pvr_prim(&vert[i], sizeof(pvr_vertex_t));
    }

    pvr_list_finish();
    pvr_scene_finish();

    // Toggle between frames
    current_frame = 1 - current_frame;
    return ROQ_SUCCESS;
}
static int renderGLdc_cb(unsigned short *buf, int width, int height, int stride, int texture_height)
{
if(!graphics_initialized) {
glClearColor(0.0f, 0.0f, 0.0f, 1.0f); // This Will Clear The Background Color To Black
glClearDepth(1.0); // Enables Clearing Of The Depth Buffer
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
//glDisable(GL_DEPTH_TEST);
//glEnable(GL_NORMALIZE);
glMatrixMode(GL_PROJECTION);
glLoadIdentity(); // Reset The Projection Matrix
glOrtho(0.0, 640.0, 0.0, 480.0, -1.0, 1.0);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glDisable(GL_LIGHTING);
glEnable(GL_TEXTURE_2D);
glGenTextures(2, frameTexture);
glBindTexture(GL_TEXTURE_2D, frameTexture[0]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, NULL);
glBindTexture(GL_TEXTURE_2D, frameTexture[1]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, NULL);
video_delay = (float)dc_get_time();
float w = 512;
float h = 512;
int v = 0;
vertices[v][0] = 0;
vertices[v][1] = 0;
vertices[v][2] = 0;
vertices[v][3] = 0.0f;
vertices[v][4] = 1.0f;
v++;
vertices[v][0] = 0;
vertices[v][1] = 480;
vertices[v][2] = 0;
vertices[v][3] = 0.0f;
vertices[v][4] = 0.0f;
v++;
vertices[v][0] = 640;
vertices[v][1] = 0;
vertices[v][2] = 0;
vertices[v][3] = 1.0f;
vertices[v][4] = 1.0f;
v++;
vertices[v][0] = 640;
vertices[v][1] = 480;
vertices[v][2] = 0;
vertices[v][3] = 1.0f;
vertices[v][4] = 0.0f;
v++;
GLfloat drawColor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
GLfloat emissionColor[4] = {0.0, 0.0, 0.0, 1.0f};
glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, drawColor);
glMaterialfv(GL_FRONT, GL_SPECULAR, drawColor);
glMaterialfv(GL_FRONT, GL_EMISSION, emissionColor);
graphics_initialized = 1;
}
/* send the video frame as a texture over to video RAM */
//pvr_txr_load(buf, textures[current_frame], stride * texture_height * 2);
glBindTexture(GL_TEXTURE_2D, frameTexture[current_frame]);
//glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, buf);
glKosCopyTexture(buf, 512 * 512 * 2);
// Calculate the elapsed time since the last frame
unsigned int current_time = dc_get_time();
unsigned int elapsed_time = current_time - video_delay;
unsigned int target_frame_time = 1000 / TARGET_FRAME_RATE;
// If the elapsed time is less than the target frame time, introduce a delay
if (elapsed_time < target_frame_time) {
unsigned int delay_time = target_frame_time - elapsed_time;
thd_sleep(delay_time);
}
// Update the hardware timing for the current frame
video_delay = (float)current_time;
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
glVertexPointer (3, GL_FLOAT, sizeof(vertices[0]), &vertices[0][0]);
glTexCoordPointer (2, GL_FLOAT, sizeof(vertices[0]), &vertices[0][3]);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glKosSwapBuffers();
current_frame = 1 - current_frame;
return ROQ_SUCCESS;
}
static int audio_cb(unsigned char *buf, int size, int channels)
{
// Copy the decoded PCM samples to our local PCM buffer
mutex_lock(&pcm_mut);
memcpy(pcm_buf + pcm_size, buf, size);
pcm_size += size;
mutex_unlock(&pcm_mut);
return ROQ_SUCCESS;
}
// Thread entry point for the audio thread: runs snd_thd() and logs how
// long it was alive. The argument is unused; always returns NULL.
static void *snd_thd_wrapper(void *arg)
{
    unsigned int t_begin, t_end;

    (void)arg;

    printf("Audio Thread: Started\n");

    t_begin = dc_get_time();
    snd_thd();
    t_end = dc_get_time();

    printf("Audio Thread: Finished (Time: %u ms)\n", t_end - t_begin);
    return NULL;
}
/* Quit callback polled by the player.
 *
 * Returns 1 (stop playback) when the audio decoder status is DONE or a
 * controller has Start pressed, otherwise 0.
 *
 * NOTE(review): last_time is only refreshed once at least 1000 ms have
 * passed, so the delay below can only trigger within the first frame
 * interval after each refresh - confirm whether per-frame pacing was
 * intended here.
 */
static int quit_cb()
{
    static int frame_count = 0;
    static unsigned int last_time = 0;
    static unsigned int target_frame_time = 1000 / 30; // 30 FPS

    // Time since last_time was last refreshed
    const unsigned int now = dc_get_time();
    const unsigned int since_mark = now - last_time;

    // Stop when the audio decoding status is done
    if (snddrv.dec_status == SNDDEC_STATUS_DONE) {
        printf("Exiting due to audio decoding status\n");
        return 1;
    }

    // Stop when Start is pressed on any controller
    MAPLE_FOREACH_BEGIN(MAPLE_FUNC_CONTROLLER, cont_state_t, st)
        if (st->buttons & CONT_START) {
            printf("Exiting due to Start button\n");
            return 1;
        }
    MAPLE_FOREACH_END()

    // Sleep off whatever is left of the target frame time
    if (since_mark < target_frame_time) {
        thd_sleep(target_frame_time - since_mark);
    }

    // Once per second: restart the counter and move the reference time
    if (since_mark >= 1000) {
        // double fps = (double)frame_count / (elapsed_time / 1000.0);
        // printf("FPS: %.2lf\n", fps);
        frame_count = 0;
        last_time = now;
    }

    fflush(stdout); // Flush the output buffer to ensure immediate display
    frame_count++;
    return 0;
}
/* Sample player entry point.
 *
 * Starts the profiler and GLdc, brings up the sound driver and the audio
 * feeder thread, plays /rd/movie.roq through the GLdc render callback and
 * then releases what was set up.
 *
 * Returns 1 if the PCM buffer or the audio thread cannot be created,
 * otherwise 0.
 */
int main()
{
    int status = 0;

    profiler_init("/pc/out.gmon");
    profiler_start();

    glKosInit();

    printf("dreamroq_play(C) Multimedia Mike Melanson & Josh PH3NOM Pearson 2011\n");
    printf("dreamroq_play(C) Ian micheal Up port to Kos2.0 sound fix and threading\n");
    printf("dreamroq_play(C) Ian micheal Kos2.0 free and exit when loop ends 2023\n");
    printf("dreamroq_play(C) Ian micheal redo frame limit code and rendering and comment what it does 2023\n");

    // Initialize audio resources and create the audio thread
    if (!audio_init)
    {
        pcm_buf = malloc(PCM_BUF_SIZE);
        if (pcm_buf == NULL)
        {
            printf("Failed to allocate PCM buffer\n");
            return 1;
        }

        snddrv_start(22050, 2);
        snddrv.dec_status = SNDDEC_STATUS_STREAMING;

        printf("Creating Audio Thread\n");
        audio_thread = thd_create(AUDIO_THREAD_PRIO, snd_thd_wrapper, NULL);
        if (!audio_thread)
        {
            printf("Failed to create audio thread\n");
            free(pcm_buf);
            pcm_buf = NULL;
            return 1;
        }
        audio_init = 1;
    }

    /* To disable a callback, simply replace the function name by 0 */
    status = dreamroq_play("/rd/movie.roq", 0, renderGLdc_cb, audio_cb, quit_cb);
    //status = dreamroq_play("/cd/romdisk/movie.roq", 0, renderGLdc_cb, audio_cb, quit_cb);
    printf("dreamroq_play() status = %d\n", status);

    // Terminate and clean up the audio thread
    if (audio_init)
    {
        snddrv.dec_status = SNDDEC_STATUS_DONE;
        printf("Waiting for audio thread to finish...\n");

        /* Join rather than destroy: the thread resets dec_status before it
           has actually returned, so destroying it on that signal could
           remove it while it is still running its last statements */
        thd_join(audio_thread, NULL);
        audio_thread = NULL;

        free(pcm_buf);
        pcm_buf = NULL;
        pcm_size = 0;
    }

    if (graphics_initialized)
    {
        /* textures[] is only filled by the PVR render path; the GLdc path
           leaves both entries NULL, so free only what was allocated */
        int freed = 0;
        for (int i = 0; i < 2; i++) {
            if (textures[i]) {
                pvr_mem_free(textures[i]);
                textures[i] = NULL;
                freed = 1;
            }
        }
        if (freed)
            printf("Freed PVR memory\n");
    }

    profiler_stop();
    profiler_clean_up();

    printf("Exiting main()\n");
    return 0;
}

452
samples/dreamroq/profiler.c Normal file
View File

@ -0,0 +1,452 @@
#include <stdbool.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <kos/thread.h>
#include <dc/fs_dcload.h>
static char OUTPUT_FILENAME[128];
static kthread_t* THREAD;
static volatile bool PROFILER_RUNNING = false;
static volatile bool PROFILER_RECORDING = false;
#define BASE_ADDRESS 0x8c010000
#define BUCKET_SIZE 10000
#define INTERVAL_IN_MS 10
/* One entry of the sample hash table. The table is an array of Arc; each
 * array slot can also be the head of a singly linked list of further
 * entries that hashed to the same slot. */
typedef struct Arc {
    uint32_t pc;        /* sampled program counter */
    uint32_t pr;        /* caller return address */
    uint32_t count;     /* number of times this pc/pr pair was seen */
    struct Arc* next;   /* next entry in the same slot, or NULL */
} Arc;
static Arc ARCS[BUCKET_SIZE];
/* Hashing function for two uint32_ts. Both arguments are parenthesised so
 * expressions such as HASH_PAIR(a + b, c | d) expand as intended. */
#define HASH_PAIR(x, y) (((x) * 0x1f1f1f1fu) ^ (y))
#define BUFFER_SIZE (1024 * 8) // 8K buffer
const static size_t MAX_ARC_COUNT = BUFFER_SIZE / sizeof(Arc);
static size_t ARC_COUNT = 0;
static bool WRITE_TO_STDOUT = false;
static bool write_samples(const char* path);
static bool write_samples_to_stdout();
static void clear_samples();
/* Allocate a chained Arc for the given PC/PR pair with a count of 1.
 *
 * Returns the new node, or NULL if the allocation fails. ARC_COUNT is
 * only incremented on success.
 */
static Arc* new_arc(uint32_t PC, uint32_t PR) {
    Arc* s = malloc(sizeof *s);
    if(!s) {
        return NULL;
    }

    s->count = 1;
    s->pc = PC;
    s->pr = PR;
    s->next = NULL;

    ++ARC_COUNT;
    return s;
}
/* Record one sample for the PC/PR pair.
 *
 * The pair selects a slot in ARCS. An unused slot (pc == 0) is claimed
 * directly. Otherwise the slot's chain is searched: a matching entry gets
 * its count bumped, and if none matches a new node is appended.
 */
static void record_thread(uint32_t PC, uint32_t PR) {
    uint32_t slot = HASH_PAIR(PC, PR) % BUCKET_SIZE;
    Arc* node = &ARCS[slot];

    if(!node->pc) {
        /* First use of this slot */
        node->count = 1;
        node->pc = PC;
        node->pr = PR;
        node->next = NULL;
        ++ARC_COUNT;
        return;
    }

    for(;;) {
        if(node->pc == PC && node->pr == PR) {
            node->count++;
            return;
        }

        if(!node->next) {
            /* End of chain without a match: append */
            node->next = new_arc(PC, PR);
            return;
        }

        node = node->next;
    }
}
/* Per-thread callback for thd_each(): samples the saved PC and PR of the
 * thread labelled "[kernel]" and ignores every other thread.
 * Always returns 0. */
static int thd_each_cb(kthread_t* thd, void* data) {
    (void) data;

    /* Only record the main thread (for now) */
    if(strcmp(thd->label, "[kernel]") == 0) {
        /* This runs in the profiling thread, so the saved contexts of the
         * other threads are current. The PC keeps changing between
         * iterations, so this is not a true snapshot in time across
         * threads. */
        int irq_state = irq_disable();
        uint32_t sampled_pc = thd->context.pc;
        uint32_t sampled_pr = thd->context.pr;
        irq_restore(irq_state);

        record_thread(sampled_pc, sampled_pr);
    }

    return 0;
}
/* Take one sample of every thread and flush to the output file once the
 * number of recorded arcs reaches MAX_ARC_COUNT. */
static void record_samples() {
    /* Remember the count so we can tell whether this pass added arcs */
    size_t initial = ARC_COUNT;

    /* Note: This is a function added to kallistios-nitro that's
     * not yet available upstream */
    thd_each(&thd_each_cb, NULL);

    if(ARC_COUNT >= MAX_ARC_COUNT) {
        /* TIME TO FLUSH! */
        if(!write_samples(OUTPUT_FILENAME)) {
            fprintf(stderr, "Error writing samples\n");
        }
    }

    /* We log when the number of PCs recorded hits a certain increment.
     * ARC_COUNT is a size_t, so convert it explicitly for %d. */
    if((initial != ARC_COUNT) && ((ARC_COUNT % 1000) == 0)) {
        printf("-- %d arcs recorded...\n", (int) ARC_COUNT);
    }
}
/* Declared in KOS in fs_dcload.c */
int fs_dcload_detected();
extern int dcload_type;
#define GMON_COOKIE "gmon"
#define GMON_VERSION 1
/* File header written once at the start of the output file
   (see init_sample_file). */
typedef struct {
char cookie[4]; // 'g','m','o','n'
int32_t version; // 1
char spare[3 * 4]; // Padding
} GmonHeader;
/* Header written in front of the histogram bins (see write_samples). */
typedef struct {
uint32_t low_pc; /* lowest address covered by the histogram */
uint32_t high_pc; /* highest address covered by the histogram */
uint32_t hist_size; /* number of bins that follow */
uint32_t prof_rate; /* write_samples stores INTERVAL_IN_MS here */
char dimen[15]; /* phys. dim., usually "seconds" */
char dimen_abbrev; /* usually 's' for "seconds" */
} GmonHistHeader;
/* Basic-block record header. */
typedef struct {
unsigned char tag; // GMON_TAG_TIME_HIST = 0, GMON_TAG_CG_ARC = 1, GMON_TAG_BB_COUNT = 2
size_t ncounts; // Number of address/count pairs in this sequence
} GmonBBHeader;
/* One call-graph arc record; written after a one-byte tag of 1. */
typedef struct {
uint32_t from_pc; /* address within caller's body */
uint32_t self_pc; /* address within callee's body */
uint32_t count; /* number of arc traversals */
} GmonArc;
/* Create the output file and write the gmon file header to it.
 *
 * Returns true when the header was written. Returns false, and switches
 * sample output to stdout, when the dcload check fails or the file cannot
 * be opened.
 */
static bool init_sample_file(const char* path) {
    printf("Detecting dcload... ");

    if(!fs_dcload_detected() && dcload_type != DCLOAD_TYPE_NONE) {
        printf("[Not Found]\n");
        WRITE_TO_STDOUT = true;
        return false;
    }
    printf("[Found]\n");

    FILE* fp = fopen(path, "w");
    if(fp == NULL) {
        WRITE_TO_STDOUT = true;
        return false;
    }

    /* Build the GMON header: cookie, version 1, zeroed spare bytes */
    GmonHeader hdr;
    memset(hdr.spare, '\0', sizeof(hdr.spare));
    hdr.version = 1;
    memcpy(&hdr.cookie[0], GMON_COOKIE, sizeof(hdr.cookie));

    fwrite(&hdr, sizeof(hdr), 1, fp);
    fclose(fp);
    return true;
}
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
/* Append the recorded samples to the output file in gmon format, then
 * clear them.
 *
 * Every recorded arc is written as a tagged GmonArc record. In the same
 * pass each arc's PC bumps one histogram bin, and the histogram is
 * written after the arcs. When WRITE_TO_STDOUT is set the samples go to
 * stdout as CSV instead.
 *
 * Returns true on success; false if the bins cannot be allocated, the
 * file cannot be opened, or a write fails.
 */
static bool write_samples(const char* path) {
    if(WRITE_TO_STDOUT) {
        write_samples_to_stdout();
        return true;
    }

    extern char _etext;
    const uint32_t HISTFRACTION = 8;

    /* We know the lowest address, it's the same for all DC games */
    uint32_t lowest_address = ROUNDDOWN(BASE_ADDRESS, HISTFRACTION);

    /* We need to calculate the highest address though */
    uint32_t highest_address = ROUNDUP((uint32_t) &_etext, HISTFRACTION);

    if(highest_address <= lowest_address) {
        fprintf(stderr, "-- Error: invalid address range for histogram\n");
        return false;
    }

    /* Histogram data. Both bounds are multiples of HISTFRACTION, so
     * BIN_COUNT is at least 1 and bin_size below cannot divide by zero. */
    const int BIN_COUNT = ((highest_address - lowest_address) / HISTFRACTION);
    uint32_t histogram_range = highest_address - lowest_address;
    uint32_t bin_size = histogram_range / BIN_COUNT;

    uint16_t* bins = calloc(BIN_COUNT, sizeof *bins);
    if(!bins) {
        fprintf(stderr, "-- Error allocating histogram bins\n");
        return false;
    }

    FILE* out = fopen(path, "r+"); /* Append, as init_sample_file would have created the file */
    if(!out) {
        fprintf(stderr, "-- Error writing samples to output file\n");
        free(bins);
        return false;
    }

    // Seek to the end of the file
    fseek(out, 0, SEEK_END);

    printf("-- Writing %d arcs\n", (int) ARC_COUNT);

    bool ok = true;
    const uint8_t tag = 1;
    size_t written = 0;

    /* Write arcs and fill the histogram in one pass */
    for(int i = 0; i < BUCKET_SIZE; ++i) {
        const Arc* root = &ARCS[i];

        /* A slot without a program counter is unused */
        if(!root->pc) {
            continue;
        }

        for(const Arc* s = root; s; s = s->next) {
            assert(s->pc);

            GmonArc arc;
            arc.from_pc = s->pr;
            arc.self_pc = s->pc;
            arc.count = s->count;

            if(fwrite(&tag, sizeof(tag), 1, out) != 1 ||
               fwrite(&arc, sizeof(GmonArc), 1, out) != 1) {
                ok = false;
            }
            ++written;

            /* Addresses outside the histogram range have no bin */
            if(s->pc < lowest_address || s->pc >= highest_address) {
                continue;
            }

            uint32_t bin = (s->pc - lowest_address) / bin_size;

            /* Progress output is only printed for the head of each slot */
            if(s == root) {
                printf("Incrementing %d for %x. ", (int) bin, (unsigned int) s->pc);
            }
            bins[bin]++;
            if(s == root) {
                printf("Now: %d\n", (int) bins[bin]);
            }
        }
    }

    /* Write histogram now that we have all the information we need.
     * The header is zeroed first so the unused tail of dimen does not
     * carry uninitialised bytes into the file. */
    GmonHistHeader hist_header;
    memset(&hist_header, 0, sizeof(hist_header));
    hist_header.low_pc = lowest_address;
    hist_header.high_pc = highest_address;
    hist_header.hist_size = BIN_COUNT;
    /* NOTE(review): this stores the sampling interval in ms; confirm
     * whether the consumer expects a rate in samples per second here. */
    hist_header.prof_rate = INTERVAL_IN_MS;
    strcpy(hist_header.dimen, "seconds");
    hist_header.dimen_abbrev = 's';

    unsigned char hist_tag = 0;
    if(fwrite(&hist_tag, sizeof(hist_tag), 1, out) != 1 ||
       fwrite(&hist_header, sizeof(hist_header), 1, out) != 1 ||
       fwrite(bins, sizeof(uint16_t), BIN_COUNT, out) != (size_t) BIN_COUNT) {
        ok = false;
    }

    if(fclose(out) != 0) {
        ok = false;
    }
    free(bins);

    /* We should have written all the recorded samples */
    assert(written == ARC_COUNT);
    (void) written;

    if(!ok) {
        fprintf(stderr, "-- Error writing samples to output file\n");
    }

    /* Samples are cleared even after a failed write so the table does
     * not keep growing */
    clear_samples();
    return ok;
}
/* Write samples to stdout as a CSV file for processing.
 *
 * Emits a separator line, a "PC", "PR", "COUNT" header, one row per
 * recorded arc (each in-use bucket root and every node chained off it),
 * and a closing separator line. Always returns true. */
static bool write_samples_to_stdout() {
    printf("--------------\n");
    printf("\"PC\", \"PR\", \"COUNT\"\n");

    Arc* root = ARCS;
    for(int i = 0; i < BUCKET_SIZE; ++i, ++root) {
        /* A root without a program counter is an unused bucket */
        if(!root->pc) {
            continue;
        }

        /* Walk until the node itself is NULL (not until its next pointer
         * is NULL), otherwise the last node of every chain is skipped and
         * buckets holding a single sample print nothing at all */
        for(Arc* s = root; s; s = s->next) {
            printf("\"%x\", \"%x\", \"%u\"\n", (unsigned int) s->pc, (unsigned int) s->pr, (unsigned int) s->count);
        }
    }

    printf("--------------\n");

    return true;
}
/* Entry point of the profiler thread.
 *
 * Loops for as long as PROFILER_RUNNING is set. While PROFILER_RECORDING
 * is set it takes a sample via record_samples() on every pass. The
 * thread sleeps for one sampling interval on every pass, recording or
 * not, so it does not spin at full speed while profiling is paused.
 *
 * args is unused. Always returns NULL. */
static void* run(void* args) {
    (void) args;

    printf("-- Entered profiler thread!\n");

    while(PROFILER_RUNNING) {
        if(PROFILER_RECORDING) {
            record_samples();
        }

        /* usleep takes microseconds, hence the conversion from ms */
        usleep(INTERVAL_IN_MS * 1000);
    }

    printf("-- Profiler thread finished!\n");
    return NULL;
}
/* Prepares the profiler and launches its background thread.
 *
 * output is the path of the samples file; it is copied into
 * OUTPUT_FILENAME (truncated if it does not fit) and the file is created
 * through init_sample_file(). The sample table is zeroed and the thread
 * running run() is started. Recording itself only begins once
 * profiler_start() is called. */
void profiler_init(const char* output) {
    /* Store the filename. strncpy does not terminate the destination when
     * the source fills it, so copy one byte less and terminate by hand */
    strncpy(OUTPUT_FILENAME, output, sizeof(OUTPUT_FILENAME) - 1);
    OUTPUT_FILENAME[sizeof(OUTPUT_FILENAME) - 1] = '\0';

    /* Initialize the file */
    printf("Creating samples file...\n");

    if(!init_sample_file(OUTPUT_FILENAME)) {
        /* NOTE(review): nothing here switches the profiler over to stdout
         * output; confirm WRITE_TO_STDOUT is set somewhere else */
        printf("Read-only filesytem. Writing samples to stdout\n");
    }

    printf("Creating profiler thread...\n");

    /* Initialize the samples to zero */
    memset(ARCS, 0, sizeof(ARCS));

    PROFILER_RUNNING = true;
    THREAD = thd_create(0, run, NULL);

    if(!THREAD) {
        /* No thread to configure: undo the running flag and bail out */
        PROFILER_RUNNING = false;
        printf("Failed to create profiler thread.\n");
        return;
    }

    /* Lower priority is... er, higher */
    thd_set_prio(THREAD, PRIO_DEFAULT / 2);

    printf("Thread started.\n");
}
/* Switches sample recording on.
 *
 * The profiler must already be running (asserted). Calling this while
 * recording is already active does nothing. */
void profiler_start() {
    assert(PROFILER_RUNNING);

    if(!PROFILER_RECORDING) {
        PROFILER_RECORDING = true;
        printf("Starting profiling...\n");
    }
}
/* Discards every collected sample so recording can start afresh.
 *
 * Nodes chained off each bucket are released with free(); the bucket
 * roots live in ARCS itself and are only zeroed. ARC_COUNT is reset. */
static void clear_samples() {
    for(int bucket = 0; bucket < BUCKET_SIZE; ++bucket) {
        Arc* head = &ARCS[bucket];
        Arc* node = head->next;

        /* Grab the successor before releasing the current node */
        while(node) {
            Arc* following = node->next;
            free(node);
            node = following;
        }

        /* The chain is gone; the root stays as it is statically allocated */
        head->next = NULL;
    }

    /* Wipe the lot */
    memset(ARCS, 0, sizeof(ARCS));
    ARC_COUNT = 0;
}
/* Stops recording and flushes the collected samples.
 *
 * Returns false when recording was not active, or when the samples
 * could not be written to OUTPUT_FILENAME. In the latter case the
 * samples are dumped to stdout instead, as the error message announces,
 * and then cleared so they are not reported a second time. Returns true
 * when the samples were written by write_samples(). */
bool profiler_stop() {
    if(!PROFILER_RECORDING) {
        return false;
    }

    printf("Stopping profiling...\n");

    PROFILER_RECORDING = false;

    if(!write_samples(OUTPUT_FILENAME)) {
        printf("ERROR WRITING SAMPLES (RO filesystem?)! Outputting to stdout\n");

        /* Actually perform the fallback the message promises */
        write_samples_to_stdout();
        clear_samples();

        return false;
    }

    return true;
}
/* Shuts the profiler down: stops any active recording, asks the
 * profiler thread to leave its loop and waits for it to finish. */
void profiler_clean_up() {
    /* Make sure everything is stopped; the result is not needed here */
    (void) profiler_stop();

    /* run() keeps looping only while this flag is set */
    PROFILER_RUNNING = false;

    thd_join(THREAD, NULL);
}

View File

@ -0,0 +1,18 @@
#pragma once
/*
 * The Dreamcast doesn't have any kind of profiling support from GCC
 * so this is a cumbersome sampling profiler that runs in a background thread
 */
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif
/* Prepares the profiler; output is the path of the samples file */
void profiler_init(const char* output);
/* Begins recording samples; profiler_init must have been called first */
void profiler_start();
/* Stops recording and writes out the samples. The definition returns
 * bool, so the declaration has to as well: false when recording was not
 * active or the samples could not be written, true otherwise */
bool profiler_stop();
/* Stops recording if needed and waits for the profiler thread to end */
void profiler_clean_up();
#ifdef __cplusplus
}
#endif

Binary file not shown.