diff --git a/CMakeLists.txt b/CMakeLists.txt
index 65b940f..759d09c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -210,6 +210,8 @@ gen_sample(polymark samples/polymark/main.c)
 gen_sample(cubes samples/cubes/main.cpp)
 gen_sample(zclip_test tests/zclip/main.cpp)
 
+gen_sample(dreamroq samples/dreamroq/main.c samples/dreamroq/dreamroqlib.c samples/dreamroq/libdcmc/snd_stream.c samples/dreamroq/libdcmc/snddrv.c samples/dreamroq/libdcmc/timer.c samples/dreamroq/profiler.c)
+
 if(PLATFORM_DREAMCAST)
     gen_sample(trimark samples/trimark/main.c)
     gen_sample(quadmark samples/quadmark/main.c samples/profiler.c)
diff --git a/GL/texture.c b/GL/texture.c
index f54cb2e..7e1e6f8 100644
--- a/GL/texture.c
+++ b/GL/texture.c
@@ -2062,3 +2062,32 @@ GLAPI void APIENTRY glGetTexImage(GLenum tex, GLint lod, GLenum format, GLenum t
     _GL_UNUSED(type);
     _GL_UNUSED(img);
 }
+
+/*
+ * Copy `bytes` bytes from `data` into the texel storage of the texture
+ * currently bound to the active texture unit.
+ *
+ * The call is a no-op when no texture is bound, the bound texture has no
+ * storage yet, `data` is NULL, or `bytes` is zero.
+ * NOTE(review): `bytes` is not checked against the size of the texture's
+ * storage; the caller must not pass more than the texture was allocated with.
+ */
+GLAPI void APIENTRY glKosCopyTexture(void* data, GLuint bytes) {
+    TextureObject* active = TEXTURE_UNITS[ACTIVE_TEXTURE];
+
+    if(!active || !active->data || !data || !bytes) {
+        return;
+    }
+
+    FASTCPY(active->data, data, bytes);
+
+    /*
+    //Set PVR DMA register
+    *((volatile int *)0xA05F6888) = 1;
+
+    //Convert read/write area pointer to DMA write only area pointer
+    void *dmaareaptr = ((uintptr_t)active->data & 0xffffff) | 0x11000000;
+
+    sq_cpy(dmaareaptr, data, bytes);
+    */
+}
\ No newline at end of file
diff --git a/include/GL/glkos.h b/include/GL/glkos.h
index 81827d5..3455ee4 100644
--- a/include/GL/glkos.h
+++ b/include/GL/glkos.h
@@ -113,6 +113,8 @@ GLAPI void APIENTRY glKosShutdown();
  * by default textures use shared palette 0.
*/ +GLAPI void APIENTRY glKosCopyTexture(void *src, GLuint bytes); + #define GL_SHARED_TEXTURE_PALETTE_0_KOS 0xEEFC #define GL_SHARED_TEXTURE_PALETTE_1_KOS 0xEEFD diff --git a/samples/dreamroq/LICENSE.KOS b/samples/dreamroq/LICENSE.KOS new file mode 100644 index 0000000..464d6a1 --- /dev/null +++ b/samples/dreamroq/LICENSE.KOS @@ -0,0 +1,64 @@ +Dreamroq is covered under the same terms as the KallistiOS license which +is copied below: + + +Most of the code of KallistiOS proper is currently covered under the KOS +License, which are the terms of the *new* BSD license with our names +inserted as the copyright holders and the "advertising clause" removed +entirely. In all files that state that they are part of the KallistiOS +operating system, you can assume that the following text is inserted in +the header: + +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the KOS License. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* KOS License (README.KOS) for more details. +* +* You should have received a copy of the KOS License along with this +* program; if not, please visit Cryptic Allusion DCDev at: +* +* http://dcdev.allusion.net/ +* + +The text of that license follows. In layman's terms, all it really +says is that you have to give credit where credit is due (both in +derived source files and binary compilations; a credit in the +documentation is ok) and there is no warranty. + + Dan Potter + + +All of the documentation and software included in the KallistiOS Releases +is copyrighted (c)2000-2002 by Dan Potter and others (as noted in each file). + +Copyright 2000, 2001, 2002 + Dan Potter and others (as noted in each file). All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. Neither the name of Cryptic Allusion nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + diff --git a/samples/dreamroq/dreamroqlib.c b/samples/dreamroq/dreamroqlib.c new file mode 100644 index 0000000..43e4c78 --- /dev/null +++ b/samples/dreamroq/dreamroqlib.c @@ -0,0 +1,572 @@ +/* + * Dreamroq by Mike Melanson + * Updated by Josh Pearson to add audio support + * + * This is the main playback engine. 
+ */
+/*
+	Name:Ian micheal
+	Date: 15/08/23 08:16
+	Description: kos filesystem api port
+*/
+
+/* NOTE(review): the header names of the next six #include lines are missing
+ * from this copy of the patch (the <...> part was stripped during
+ * extraction); restore them from the upstream file before applying. */
+#include
+#include
+#include
+#include
+#include
+#include // Include the KOS filesystem header
+#include "dreamroqlib.h"
+
+/* RoQ chunk identifiers */
+#define RoQ_INFO           0x1001
+#define RoQ_QUAD_CODEBOOK  0x1002
+#define RoQ_QUAD_VQ        0x1011
+#define RoQ_SOUND_MONO     0x1020
+#define RoQ_SOUND_STEREO   0x1021
+#define RoQ_SIGNATURE      0x1084
+
+#define CHUNK_HEADER_SIZE 8
+
+/* Little-endian readers for an unsigned char pointer.  LE_32 widens every
+ * byte to unsigned before shifting so that a top byte >= 0x80 is never
+ * shifted into the sign bit of an int. */
+#define LE_16(buf) ((buf)[0] | ((buf)[1] << 8))
+#define LE_32(buf) ((unsigned int)(buf)[0] | \
+                    ((unsigned int)(buf)[1] << 8) | \
+                    ((unsigned int)(buf)[2] << 16) | \
+                    ((unsigned int)(buf)[3] << 24))
+
+#define MAX_BUF_SIZE (64 * 1024)
+
+
+#define ROQ_CODEBOOK_SIZE 256
+
+/* Audio decoder state shared with the playback loop. */
+struct roq_audio
+{
+    int pcm_samples;
+    int channels;
+    int position;
+    short snd_sqr_arr[260];
+    /* Every encoded byte expands to one 16-bit sample, so a chunk of up to
+     * MAX_BUF_SIZE bytes needs twice that much room once decoded. */
+    unsigned char pcm_sample[MAX_BUF_SIZE * 2];
+}roq_audio;
+
+/* Video decoder state: picture geometry, the two RGB565 frames that are
+ * ping-ponged between "this" and "last", and the two codebooks. */
+typedef struct
+{
+    int width;
+    int height;
+    int mb_width;
+    int mb_height;
+    int mb_count;
+
+    int current_frame;
+    unsigned short *frame[2] __attribute__(( aligned(32)));
+    int stride;
+    int texture_height;
+
+    unsigned short cb2x2[ROQ_CODEBOOK_SIZE][4];
+    unsigned short cb4x4[ROQ_CODEBOOK_SIZE][16];
+} roq_state;
+
+
+
+/*
+ * Decode a RoQ_QUAD_CODEBOOK chunk into state->cb2x2 / state->cb4x4.
+ *
+ * buf/size: chunk payload; arg: chunk argument, whose high byte is the
+ * number of 2x2 vectors and low byte the number of 4x4 vectors (0 means 256,
+ * for the 4x4 count only when the payload is large enough).
+ * Returns ROQ_SUCCESS, or ROQ_BAD_CODEBOOK when the payload size does not
+ * match the vector counts.
+ */
+static int roq_unpack_quad_codebook(unsigned char *buf, int size, int arg,
+    roq_state *state)
+{
+    int y[4];
+    int yp, u, v;
+    int r, g, b;
+    int count2x2;
+    int count4x4;
+    int i, j;
+    unsigned short *v2x2;
+    unsigned short *v4x4;
+
+    count2x2 = (arg >> 8) & 0xFF;
+    count4x4 =  arg       & 0xFF;
+
+    if (!count2x2)
+        count2x2 = ROQ_CODEBOOK_SIZE;
+    /* 0x00 means 256 4x4 vectors iff there is enough space in the chunk
+     * after accounting for the 2x2 vectors */
+    if (!count4x4 && count2x2 * 6 < size)
+        count4x4 = ROQ_CODEBOOK_SIZE;
+
+    /* size sanity check */
+    if ((count2x2 * 6 + count4x4 * 4) != size)
+    {
+        return ROQ_BAD_CODEBOOK;
+    }
+
+    /* unpack the 2x2 vectors */
+    for (i = 0; i < count2x2; i++)
+    {
+        /* unpack the YUV components from the bytestream */
+        for (j = 0; j < 4; j++)
+            y[j] = *buf++;
+        u = *buf++;
+        v = *buf++;
+
+        /* convert to RGB565 */
+        for (j = 0; j < 4; j++)
+        {
+            yp = (y[j] - 16) * 1.164;
+            r = (yp + 1.596 * (v - 128)) / 8;
+            g = (yp - 0.813 * (v - 128) - 0.391 * (u - 128)) / 4;
+            b = (yp + 2.018 * (u - 128)) / 8;
+
+            if (r < 0) r = 0;
+            if (r > 31) r = 31;
+            if (g < 0) g = 0;
+            if (g > 63) g = 63;
+            if (b < 0) b = 0;
+            if (b > 31) b = 31;
+
+            state->cb2x2[i][j] = (
+                (r << 11) |
+                (g <<  5) |
+                (b <<  0) );
+        }
+    }
+
+    /* unpack the 4x4 vectors: each is four 2x2 vector indices laid out as
+     * the quadrants of a 4x4 tile */
+    for (i = 0; i < count4x4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            v2x2 = state->cb2x2[*buf++];
+            v4x4 = state->cb4x4[i] + (j / 2) * 8 + (j % 2) * 2;
+            v4x4[0] = v2x2[0];
+            v4x4[1] = v2x2[1];
+            v4x4[4] = v2x2[2];
+            v4x4[5] = v2x2[3];
+        }
+    }
+
+    return ROQ_SUCCESS;
+}
+
+/* Fetch the next payload byte into x, or flag ROQ_BAD_VQ_STREAM and yield 0
+ * when the payload is exhausted.  Uses the caller's buf/index/size/status. */
+#define GET_BYTE(x) \
+    do { \
+        if (index >= size) { \
+            status = ROQ_BAD_VQ_STREAM; \
+            (x) = 0; \
+        } else { \
+            (x) = buf[index++]; \
+        } \
+    } while (0)
+
+/* Fetch the next 2-bit coding mode into `mode`, refilling the 16-bit mode
+ * word from the payload every eight modes. */
+#define GET_MODE() \
+    do { \
+        if (!mode_count) { \
+            GET_BYTE(mode_lo); \
+            GET_BYTE(mode_hi); \
+            mode_set = (mode_hi << 8) | mode_lo; \
+            mode_count = 16; \
+        } \
+        mode_count -= 2; \
+        mode = (mode_set >> mode_count) & 0x03; \
+    } while (0)
+
+/* Return non-zero when a block_size x block_size block whose top-left pixel
+ * is at linear pixel offset `offset` lies entirely inside a frame buffer of
+ * frame_pixels pixels with the given stride. */
+static int roq_block_in_frame(int offset, int block_size, int stride,
+    int frame_pixels)
+{
+    return offset >= 0 &&
+        offset + (block_size - 1) * stride + block_size <= frame_pixels;
+}
+
+/*
+ * Decode a RoQ_QUAD_VQ chunk into the next frame of `state`.
+ *
+ * buf/size: chunk payload; arg: chunk argument carrying the mean motion
+ * vector (high byte x, low byte y, each a signed byte).
+ * Flips state->current_frame, then fills that frame macroblock by
+ * macroblock from the codebooks and from the previous frame.
+ * Returns ROQ_SUCCESS, or ROQ_BAD_VQ_STREAM when the payload is truncated,
+ * is not fully consumed, references pixels outside the previous frame, or
+ * the frames have not been allocated.
+ */
+static int roq_unpack_vq(unsigned char *buf, int size, unsigned int arg,
+    roq_state *state)
+{
+    int status = ROQ_SUCCESS;
+    int mb_x, mb_y;
+    int block;     /* 8x8 blocks */
+    int subblock;  /* 4x4 blocks */
+    int stride = state->stride;
+    int frame_pixels = stride * state->texture_height;
+    int i;
+
+    /* frame and pixel management */
+    unsigned short *this_frame;
+    unsigned short *last_frame;
+
+    int line_offset;
+    int mb_offset;
+    int block_offset;
+    int subblock_offset;
+    int src_offset;
+
+    unsigned short *this_ptr;
+    unsigned int *this_ptr32;
+    unsigned short *last_ptr;
+    unsigned short *vector16;
+    unsigned int *vector32;
+    int stride32 = stride / 2;
+
+    /* bytestream management */
+    int index = 0;
+    int mode_set = 0;
+    int mode, mode_lo, mode_hi;
+    int mode_count = 0;
+
+    /* vectors */
+    int mx, my;
+    int motion_x, motion_y;
+    unsigned char data_byte;
+
+    /* the mean motion components are signed bytes; sign-extend them */
+    mx = (int)((arg >> 8) & 0xFF);
+    if (mx > 127)
+        mx -= 256;
+    my = (int)(arg & 0xFF);
+    if (my > 127)
+        my -= 256;
+
+    if (state->current_frame == 1)
+    {
+        state->current_frame = 0;
+        this_frame = state->frame[0];
+        last_frame = state->frame[1];
+    }
+    else
+    {
+        state->current_frame = 1;
+        this_frame = state->frame[1];
+        last_frame = state->frame[0];
+    }
+
+    /* nothing to decode into until the frames have been allocated */
+    if (!this_frame || !last_frame)
+        return ROQ_BAD_VQ_STREAM;
+
+    for (mb_y = 0; mb_y < state->mb_height && status == ROQ_SUCCESS; mb_y++)
+    {
+        line_offset = mb_y * 16 * stride;
+        for (mb_x = 0; mb_x < state->mb_width && status == ROQ_SUCCESS; mb_x++)
+        {
+            mb_offset = line_offset + mb_x * 16;
+            for (block = 0; block < 4 && status == ROQ_SUCCESS; block++)
+            {
+                block_offset = mb_offset + (block / 2 * 8 * stride) + (block % 2 * 8);
+                /* each 8x8 block gets a mode */
+                GET_MODE();
+                switch (mode)
+                {
+                case 0:  /* MOT: skip */
+                    break;
+
+                case 1:  /* FCC: motion compensation */
+                    /* this needs to be done 16 bits at a time due to
+                     * data alignment issues on the SH-4 */
+                    GET_BYTE(data_byte);
+                    motion_x = 8 - (data_byte >>  4) - mx;
+                    motion_y = 8 - (data_byte & 0xF) - my;
+                    src_offset = block_offset + (motion_y * stride) + motion_x;
+                    /* the vector comes from the stream: never read outside
+                     * the previous frame's buffer */
+                    if (!roq_block_in_frame(src_offset, 8, stride, frame_pixels))
+                    {
+                        status = ROQ_BAD_VQ_STREAM;
+                        break;
+                    }
+                    last_ptr = last_frame + src_offset;
+                    this_ptr = this_frame + block_offset;
+                    for (i = 0; i < 8; i++)
+                    {
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+                        *this_ptr++ = *last_ptr++;
+
+                        last_ptr += stride - 8;
+                        this_ptr += stride - 8;
+                    }
+                    break;
+
+                case 2:  /* SLD: upsample 4x4 vector */
+                    GET_BYTE(data_byte);
+                    vector16 = state->cb4x4[data_byte];
+                    for (i = 0; i < 4*4; i++)
+                    {
+                        this_ptr = this_frame + block_offset +
+                            (i / 4 * 2 * stride) + (i % 4 * 2);
+                        this_ptr[0] = *vector16;
+                        this_ptr[1] = *vector16;
+                        this_ptr[stride+0] = *vector16;
+                        this_ptr[stride+1] = *vector16;
+                        vector16++;
+                    }
+                    break;
+
+                case 3:  /* CCC: subdivide into 4 subblocks */
+                    for (subblock = 0; subblock < 4 && status == ROQ_SUCCESS; subblock++)
+                    {
+                        subblock_offset = block_offset + (subblock / 2 * 4 * stride) + (subblock % 2 * 4);
+
+                        GET_MODE();
+                        switch (mode)
+                        {
+                        case 0:  /* MOT: skip */
+                            break;
+
+                        case 1:  /* FCC: motion compensation */
+                            GET_BYTE(data_byte);
+                            motion_x = 8 - (data_byte >>  4) - mx;
+                            motion_y = 8 - (data_byte & 0xF) - my;
+                            src_offset = subblock_offset +
+                                (motion_y * stride) + motion_x;
+                            if (!roq_block_in_frame(src_offset, 4, stride, frame_pixels))
+                            {
+                                status = ROQ_BAD_VQ_STREAM;
+                                break;
+                            }
+                            last_ptr = last_frame + src_offset;
+                            this_ptr = this_frame + subblock_offset;
+                            for (i = 0; i < 4; i++)
+                            {
+                                *this_ptr++ = *last_ptr++;
+                                *this_ptr++ = *last_ptr++;
+                                *this_ptr++ = *last_ptr++;
+                                *this_ptr++ = *last_ptr++;
+
+                                last_ptr += stride - 4;
+                                this_ptr += stride - 4;
+                            }
+                            break;
+
+                        case 2:  /* SLD: use 4x4 vector from codebook */
+                            GET_BYTE(data_byte);
+                            vector32 = (unsigned int*)state->cb4x4[data_byte];
+
+                            /* copy two pixels at a time; subblock_offset is
+                             * always a multiple of 4 pixels */
+                            this_ptr32 = (unsigned int*)this_frame;
+                            this_ptr32 += subblock_offset / 2;
+                            for (i = 0; i < 4; i++)
+                            {
+                                *this_ptr32++ = *vector32++;
+                                *this_ptr32++ = *vector32++;
+
+                                this_ptr32 += stride32 - 2;
+                            }
+                            break;
+
+                        case 3:  /* CCC: four 2x2 vectors, one per quadrant */
+                            GET_BYTE(data_byte);
+                            vector16 = state->cb2x2[data_byte];
+                            this_ptr = this_frame + subblock_offset;
+
+                            this_ptr[0] = vector16[0];
+                            this_ptr[1] = vector16[1];
+                            this_ptr[stride+0] = vector16[2];
+                            this_ptr[stride+1] = vector16[3];
+
+                            GET_BYTE(data_byte);
+                            vector16 = state->cb2x2[data_byte];
+
+                            this_ptr[2] = vector16[0];
+                            this_ptr[3] = vector16[1];
+                            this_ptr[stride+2] = vector16[2];
+                            this_ptr[stride+3] = vector16[3];
+
+                            this_ptr += stride * 2;
+
+                            GET_BYTE(data_byte);
+                            vector16 = state->cb2x2[data_byte];
+
+                            this_ptr[0] = vector16[0];
+                            this_ptr[1] = vector16[1];
+                            this_ptr[stride+0] = vector16[2];
+                            this_ptr[stride+1] = vector16[3];
+
+                            GET_BYTE(data_byte);
+                            vector16 = state->cb2x2[data_byte];
+
+                            this_ptr[2] = vector16[0];
+                            this_ptr[3] = vector16[1];
+                            this_ptr[stride+2] = vector16[2];
+                            this_ptr[stride+3] = vector16[3];
+
+                            break;
+                        }
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
+    /* sanity check to see if the stream was fully consumed */
+    if (status == ROQ_SUCCESS && index < size-2)
+    {
+        status = ROQ_BAD_VQ_STREAM;
+    }
+
+    return status;
+}
+/*
+	Name: Ian micheal
+	Copyright:
+	Author:
+	Date: 15/08/23 19:24
+	Description: ported
from C normal file system to kos FS file system api because if this
+
+	Info from TapamN One issue you might run into is slow file access over ethernet.
+	Using the C library stdio.h functions (fread, fwrite) can be much slower than using the KOS filesystem calls directly (fs_read, fs_write) when reading/writing large blocks.
+	With stdio, you get something like tens of KB/sec, while with KOS you can get over 1 MB/sec. Stdio might be faster when preforming many very small operations.
+	dcload-serial doesn't have this issue.
+*/
+
+/*
+ * Play the RoQ file `filename`.
+ *
+ * loop:      non-zero to restart from the first chunk when the end of the
+ *            file is reached.
+ * render_cb: called with each decoded frame (may be NULL).
+ * audio_cb:  called with each decoded block of 16-bit PCM (may be NULL).
+ * quit_cb:   polled once per chunk; a non-zero return ends playback
+ *            (may be NULL).
+ *
+ * Returns ROQ_SUCCESS when playback ends normally (end of file or quit
+ * request), otherwise the first ROQ_* error raised by the file, the decoder
+ * or a callback.  Playback stops at the first error.  The file handle and
+ * both frame buffers are released on every exit path.
+ *
+ * NOTE(review): read_buffer puts MAX_BUF_SIZE bytes on the caller's stack;
+ * confirm the calling thread's stack is large enough.
+ */
+int dreamroq_play(char *filename, int loop, render_callback render_cb,
+                  audio_callback audio_cb, quit_callback quit_cb)
+{
+    file_t f;
+    ssize_t file_ret;
+    int framerate;
+    int chunk_id;
+    unsigned int chunk_size;
+    unsigned int chunk_arg;
+    roq_state state;
+    int status;
+    int initialized = 0;
+    unsigned char read_buffer[MAX_BUF_SIZE];
+    int i, snd_left, snd_right;
+    unsigned int frame_bytes;
+
+    /* Zero the decoder state up front so that the frame pointers are NULL,
+     * and therefore safe to free, even if no RoQ_INFO chunk is ever seen. */
+    memset(&state, 0, sizeof(state));
+
+    f = fs_open(filename, O_RDONLY);
+    if (f < 0)
+        return ROQ_FILE_OPEN_FAILURE;
+
+    /* the first 8 bytes are the file signature chunk; its argument field is
+     * the frame rate */
+    file_ret = fs_read(f, read_buffer, CHUNK_HEADER_SIZE);
+    if (file_ret != CHUNK_HEADER_SIZE)
+    {
+        fs_close(f);
+        printf("\nROQ_FILE_READ_FAILURE\n\n");
+        return ROQ_FILE_READ_FAILURE;
+    }
+    framerate = LE_16(&read_buffer[6]);
+    printf("RoQ file plays at %d frames/sec\n", framerate);
+
+    /* Initialize Audio SQRT Look-Up Table */
+    for(i = 0; i < 128; i++)
+    {
+        roq_audio.snd_sqr_arr[i] = i * i;
+        roq_audio.snd_sqr_arr[i + 128] = -(i * i);
+    }
+
+    status = ROQ_SUCCESS;
+    while (status == ROQ_SUCCESS)
+    {
+        if (quit_cb && quit_cb())
+            break;
+
+        file_ret = fs_read(f, read_buffer, CHUNK_HEADER_SIZE);
+#ifdef FPSGRAPH
+        printf("r\n");
+#endif
+        if (file_ret < CHUNK_HEADER_SIZE)
+        {
+            if (file_ret < 0)
+            {
+                status = ROQ_FILE_READ_FAILURE;
+                break;
+            }
+            /* end of file (or a truncated trailing header): rewind to just
+             * past the 8-byte signature when looping, otherwise stop */
+            if (loop)
+            {
+                fs_seek(f, 8, SEEK_SET);
+                continue;
+            }
+            break;
+        }
+        chunk_id   = LE_16(&read_buffer[0]);
+        chunk_size = LE_32(&read_buffer[2]);
+        chunk_arg  = LE_16(&read_buffer[6]);
+
+        if (chunk_size > MAX_BUF_SIZE)
+        {
+            /* leave through the common cleanup so the frames are freed */
+            status = ROQ_CHUNK_TOO_LARGE;
+            break;
+        }
+
+        file_ret = fs_read(f, read_buffer, chunk_size);
+        if (file_ret < 0 || (unsigned int)file_ret != chunk_size)
+        {
+            status = ROQ_FILE_READ_FAILURE;
+            break;
+        }
+
+        switch(chunk_id)
+        {
+        case RoQ_INFO:
+            /* only the first RoQ_INFO chunk is honoured; it is seen again on
+             * every pass when looping */
+            if (initialized)
+                break;
+
+            /* width and height are the first four payload bytes */
+            if (chunk_size < 4)
+            {
+                status = ROQ_INVALID_PIC_SIZE;
+                break;
+            }
+            state.width  = LE_16(&read_buffer[0]);
+            state.height = LE_16(&read_buffer[2]);
+            /* width and height each need to be divisible by 16 */
+            if ((state.width & 0xF) || (state.height & 0xF))
+            {
+                status = ROQ_INVALID_PIC_SIZE;
+                break;
+            }
+            /* reject bad dimensions before anything is sized from them */
+            if (state.width < 8 || state.width > 1024 ||
+                state.height < 8 || state.height > 1024)
+            {
+                status = ROQ_INVALID_DIMENSION;
+                break;
+            }
+            state.mb_width = state.width / 16;
+            state.mb_height = state.height / 16;
+            state.mb_count = state.mb_width * state.mb_height;
+
+            /* the frame buffers are padded to power-of-two dimensions */
+            state.stride = 8;
+            while (state.stride < state.width)
+                state.stride <<= 1;
+            state.texture_height = 8;
+            while (state.texture_height < state.height)
+                state.texture_height <<= 1;
+
+            printf(" RoQ_INFO: dimensions = %dx%d, %dx%d; %d mbs, texture = %dx%d\n",
+                state.width, state.height, state.mb_width, state.mb_height,
+                state.mb_count, state.stride, state.texture_height);
+
+            frame_bytes = state.texture_height * state.stride * sizeof(unsigned short);
+            state.frame[0] = (unsigned short*)malloc(frame_bytes);
+            state.frame[1] = (unsigned short*)malloc(frame_bytes);
+            state.current_frame = 0;
+            if (!state.frame[0] || !state.frame[1])
+            {
+                free(state.frame[0]);
+                free(state.frame[1]);
+                /* the common cleanup frees these again; make that a no-op */
+                state.frame[0] = NULL;
+                state.frame[1] = NULL;
+                status = ROQ_NO_MEMORY;
+                break;
+            }
+            memset(state.frame[0], 0, frame_bytes);
+            memset(state.frame[1], 0, frame_bytes);
+
+            /* set this flag so that this code is not executed again when
+             * looping */
+            initialized = 1;
+            break;
+
+        case RoQ_QUAD_CODEBOOK:
+            status = roq_unpack_quad_codebook(read_buffer, chunk_size,
+                chunk_arg, &state);
+            break;
+
+        case RoQ_QUAD_VQ:
+            /* a frame cannot be decoded before RoQ_INFO has allocated it */
+            if (!initialized)
+            {
+                status = ROQ_BAD_VQ_STREAM;
+                break;
+            }
+            status = roq_unpack_vq(read_buffer, chunk_size,
+                chunk_arg, &state);
+            /* only hand successfully decoded frames to the client, and do
+             * not let the callback's result hide a decode error */
+            if (status == ROQ_SUCCESS && render_cb)
+                status = render_cb(state.frame[state.current_frame],
+                    state.width, state.height, state.stride, state.texture_height);
+            break;
+
+        case RoQ_SOUND_MONO:
+            /* every input byte becomes two output bytes */
+            if (chunk_size * 2 > sizeof(roq_audio.pcm_sample))
+            {
+                status = ROQ_CHUNK_TOO_LARGE;
+                break;
+            }
+            roq_audio.channels = 1;
+            roq_audio.pcm_samples = chunk_size*2;
+            snd_left = chunk_arg;
+            for(i = 0; i < chunk_size; i++)
+            {
+                snd_left += roq_audio.snd_sqr_arr[read_buffer[i]];
+                roq_audio.pcm_sample[i * 2] = snd_left & 0xff;
+                roq_audio.pcm_sample[i * 2 + 1] = (snd_left & 0xff00) >> 8;
+            }
+            if (audio_cb)
+                status = audio_cb( roq_audio.pcm_sample, roq_audio.pcm_samples,
+                    roq_audio.channels );
+            break;
+
+        case RoQ_SOUND_STEREO:
+            if (chunk_size * 2 > sizeof(roq_audio.pcm_sample))
+            {
+                status = ROQ_CHUNK_TOO_LARGE;
+                break;
+            }
+            roq_audio.channels = 2;
+            roq_audio.pcm_samples = chunk_size*2;
+            snd_left = (chunk_arg & 0xFF00);
+            snd_right = (chunk_arg & 0xFF) << 8;
+            /* consume whole left/right pairs only, so an odd-sized chunk
+             * never reads past the payload */
+            for(i = 0; i + 1 < chunk_size; i += 2)
+            {
+                snd_left += roq_audio.snd_sqr_arr[read_buffer[i]];
+                snd_right += roq_audio.snd_sqr_arr[read_buffer[i+1]];
+                roq_audio.pcm_sample[i * 2] = snd_left & 0xff;
+                roq_audio.pcm_sample[i * 2 + 1] = (snd_left & 0xff00) >> 8;
+                roq_audio.pcm_sample[i * 2 + 2] = snd_right & 0xff;
+                roq_audio.pcm_sample[i * 2 + 3] = (snd_right & 0xff00) >> 8;
+            }
+            if (audio_cb)
+                status = audio_cb( roq_audio.pcm_sample, roq_audio.pcm_samples,
+                    roq_audio.channels );
+            break;
+
+        default:
+            /* unknown chunk types are skipped */
+            break;
+        }
+    }
+
+    /* common cleanup; the frame pointers are NULL if never allocated */
+    free(state.frame[0]);
+    free(state.frame[1]);
+    fs_close(f);
+
+    return status;
+}
+
diff --git a/samples/dreamroq/dreamroqlib.h b/samples/dreamroq/dreamroqlib.h
new file mode 100644
index 0000000..09f7176
--- /dev/null
+++ b/samples/dreamroq/dreamroqlib.h
@@ -0,0 +1,37 @@
+/*
+ * Dreamroq by Mike Melanson
+ *
+ * This is the header file to be included in the programs wishing to
+ * use the Dreamroq playback engine.
+ */ + +#ifndef NEWROQ_H +#define NEWROQ_H + +#define ROQ_SUCCESS 0 +#define ROQ_FILE_OPEN_FAILURE 1 +#define ROQ_FILE_READ_FAILURE 2 +#define ROQ_CHUNK_TOO_LARGE 3 +#define ROQ_BAD_CODEBOOK 4 +#define ROQ_INVALID_PIC_SIZE 5 +#define ROQ_NO_MEMORY 6 +#define ROQ_BAD_VQ_STREAM 7 +#define ROQ_INVALID_DIMENSION 8 +#define ROQ_RENDER_PROBLEM 9 +#define ROQ_CLIENT_PROBLEM 10 + +/* The library calls this function when it has a frame ready for display. */ +typedef int (*render_callback)(unsigned short *buf, int width, int height, + int stride, int texture_height); + +/* The library calls this function when it has pcm samples ready for output. */ +typedef int (*audio_callback)(unsigned char *buf, int samples, int channels); + +/* The library calls this function to ask whether it should quit playback. + * Return non-zero if it's time to quite. */ +typedef int (*quit_callback)(); + +int dreamroq_play(char *filename, int loop, render_callback render_cb, + audio_callback audio_cb, quit_callback quit_cb); + +#endif /* NEWROQ_H */ diff --git a/samples/dreamroq/libdcmc/arm/aica_cmd_iface.h b/samples/dreamroq/libdcmc/arm/aica_cmd_iface.h new file mode 100644 index 0000000..4b34ad0 --- /dev/null +++ b/samples/dreamroq/libdcmc/arm/aica_cmd_iface.h @@ -0,0 +1,138 @@ +/* KallistiOS ##version## + + aica_cmd_iface.h + (c)2000-2002 Dan Potter + + Definitions for the SH-4/AICA interface. This file is meant to be + included from both the ARM and SH-4 sides of the fence. +*/ + +#ifndef __ARM_AICA_CMD_IFACE_H +#define __ARM_AICA_CMD_IFACE_H + +/* $Id: aica_cmd_iface.h,v 1.3 2002/06/13 05:52:35 bardtx Exp $ */ + +#ifndef __ARCH_TYPES_H +typedef unsigned long uint8; +typedef unsigned long uint32; +#endif + +/* Command queue; one of these for passing data from the SH-4 to the + AICA, and another for the other direction. 
If a command is written + to the queue and it is longer than the amount of space between the + head point and the queue size, the command will wrap around to + the beginning (i.e., queue commands _can_ be split up). */ +typedef struct aica_queue { + uint32 head; /* Insertion point offset (in bytes) */ + uint32 tail; /* Removal point offset (in bytes) */ + uint32 size; /* Queue size (in bytes) */ + uint32 valid; /* 1 if the queue structs are valid */ + uint32 process_ok; /* 1 if it's ok to process the data */ + uint32 data; /* Pointer to queue data buffer */ +} aica_queue_t; + +/* Command queue struct for commanding the AICA from the SH-4 */ +typedef struct aica_cmd { + uint32 size; /* Command data size in dwords */ + uint32 cmd; /* Command ID */ + uint32 timestamp; /* When to execute the command (0 == now) */ + uint32 cmd_id; /* Command ID, for cmd/response pairs, or channel id */ + uint32 misc[4]; /* Misc Parameters / Padding */ + uint8 cmd_data[0]; /* Command data */ +} aica_cmd_t; + +/* Maximum command size -- 256 dwords */ +#define AICA_CMD_MAX_SIZE 256 + +/* This is the cmd_data for AICA_CMD_CHAN. Make this 16 dwords long + for two aica bus queues. 
*/ +typedef struct aica_channel { + uint32 cmd; /* Command ID */ + uint32 base; /* Sample base in RAM */ + uint32 type; /* (8/16bit/ADPCM) */ + uint32 length; /* Sample length */ + uint32 loop; /* Sample looping */ + uint32 loopstart; /* Sample loop start */ + uint32 loopend; /* Sample loop end */ + uint32 freq; /* Frequency */ + uint32 vol; /* Volume 0-255 */ + uint32 pan; /* Pan 0-255 */ + uint32 pos; /* Sample playback pos */ + uint32 pad[5]; /* Padding */ +} aica_channel_t; + +/* Declare an aica_cmd_t big enough to hold an aica_channel_t + using temp name T, aica_cmd_t name CMDR, and aica_channel_t name CHANR */ +#define AICA_CMDSTR_CHANNEL(T, CMDR, CHANR) \ + uint8 T[sizeof(aica_cmd_t) + sizeof(aica_channel_t)]; \ + aica_cmd_t * CMDR = (aica_cmd_t *)T; \ + aica_channel_t * CHANR = (aica_channel_t *)(CMDR->cmd_data); +#define AICA_CMDSTR_CHANNEL_SIZE ((sizeof(aica_cmd_t) + sizeof(aica_channel_t))/4) + +/* Command values (for aica_cmd_t) */ +#define AICA_CMD_NONE 0x00000000 /* No command (dummy packet) */ +#define AICA_CMD_PING 0x00000001 /* Check for signs of life */ +#define AICA_CMD_CHAN 0x00000002 /* Perform a wavetable action */ +#define AICA_CMD_SYNC_CLOCK 0x00000003 /* Reset the millisecond clock */ + +/* Response values (for aica_cmd_t) */ +#define AICA_RESP_NONE 0x00000000 +#define AICA_RESP_PONG 0x00000001 /* Response to CMD_PING */ +#define AICA_RESP_DBGPRINT 0x00000002 /* Entire payload is a null-terminated string */ + +/* Command values (for aica_channel_t commands) */ +#define AICA_CH_CMD_MASK 0x0000000f + +#define AICA_CH_CMD_NONE 0x00000000 +#define AICA_CH_CMD_START 0x00000001 +#define AICA_CH_CMD_STOP 0x00000002 +#define AICA_CH_CMD_UPDATE 0x00000003 + +/* Start values */ +#define AICA_CH_START_MASK 0x00300000 + +#define AICA_CH_START_DELAY 0x00100000 /* Set params, but delay key-on */ +#define AICA_CH_START_SYNC 0x00200000 /* Set key-on for all selected channels */ + +/* Update values */ +#define AICA_CH_UPDATE_MASK 0x000ff000 + +#define 
AICA_CH_UPDATE_SET_FREQ 0x00001000 /* frequency */ +#define AICA_CH_UPDATE_SET_VOL 0x00002000 /* volume */ +#define AICA_CH_UPDATE_SET_PAN 0x00004000 /* panning */ + +/* Sample types */ +#define AICA_SM_8BIT 1 +#define AICA_SM_16BIT 0 +#define AICA_SM_ADPCM 2 + + +/* This is where our SH-4/AICA comm variables go... */ + +/* 0x000000 - 0x010000 are reserved for the program */ + +/* Location of the SH-4 to AICA queue; commands from here will be + periodically processed by the AICA and then removed from the queue. */ +#define AICA_MEM_CMD_QUEUE 0x010000 /* 32K */ + +/* Location of the AICA to SH-4 queue; commands from here will be + periodically processed by the SH-4 and then removed from the queue. */ +#define AICA_MEM_RESP_QUEUE 0x018000 /* 32K */ + +/* This is the channel base, which holds status structs for all the + channels. This is READ-ONLY from the SH-4 side. */ +#define AICA_MEM_CHANNELS 0x020000 /* 64 * 16*4 = 4K */ + +/* The clock value (in milliseconds) */ +#define AICA_MEM_CLOCK 0x021000 /* 4 bytes */ + +/* 0x021004 - 0x030000 are reserved for future expansion */ + +/* Open ram for sample data */ +#define AICA_RAM_START 0x030000 +#define AICA_RAM_END 0x200000 + +/* Quick access to the AICA channels */ +#define AICA_CHANNEL(x) (AICA_MEM_CHANNELS + (x) * sizeof(aica_channel_t)) + +#endif /* __ARM_AICA_CMD_IFACE_H */ diff --git a/samples/dreamroq/libdcmc/dc_timer.h b/samples/dreamroq/libdcmc/dc_timer.h new file mode 100644 index 0000000..6df1ee9 --- /dev/null +++ b/samples/dreamroq/libdcmc/dc_timer.h @@ -0,0 +1,19 @@ +/* +** +** This File is a part of Dreamcast Media Center +** (C) Josh "PH3NOM" Pearson 2011 +** +*/ + +/* 'Public' Function Protocols */ + +#ifndef DCTIMER_H +#define DCTIMER_H + +/* Returns hardware time in miliseconds */ +int dc_get_time(); + +/* Regulate the Video Frame Rate */ +void frame_delay( float AVI_video_rate, float AVI_delay, int frameCounter ); + +#endif diff --git a/samples/dreamroq/libdcmc/snd_stream.c 
b/samples/dreamroq/libdcmc/snd_stream.c new file mode 100644 index 0000000..cd451b0 --- /dev/null +++ b/samples/dreamroq/libdcmc/snd_stream.c @@ -0,0 +1,531 @@ +/* +** +** Josh 'PH3NOM' Pearson 2011 +** Notes: Had to modify the requested samples by soundstream_poll +** for easy integration with libROQ +*/ + +/* KallistiOS ##version## + + snd_stream.c + Copyright (c)2000,2001,2002,2003,2004 Dan Potter + Copyright (c)2002 Florian Schulze + + SH-4 support routines for SPU streaming sound driver +*/ +/* Missing headers Ian micheal 2020*/ +/* + Name: Ian micheal + Copyright: + Author: Ian micheal + Date: 12/08/23 05:17 + Description: kos 2.0 up port threading fix and wrappers and all warnings fixed +*/ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "arm/aica_cmd_iface.h" + + +/* + +This module uses a nice circularly queued data stream in SPU RAM, which is +looped by a program running in the SPU itself. + +Basically the poll routine checks to see if a certain minimum amount of +data is available to the SPU to be played, and if not, we ask the user +routine for more sound data and load it up. That's about it. + +This version is capable of playing back N streams at once, with the limit +being available CPU time and channels. + +*/ + +typedef struct filter { + TAILQ_ENTRY(filter) lent; + snd_stream_filter_t func; + void * data; +} filter_t; + +/* Each of these represents an active streaming channel */ +typedef struct strchan { + // Which AICA channels are we using? + int ch[2]; + + // The last write position in the playing buffer + int last_write_pos; // = 0 + int curbuffer; // = 0 + + // The buffer size allocated for this stream. + int buffer_size; // = 0x10000 + + // Stream data location in AICA RAM + uint32 spu_ram_sch[2]; + + // "Get data" callback; we'll call this any time we want to get + // another buffer of output data. 
+ snd_stream_callback_t get_data; + + // Our list of filter callback functions for this stream + TAILQ_HEAD(filterlist, filter) filters; + + // Stereo/mono flag + int stereo; + + // Playback frequency + int frequency; + + /* Stream queueing is where we get everything ready to go but don't + actually start it playing until the signal (for music sync, etc) */ + int queueing; + + /* Have we been initialized yet? (and reserved a buffer, etc) */ + volatile int initted; +} strchan_t; + +// Our stream structs +static strchan_t streams[SND_STREAM_MAX] = { { { 0 } } }; + +// Separation buffers (for stereo) +int16 * sep_buffer[2] = { NULL, NULL }; + +/* the address of the sound ram from the SH4 side */ +#define SPU_RAM_BASE 0xa0800000 + +// Check an incoming handle +#define CHECK_HND(x) do { \ + assert( (x) >= 0 && (x) < SND_STREAM_MAX ); \ + assert( streams[(x)].initted ); \ +} while(0) + +/* Set "get data" callback */ +void snd_stream_set_callback(snd_stream_hnd_t hnd, snd_stream_callback_t cb) { + CHECK_HND(hnd); + streams[hnd].get_data = cb; +} + +void snd_stream_filter_add(snd_stream_hnd_t hnd, snd_stream_filter_t filtfunc, void * obj) { + filter_t * f; + + CHECK_HND(hnd); + + f = malloc(sizeof(filter_t)); + f->func = filtfunc; + f->data = obj; + TAILQ_INSERT_TAIL(&streams[hnd].filters, f, lent); +} + +void snd_stream_filter_remove(snd_stream_hnd_t hnd, snd_stream_filter_t filtfunc, void * obj) { + filter_t * f; + + CHECK_HND(hnd); + + TAILQ_FOREACH(f, &streams[hnd].filters, lent) { + if (f->func == filtfunc && f->data == obj) { + TAILQ_REMOVE(&streams[hnd].filters, f, lent); + free(f); + return; + } + } +} + +static void process_filters(snd_stream_hnd_t hnd, void **buffer, int *samplecnt) { + filter_t * f; + + TAILQ_FOREACH(f, &streams[hnd].filters, lent) { + f->func(hnd, f->data, streams[hnd].frequency, streams[hnd].stereo ? 2 : 1, buffer, samplecnt); + } +} + + +/* Performs stereo seperation for the two channels; this routine + has been optimized for the SH-4. 
*/ +static void sep_data(void *buffer, int len, int stereo) { + register int16 *bufsrc, *bufdst; + register int x, y, cnt; + + if (stereo) { + bufsrc = (int16*)buffer; + bufdst = sep_buffer[0]; + x = 0; y = 0; cnt = len / 2; + do { + *bufdst = *bufsrc; + bufdst++; bufsrc+=2; cnt--; + } while (cnt > 0); + + bufsrc = (int16*)buffer; bufsrc++; + bufdst = sep_buffer[1]; + x = 1; y = 0; cnt = len / 2; + do { + *bufdst = *bufsrc; + bufdst++; bufsrc+=2; cnt--; + x+=2; y++; + } while (cnt > 0); + } else { + memcpy(sep_buffer[0], buffer, len); + memcpy(sep_buffer[1], buffer, len); + } +} + +/* Prefill buffers -- do this before calling start() */ +void snd_stream_prefill(snd_stream_hnd_t hnd) { + void *buf; + int got; + + CHECK_HND(hnd); + + if (!streams[hnd].get_data) return; + + /* Load first buffer */ + /* XXX Note: This will not work if the full data size is less than + buffer_size or buffer_size/2. */ + if (streams[hnd].stereo) + buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size, &got); + else + buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size/2, &got); + process_filters(hnd, &buf, &got); + sep_data(buf, (streams[hnd].buffer_size/2), streams[hnd].stereo); + spu_memload( + streams[hnd].spu_ram_sch[0] + (streams[hnd].buffer_size/2)*0, + (uint8*)sep_buffer[0], streams[hnd].buffer_size/2); + spu_memload( + streams[hnd].spu_ram_sch[1] + (streams[hnd].buffer_size/2)*0, + (uint8*)sep_buffer[1], streams[hnd].buffer_size/2); + + /* Load second buffer */ + if (streams[hnd].stereo) + buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size, &got); + else + buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size/2, &got); + process_filters(hnd, &buf, &got); + sep_data(buf, (streams[hnd].buffer_size/2), streams[hnd].stereo); + spu_memload( + streams[hnd].spu_ram_sch[0] + (streams[hnd].buffer_size/2)*1, + (uint8*)sep_buffer[0], streams[hnd].buffer_size/2); + spu_memload( + streams[hnd].spu_ram_sch[1] + (streams[hnd].buffer_size/2)*1, + (uint8*)sep_buffer[1], 
streams[hnd].buffer_size/2); + + /* Start with playing on buffer 0 */ + streams[hnd].last_write_pos = 0; + streams[hnd].curbuffer = 0; +} + +/* Initialize stream system */ +int snd_stream_init() { + /* Create stereo seperation buffers */ + if (!sep_buffer[0]) { + sep_buffer[0] = memalign(32, (SND_STREAM_BUFFER_MAX/2)); + sep_buffer[1] = memalign(32, (SND_STREAM_BUFFER_MAX/2)); + } + + /* Finish loading the stream driver */ + if (snd_init() < 0) { + dbglog(DBG_ERROR, "snd_stream_init(): snd_init() failed, giving up\n"); + return -1; + } + + return 0; +} + +snd_stream_hnd_t snd_stream_alloc(snd_stream_callback_t cb, int bufsize) { + int i, old; + snd_stream_hnd_t hnd; + + // Get an unused handle + hnd = -1; + old = irq_disable(); + for (i=0; icmd = AICA_CMD_CHAN; + cmd->timestamp = 0; + cmd->size = AICA_CMDSTR_CHANNEL_SIZE; + cmd->cmd_id = streams[hnd].ch[0]; + chan->cmd = AICA_CH_CMD_START | AICA_CH_START_DELAY; + chan->base = streams[hnd].spu_ram_sch[0]; + chan->type = AICA_SM_16BIT; + chan->length = (streams[hnd].buffer_size/2); + chan->loop = 1; + chan->loopstart = 0; + chan->loopend = (streams[hnd].buffer_size/2); + chan->freq = freq; + chan->vol = 255; + chan->pan = 0; + snd_sh4_to_aica(tmp, cmd->size); + + /* Channel 1 */ + cmd->cmd_id = streams[hnd].ch[1]; + chan->base = streams[hnd].spu_ram_sch[1]; + chan->pan = 255; + snd_sh4_to_aica(tmp, cmd->size); + + /* Start both channels simultaneously */ + cmd->cmd_id = (1 << streams[hnd].ch[0]) | + (1 << streams[hnd].ch[1]); + chan->cmd = AICA_CH_CMD_START | AICA_CH_START_SYNC; + snd_sh4_to_aica(tmp, cmd->size); + + /* Process the changes */ + if (!streams[hnd].queueing) + snd_sh4_to_aica_start(); +} + +/* Actually make it go (in queued mode) */ +void snd_stream_queue_go(snd_stream_hnd_t hnd) { + CHECK_HND(hnd); + snd_sh4_to_aica_start(); +} + +/* Stop streaming */ +void snd_stream_stop(snd_stream_hnd_t hnd) { + AICA_CMDSTR_CHANNEL(tmp, cmd, chan); + + CHECK_HND(hnd); + + if (!streams[hnd].get_data) return; + + /* 
Stop stream */ + /* Channel 0 */ + cmd->cmd = AICA_CMD_CHAN; + cmd->timestamp = 0; + cmd->size = AICA_CMDSTR_CHANNEL_SIZE; + cmd->cmd_id = streams[hnd].ch[0]; + chan->cmd = AICA_CH_CMD_STOP; + snd_sh4_to_aica(tmp, cmd->size); + + /* Channel 1 */ + cmd->cmd_id = streams[hnd].ch[1]; + snd_sh4_to_aica(tmp, AICA_CMDSTR_CHANNEL_SIZE); +} + +/* The DMA will chain to this to start the second DMA. */ +/* static uint32 dmadest, dmacnt; +static void dma_chain(ptr_t data) { + spu_dma_transfer(sep_buffer[1], dmadest, dmacnt, 0, NULL, 0); +} */ + +/* Poll streamer to load more data if necessary */ +int snd_stream_poll(snd_stream_hnd_t hnd) { + uint32 ch0pos, ch1pos; + /* int realbuffer; */ // Remove this line + int current_play_pos; + int needed_samples; + int got_samples; + void *data; + + CHECK_HND(hnd); + + if (!streams[hnd].get_data) return -1; + + /* Get "real" buffer */ + ch0pos = g2_read_32(SPU_RAM_BASE + AICA_CHANNEL(streams[hnd].ch[0]) + offsetof(aica_channel_t, pos)); + ch1pos = g2_read_32(SPU_RAM_BASE + AICA_CHANNEL(streams[hnd].ch[1]) + offsetof(aica_channel_t, pos)); + + if (ch0pos >= (streams[hnd].buffer_size/2)) { + dbglog(DBG_ERROR, "snd_stream_poll: chan0(%d).pos = %ld (%08lx)\n", streams[hnd].ch[0], ch0pos, ch0pos); + return -1; + } + + current_play_pos = (ch0pos < ch1pos)?(ch0pos):(ch1pos); + + /* count just till the end of the buffer, so we don't have to + handle buffer wraps */ + if (streams[hnd].last_write_pos <= current_play_pos) + needed_samples = current_play_pos - streams[hnd].last_write_pos; + else + needed_samples = (streams[hnd].buffer_size/2) - streams[hnd].last_write_pos; + /* round it a little bit */ + needed_samples &= ~0x7ff; + /* printf("last_write_pos %6i, current_play_pos %6i, needed_samples %6i\n",last_write_pos,current_play_pos,needed_samples); */ + + //Ian micheal wtf was this set to 4096? 
was causing a delay + if (needed_samples ==2048) { + if (streams[hnd].stereo) { + data = streams[hnd].get_data(hnd, needed_samples * 4, &got_samples); + process_filters(hnd, &data, &got_samples); + if (got_samples < needed_samples * 4) { + needed_samples = got_samples / 4; + if (needed_samples & 3) + needed_samples = (needed_samples + 4) & ~3; + } + } else { + data = streams[hnd].get_data(hnd, needed_samples * 2, &got_samples); + process_filters(hnd, &data, &got_samples); + if (got_samples < needed_samples * 2) { + needed_samples = got_samples / 2; + if (needed_samples & 1) + needed_samples = (needed_samples + 2) & ~1; + } + } + if (data == NULL) { + /* Fill the "other" buffer with zeros */ + spu_memset(streams[hnd].spu_ram_sch[0] + (streams[hnd].last_write_pos * 2), 0, needed_samples * 2); + spu_memset(streams[hnd].spu_ram_sch[1] + (streams[hnd].last_write_pos * 2), 0, needed_samples * 2); + return -3; + } + + sep_data(data, needed_samples * 2, streams[hnd].stereo); + spu_memload(streams[hnd].spu_ram_sch[0] + (streams[hnd].last_write_pos * 2), (uint8*)sep_buffer[0], needed_samples * 2); + spu_memload(streams[hnd].spu_ram_sch[1] + (streams[hnd].last_write_pos * 2), (uint8*)sep_buffer[1], needed_samples * 2); + + // Second DMA will get started by the chain handler + /* dcache_flush_range(sep_buffer[0], needed_samples*2); + dcache_flush_range(sep_buffer[1], needed_samples*2); + dmadest = spu_ram_sch2 + (last_write_pos * 2); + dmacnt = needed_samples * 2; + spu_dma_transfer(sep_buffer[0], spu_ram_sch1 + (last_write_pos * 2), needed_samples * 2, + 0, dma_chain, 0); */ + + streams[hnd].last_write_pos += needed_samples; + if (streams[hnd].last_write_pos >= (streams[hnd].buffer_size/2)) + streams[hnd].last_write_pos -= (streams[hnd].buffer_size/2); + } + return 0; +} + +/* Set the volume on the streaming channels */ +void snd_stream_volume(snd_stream_hnd_t hnd, int vol) { + AICA_CMDSTR_CHANNEL(tmp, cmd, chan); + + CHECK_HND(hnd); + + cmd->cmd = AICA_CMD_CHAN; + 
cmd->timestamp = 0; + cmd->size = AICA_CMDSTR_CHANNEL_SIZE; + cmd->cmd_id = streams[hnd].ch[0]; + chan->cmd = AICA_CH_CMD_UPDATE | AICA_CH_UPDATE_SET_VOL; + chan->vol = vol; + snd_sh4_to_aica(tmp, cmd->size); + + cmd->cmd_id = streams[hnd].ch[1]; + snd_sh4_to_aica(tmp, cmd->size); +} + + diff --git a/samples/dreamroq/libdcmc/snddrv.c b/samples/dreamroq/libdcmc/snddrv.c new file mode 100644 index 0000000..622bace --- /dev/null +++ b/samples/dreamroq/libdcmc/snddrv.c @@ -0,0 +1,162 @@ +/* +** +** (C) Josh 'PH3NOM' Pearson 2011 +** +*/ +/* +** To anyone looking at this code: +** +** This driver runs in its own thread on the sh4. +** +** When the AICA driver requests more samples, +** it will signal sndbuf_status=SNDDRV_STATUS_NEEDBUF +** and assign the number of requested samples to snddrv.pcm_needed. +** +** The decoders need to check sndbuf_status, +** when more samples are requested by the driver ** the decoders will loop +** decoding into pcm_buffer untill pcm_bytes==snddrv.pcm_needed +** at that point the decoder signals sndbuf_status=SNDDRV_STATUS_HAVEBUF +** +*/ +/* + Name: Ian micheal + Copyright: + Author: Ian micheal + Date: 12/08/23 05:17 + Description: kos 2.0 up port threading fix and wrappers and all warnings fixed +*/ + +#include +#include +#include +#include +#include +#include "snddrv.h" + +snd_stream_hnd_t shnd; +kthread_t * snddrv_thd; +static int snddrv_vol = 255; + +struct snddrv snddrv; +struct snddrv_song_info snd_sinfo; + +/* Increase the Sound Driver volume */ +int snddrv_volume_up() { + + if( snddrv_vol <= 245 ) { + snddrv_vol += 10; + snd_stream_volume(shnd, snddrv_vol); + } + return snddrv_vol; +} + +/* Decrease the Sound Driver volume */ +int snddrv_volume_down() { + + if( snddrv_vol >= 10 ) { + snddrv_vol -= 10; + snd_stream_volume(shnd, snddrv_vol); + } + return snddrv_vol; +} + +/* Exit the Sound Driver */ +int snddrv_exit() { + + if( snddrv.drv_status != SNDDRV_STATUS_NULL ) { + snddrv.drv_status = SNDDRV_STATUS_DONE; + 
snddrv.buf_status = SNDDRV_STATUS_BUFEND; + + while( snddrv.drv_status != SNDDRV_STATUS_NULL ) + thd_pass(); + + + printf("SNDDRV: Exited\n"); + } + + memset( snddrv.pcm_buffer, 0, 65536+16384); + snddrv.pcm_bytes = 0; + snddrv.pcm_needed = 0; + + SNDDRV_FREE_STRUCT(); + + return snddrv.drv_status; +} + +/* Signal how many samples the AICA needs, then wait for the deocder to produce them */ +static void *snddrv_callback(snd_stream_hnd_t hnd, int len, int * actual) { + + /* Signal the Decoder thread how many more samples are needed */ + snddrv.pcm_needed = len; + snddrv.buf_status = SNDDRV_STATUS_NEEDBUF; + + /* Wait for the samples to be ready */ + while( snddrv.buf_status != SNDDRV_STATUS_HAVEBUF && snddrv.buf_status != SNDDRV_STATUS_BUFEND ) + thd_pass(); + + snddrv.pcm_ptr = snddrv.pcm_buffer; + snddrv.pcm_bytes = 0; + *actual = len; + + return snddrv.pcm_ptr; + +} + +static int snddrv_thread() { + + printf("SNDDRV: Rate - %i, Channels - %i\n", snddrv.rate, snddrv.channels); + + shnd = snd_stream_alloc(snddrv_callback, SND_STREAM_BUFFER_MAX/4); + + snd_stream_start(shnd, snddrv.rate, snddrv.channels-1); + snddrv.drv_status = SNDDRV_STATUS_STREAMING; + + while( snddrv.drv_status != SNDDRV_STATUS_DONE && snddrv.drv_status != SNDDRV_STATUS_ERROR ) { + + snd_stream_poll(shnd); + thd_sleep(20); + + } + snddrv.drv_status = SNDDRV_STATUS_NULL; + + snd_stream_destroy(shnd); + snd_stream_shutdown(); + + printf("SNDDRV: Finished\n"); + + return snddrv.drv_status; +} + +/* Wrapper function for snddrv_thread */ +static void *snddrv_thread_wrapper(void *arg) +{ + int status = snddrv_thread(); // Get the status value + return (void *)(size_t)status; // Cast the int status to void pointer +} + +/* Start the AICA Sound Stream Thread */ +int snddrv_start( int rate, int chans ) { + + snddrv.rate = rate; + snddrv.channels = chans; + if( snddrv.channels > 2) { + printf("SNDDRV: ERROR - Exceeds maximum channels\n"); + return -1; + } + + printf("SNDDRV: Creating Driver Thread\n"); + 
+ snddrv.drv_status = SNDDRV_STATUS_INITIALIZING; + + snd_stream_init(); + /*libdcmc/snddrv.c:136: warning: passing arg 1 of `thd_create' from incompatible pointer type */ //Ian micheal 2020 warning + /* Use the wrapper function here */ + snddrv_thd = thd_create(0, snddrv_thread_wrapper, NULL); + + printf("SNDDRV: Creating Driver Thread\n"); + + + return snddrv.drv_status; + +} + diff --git a/samples/dreamroq/libdcmc/snddrv.h b/samples/dreamroq/libdcmc/snddrv.h new file mode 100644 index 0000000..4982233 --- /dev/null +++ b/samples/dreamroq/libdcmc/snddrv.h @@ -0,0 +1,111 @@ +/* +** +** This File is a part of Dreamcast Media Center +** (C) Josh "PH3NOM" Pearson 2011 +** +*/ + +#ifndef SNDDRV_H +#define SNDDRV_H + +/* Keep track of things from the Driver side */ +#define SNDDRV_STATUS_NULL 0x00 +#define SNDDRV_STATUS_INITIALIZING 0x01 +#define SNDDRV_STATUS_READY 0x02 +#define SNDDRV_STATUS_STREAMING 0x03 +#define SNDDRV_STATUS_DONE 0x04 +#define SNDDRV_STATUS_ERROR 0x05 + +/* Keep track of things from the Decoder side */ +#define SNDDEC_STATUS_NULL 0x00 +#define SNDDEC_STATUS_INITIALIZING 0x01 +#define SNDDEC_STATUS_READY 0x02 +#define SNDDEC_STATUS_STREAMING 0x03 +#define SNDDEC_STATUS_PAUSING 0x04 +#define SNDDEC_STATUS_PAUSED 0x05 +#define SNDDEC_STATUS_RESUMING 0x06 +#define SNDDEC_STATUS_DONE 0x07 +#define SNDDEC_STATUS_ERROR 0x08 + +/* Keep track of the buffer status from both sides*/ +#define SNDDRV_STATUS_NEEDBUF 0x00 +#define SNDDRV_STATUS_HAVEBUF 0x01 +#define SNDDRV_STATUS_BUFEND 0x02 + +/* This seems to be a good number for file seeking on compressed audio */ +#define SEEK_LEN 16384*48 + +extern struct snddrv snddrv; +extern struct snddrv_song_info snd_sinfo; + +/* SNDDRV (C) AICA Audio Driver */ +struct snddrv { + int rate; + int channels; + int pcm_bytes; + int pcm_needed; + volatile int drv_status; + volatile int dec_status; + volatile int buf_status; + unsigned int pcm_buffer[65536+16384]; + unsigned int *pcm_ptr; +}; + +#define SNDDRV_FREE_STRUCT() 
{ \ + snddrv.rate = snddrv.channels = snddrv.drv_status = \ + snddrv.dec_status = snddrv.buf_status = 0; } + +struct snddrv_song_info { + char *artist[128]; + char * title[128]; + char * track[128]; + char * album[128]; + char * genre[128]; + char *fname; + volatile int fpos; + volatile float spos; + int fsize; + float slen; +}; + +#define SNDDRV_FREE_SINFO() { \ + sq_clr( snd_sinfo.artist, 128 ); \ + sq_clr( snd_sinfo.title, 128 ); \ + sq_clr( snd_sinfo.track, 128 ); \ + sq_clr( snd_sinfo.album, 128 ); \ + sq_clr( snd_sinfo.genre, 128 ); \ + snd_sinfo.fpos = snd_sinfo.spos = snd_sinfo.fsize = snd_sinfo.slen = 0; } + +#define min(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MAX_CHANNELS 6 /* make this higher to support files with + more channels for LibFAAD */ + +/* MicroSoft channel definitions */ +#define SPEAKER_FRONT_LEFT 0x1 +#define SPEAKER_FRONT_RIGHT 0x2 +#define SPEAKER_FRONT_CENTER 0x4 +#define SPEAKER_LOW_FREQUENCY 0x8 +#define SPEAKER_BACK_LEFT 0x10 +#define SPEAKER_BACK_RIGHT 0x20 +#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40 +#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80 +#define SPEAKER_BACK_CENTER 0x100 +#define SPEAKER_SIDE_LEFT 0x200 +#define SPEAKER_SIDE_RIGHT 0x400 +#define SPEAKER_TOP_CENTER 0x800 +#define SPEAKER_TOP_FRONT_LEFT 0x1000 +#define SPEAKER_TOP_FRONT_CENTER 0x2000 +#define SPEAKER_TOP_FRONT_RIGHT 0x4000 +#define SPEAKER_TOP_BACK_LEFT 0x8000 +#define SPEAKER_TOP_BACK_CENTER 0x10000 +#define SPEAKER_TOP_BACK_RIGHT 0x20000 +#define SPEAKER_RESERVED 0x80000000 + +/* SNDDRV Function Protocols */ +int snddrv_start( int rate, int chans ); +int snddrv_exit(); +int snddrv_volume_up(); +int snddrv_volume_down(); + +#endif diff --git a/samples/dreamroq/libdcmc/timer.c b/samples/dreamroq/libdcmc/timer.c new file mode 100644 index 0000000..fd1bb01 --- /dev/null +++ b/samples/dreamroq/libdcmc/timer.c @@ -0,0 +1,36 @@ +/* +** +** This file is a part of Dreamcast Media Center +** (C) Josh PH3NOM Pearson 2011 +** +*/ + +#include +#include +#include +#include 
+#include "dc_timer.h" + +/* Get current hardware timing using arch/timer.h */ +int dc_get_time() +{ + uint32 s, ms; + uint64 msec; + + timer_ms_gettime(&s, &ms); + msec = (((uint64)s) * ((uint64)1000)) + ((uint64)ms); + + return (int)msec; +} + +/* Regulate the Video Frame Rate */ +void frame_delay( float AVI_video_rate, float AVI_delay, int frameCounter ) +{ + float AVI_real_time = frameCounter / AVI_video_rate; + float CPU_real_time= ( ( (float)dc_get_time()- AVI_delay ) / 1000.0f ); + //printf("AVI_real_time: %f, CPU_real_time %f\n", AVI_real_time, CPU_real_time ); + while ( CPU_real_time < AVI_real_time ) { + CPU_real_time= ( ( (float)dc_get_time()- AVI_delay ) / 1000.0f ); + thd_pass(); + } +} diff --git a/samples/dreamroq/main.c b/samples/dreamroq/main.c new file mode 100644 index 0000000..70d6ea3 --- /dev/null +++ b/samples/dreamroq/main.c @@ -0,0 +1,495 @@ +/* + * Dreamroq by Mike Melanson + * Updated by Josh Pearson to add audio support + * + * This is the sample Dreamcast player app, designed to be run under + * the KallistiOS operating system. + */ +/* + Name: Iaan micheal + Copyright: + Author: Ian micheal + Date: 12/08/23 05:17 + Description: kos 2.0 up port threading fix and wrappers and all warnings fixed + Redone threading and main added benchmarking for timing acia and roq decoding audio + redone rendering order and code commented to be much easier to read. 
+ example OUTPUT:> Wait for AICA Driver: 88 ms + OUTPUT:> Wait for RoQ Decoder: 1 ms + OUTPUT:> Copy PCM Samples: 1 ms + OUTPUT:> Inform AICA Driver: 0 ms + OUTPUT:> Wait for AICA Driver: 88 ms + OUTPUT:> Wait for RoQ Decoder: 0 ms + + Before + OUTPUT:> Wait for AICA Driver: 168 ms + OUTPUT:> Wait for RoQ Decoder: 0 ms + OUTPUT:> Copy PCM Samples: 1 ms + OUTPUT:> Inform AICA Driver: 0 ms + OUTPUT:> Wait for AICA Driver: 187 ms + OUTPUT:> Wait for RoQ Decoder: 0 ms + OUTPUT:> Copy PCM Samples: 1 ms + OUTPUT:> Inform AICA Driver: 0 ms + OUTPUT:> Wait for AICA Driver: 197 ms +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "dreamroqlib.h" +#include "libdcmc/dc_timer.h" +#include "libdcmc/snddrv.h" +#include +#include + +#include "profiler.h" + +#ifdef __DREAMCAST__ +extern uint8 romdisk[]; +KOS_INIT_ROMDISK(romdisk); +#endif + + +/* Audio Global variables */ +#define PCM_BUF_SIZE (1024 * 1024) +static unsigned char *pcm_buf = NULL; +static int pcm_size = 0; +#define AUDIO_THREAD_PRIO 0 +kthread_t *audio_thread; // Thread handle for the audio thread +int audio_init = 0; // Flag to indicate audio initialization status +static mutex_t pcm_mut = MUTEX_INITIALIZER; +/* Video Global variables */ +static pvr_ptr_t textures[2]; +static int current_frame = 0; +static int graphics_initialized = 0; +static float video_delay; + +GLfloat vertices[4][5]; +GLuint frameTexture[2]; + + +// Define the target frame rate +#define TARGET_FRAME_RATE 30 + + + + +static void snd_thd() +{ + do + { + unsigned int start_time, end_time; + + // Measure time taken by waiting for AICA Driver request + start_time = dc_get_time(); + while (snddrv.buf_status != SNDDRV_STATUS_NEEDBUF) + thd_pass(); + end_time = dc_get_time(); + printf("Wait for AICA Driver: %u ms\n", end_time - start_time); + + // Measure time taken by waiting for RoQ Decoder + start_time = dc_get_time(); + while (pcm_size < snddrv.pcm_needed) + 
{ + if (snddrv.dec_status == SNDDEC_STATUS_DONE) + goto done; + thd_pass(); + } + end_time = dc_get_time(); + printf("Wait for RoQ Decoder: %u ms\n", end_time - start_time); + + // Measure time taken by copying PCM samples + start_time = dc_get_time(); + mutex_lock(&pcm_mut); + memcpy(snddrv.pcm_buffer, pcm_buf, snddrv.pcm_needed); + pcm_size -= snddrv.pcm_needed; + memmove(pcm_buf, pcm_buf + snddrv.pcm_needed, pcm_size); + mutex_unlock(&pcm_mut); + end_time = dc_get_time(); + printf("Copy PCM Samples: %u ms\n", end_time - start_time); + + // Measure time taken by informing AICA Driver + start_time = dc_get_time(); + snddrv.buf_status = SNDDRV_STATUS_HAVEBUF; + end_time = dc_get_time(); + printf("Inform AICA Driver: %u ms\n", end_time - start_time); + + } while (snddrv.dec_status == SNDDEC_STATUS_STREAMING); +done: + snddrv.dec_status = SNDDEC_STATUS_NULL; +} + +static int render_cb(unsigned short *buf, int width, int height, int stride, + int texture_height) +{ + pvr_poly_cxt_t cxt; + static pvr_poly_hdr_t hdr[2]; + static pvr_vertex_t vert[4]; + + float ratio; + // screen coordinates of upper left and bottom right corners + static int ul_x, ul_y, br_x, br_y; + + // Initialize textures, drawing coordinates, and other parameters + if (!graphics_initialized) + { + textures[0] = pvr_mem_malloc(stride * texture_height * 2); + textures[1] = pvr_mem_malloc(stride * texture_height * 2); + if (!textures[0] || !textures[1]) + { + return ROQ_RENDER_PROBLEM; + } + + // Precompile the poly headers + for (int i = 0; i < 2; i++) { + pvr_poly_cxt_txr(&cxt, PVR_LIST_OP_POLY, PVR_TXRFMT_RGB565 | PVR_TXRFMT_NONTWIDDLED, + stride, texture_height, textures[i], PVR_FILTER_NONE); + pvr_poly_compile(&hdr[i], &cxt); + } + + // Calculate drawing coordinates + ratio = 640.0 / width; + ul_x = 0; + br_x = (int)(ratio * stride); + ul_y = (int)((480 - ratio * height) / 2); + br_y = ul_y + (int)(ratio * texture_height); + + // Set common vertex properties + for (int i = 0; i < 4; i++) { + 
vert[i].z = 1.0f; + vert[i].argb = PVR_PACK_COLOR(1.0f, 1.0f, 1.0f, 1.0f); + vert[i].oargb = 0; + vert[i].flags = (i < 3) ? PVR_CMD_VERTEX : PVR_CMD_VERTEX_EOL; + } + + // Initialize vertex coordinates and UV coordinates + vert[0].x = ul_x; + vert[0].y = ul_y; + vert[0].u = 0.0; + vert[0].v = 0.0; + + vert[1].x = br_x; + vert[1].y = ul_y; + vert[1].u = 1.0; + vert[1].v = 0.0; + + vert[2].x = ul_x; + vert[2].y = br_y; + vert[2].u = 0.0; + vert[2].v = 1.0; + + vert[3].x = br_x; + vert[3].y = br_y; + vert[3].u = 1.0; + vert[3].v = 1.0; + + // Get the current hardware timing + video_delay = (float)dc_get_time(); + + graphics_initialized = 1; + } + + // Send the video frame as a texture over to video RAM + pvr_txr_load(buf, textures[current_frame], stride * texture_height * 2); + + // Calculate the elapsed time since the last frame + unsigned int current_time = dc_get_time(); + unsigned int elapsed_time = current_time - video_delay; + unsigned int target_frame_time = 1000 / TARGET_FRAME_RATE; + + // If the elapsed time is less than the target frame time, introduce a delay + if (elapsed_time < target_frame_time) { + unsigned int delay_time = target_frame_time - elapsed_time; + thd_sleep(delay_time); + } + + // Update the hardware timing for the current frame + video_delay = (float)current_time; + + pvr_wait_ready(); + pvr_scene_begin(); + pvr_list_begin(PVR_LIST_OP_POLY); + + // Render the frame using precompiled headers and vertices + pvr_prim(&hdr[current_frame], sizeof(pvr_poly_hdr_t)); + for (int i = 0; i < 4; i++) { + pvr_prim(&vert[i], sizeof(pvr_vertex_t)); + } + + pvr_list_finish(); + pvr_scene_finish(); + + // Toggle between frames + current_frame = 1 - current_frame; + + return ROQ_SUCCESS; +} + + +static int renderGLdc_cb(unsigned short *buf, int width, int height, int stride, int texture_height) +{ + if(!graphics_initialized) { + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); // This Will Clear The Background Color To Black + glClearDepth(1.0); // Enables Clearing Of 
The Depth Buffer + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + //glDisable(GL_DEPTH_TEST); + //glEnable(GL_NORMALIZE); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); // Reset The Projection Matrix + glOrtho(0.0, 640.0, 0.0, 480.0, -1.0, 1.0); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + glDisable(GL_LIGHTING); + glEnable(GL_TEXTURE_2D); + glGenTextures(2, frameTexture); + glBindTexture(GL_TEXTURE_2D, frameTexture[0]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, NULL); + + glBindTexture(GL_TEXTURE_2D, frameTexture[1]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, NULL); + + video_delay = (float)dc_get_time(); + + float w = 512; + float h = 512; + int v = 0; + + vertices[v][0] = 0; + vertices[v][1] = 0; + vertices[v][2] = 0; + vertices[v][3] = 0.0f; + vertices[v][4] = 1.0f; + v++; + + vertices[v][0] = 0; + vertices[v][1] = 480; + vertices[v][2] = 0; + vertices[v][3] = 0.0f; + vertices[v][4] = 0.0f; + v++; + + vertices[v][0] = 640; + vertices[v][1] = 0; + vertices[v][2] = 0; + vertices[v][3] = 1.0f; + vertices[v][4] = 1.0f; + v++; + + vertices[v][0] = 640; + vertices[v][1] = 480; + vertices[v][2] = 0; + vertices[v][3] = 1.0f; + vertices[v][4] = 0.0f; + v++; + + GLfloat drawColor[4] = {1.0f, 1.0f, 1.0f, 1.0f}; + GLfloat emissionColor[4] = {0.0, 0.0, 0.0, 1.0f}; + glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, drawColor); + glMaterialfv(GL_FRONT, GL_SPECULAR, drawColor); + glMaterialfv(GL_FRONT, GL_EMISSION, emissionColor); + + graphics_initialized = 1; + } + + /* send the video frame as a texture over to video RAM */ + //pvr_txr_load(buf, textures[current_frame], stride * texture_height * 2); + glBindTexture(GL_TEXTURE_2D, frameTexture[current_frame]); + //glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 512, 512, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, buf); + glKosCopyTexture(buf, 512 * 512 * 2); + + + // Calculate the elapsed time since the last frame + unsigned int 
current_time = dc_get_time(); + unsigned int elapsed_time = current_time - video_delay; + unsigned int target_frame_time = 1000 / TARGET_FRAME_RATE; + + // If the elapsed time is less than the target frame time, introduce a delay + if (elapsed_time < target_frame_time) { + unsigned int delay_time = target_frame_time - elapsed_time; + thd_sleep(delay_time); + } + + // Update the hardware timing for the current frame + video_delay = (float)current_time; + + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glVertexPointer (3, GL_FLOAT, sizeof(vertices[0]), &vertices[0][0]); + glTexCoordPointer (2, GL_FLOAT, sizeof(vertices[0]), &vertices[0][3]); + + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glDisableClientState(GL_VERTEX_ARRAY); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + + glKosSwapBuffers(); + + current_frame = 1 - current_frame; + + return ROQ_SUCCESS; +} + + +static int audio_cb(unsigned char *buf, int size, int channels) +{ + // Copy the decoded PCM samples to our local PCM buffer + mutex_lock(&pcm_mut); + memcpy(pcm_buf + pcm_size, buf, size); + pcm_size += size; + mutex_unlock(&pcm_mut); + + return ROQ_SUCCESS; +} + +// Audio thread function +static void *snd_thd_wrapper(void *arg) +{ + printf("Audio Thread: Started\n"); + unsigned int start_time = dc_get_time(); + + // Call the actual audio thread function + snd_thd(); + + unsigned int end_time = dc_get_time(); + unsigned int elapsed_time = end_time - start_time; + printf("Audio Thread: Finished (Time: %u ms)\n", elapsed_time); + + return NULL; +} + + +static int quit_cb() +{ + static int frame_count = 0; + static unsigned int last_time = 0; + static unsigned int target_frame_time = 1000 / 30; // 30 FPS + + // Calculate time difference since the last frame + unsigned int current_time = dc_get_time(); + unsigned int elapsed_time = current_time - last_time; + + // Check if the video has ended and the audio decoding status is done + if (snddrv.dec_status == 
SNDDEC_STATUS_DONE) { + printf("Exiting due to audio decoding status\n"); + return 1; // Exit the loop + } + + // Check if the "Start" button is pressed + MAPLE_FOREACH_BEGIN(MAPLE_FUNC_CONTROLLER, cont_state_t, st) + if (st->buttons & CONT_START) { + printf("Exiting due to Start button\n"); + return 1; // Exit the loop + } + MAPLE_FOREACH_END() + + // Delay if necessary to maintain the target frame rate + if (elapsed_time < target_frame_time) { + unsigned int delay_time = target_frame_time - elapsed_time; + thd_sleep(delay_time); + } + + // Print FPS information every second + if (elapsed_time >= 1000) { + // double fps = (double)frame_count / (elapsed_time / 1000.0); + // printf("FPS: %.2lf\n", fps); + + frame_count = 0; + last_time = current_time; + } + + // printf("Continuing loop\n"); + fflush(stdout); // Flush the output buffer to ensure immediate display + frame_count++; + + return 0; // Continue the loop +} + + +int main() +{ + int status = 0; + + profiler_init("/pc/out.gmon"); + profiler_start(); + + glKosInit(); + + printf("dreamroq_play(C) Multimedia Mike Melanson & Josh PH3NOM Pearson 2011\n"); + printf("dreamroq_play(C) Ian micheal Up port to Kos2.0 sound fix and threading\n"); + printf("dreamroq_play(C) Ian micheal Kos2.0 free and exit when loop ends 2023\n"); + printf("dreamroq_play(C) Ian micheal redo frame limit code and rendering and comment what it does 2023\n"); + + // Initialize audio resources and create the audio thread + if (!audio_init) + { + pcm_buf = malloc(PCM_BUF_SIZE); + if (pcm_buf == NULL) + { + printf("Failed to allocate PCM buffer\n"); + return 1; + } + + snddrv_start(22050, 2); + snddrv.dec_status = SNDDEC_STATUS_STREAMING; + + printf("Creating Audio Thread\n"); + audio_thread = thd_create(AUDIO_THREAD_PRIO, snd_thd_wrapper, NULL); + if (!audio_thread) + { + printf("Failed to create audio thread\n"); + free(pcm_buf); + pcm_buf = NULL; + return 1; + } + + audio_init = 1; + } + + /* To disable a callback, simply replace the function 
name by 0 */ + status = dreamroq_play("/rd/movie.roq", 0, renderGLdc_cb, audio_cb, quit_cb); + //status = dreamroq_play("/cd/romdisk/movie.roq", 0, renderGLdc_cb, audio_cb, quit_cb); + + printf("dreamroq_play() status = %d\n", status); + + // Terminate and clean up the audio thread + if (audio_init) + { + snddrv.dec_status = SNDDEC_STATUS_DONE; + while (snddrv.dec_status != SNDDEC_STATUS_NULL) + { + thd_sleep(1); + printf("Waiting for audio thread to finish...\n"); + } + thd_destroy(audio_thread); // Destroy the audio thread + free(pcm_buf); + pcm_buf = NULL; + pcm_size = 0; + } + + if (graphics_initialized) + { + pvr_mem_free(textures[0]); + pvr_mem_free(textures[1]); + printf("Freed PVR memory\n"); + } + + profiler_stop(); + profiler_clean_up(); + + printf("Exiting main()\n"); + return 0; +} diff --git a/samples/dreamroq/profiler.c b/samples/dreamroq/profiler.c new file mode 100644 index 0000000..c44c3c9 --- /dev/null +++ b/samples/dreamroq/profiler.c @@ -0,0 +1,452 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static char OUTPUT_FILENAME[128]; +static kthread_t* THREAD; +static volatile bool PROFILER_RUNNING = false; +static volatile bool PROFILER_RECORDING = false; + +#define BASE_ADDRESS 0x8c010000 +#define BUCKET_SIZE 10000 + +#define INTERVAL_IN_MS 10 + +/* Simple hash table of samples. 
An array of Samples + * but, each sample in that array can be the head of + * a linked list of other samples */ +typedef struct Arc { + uint32_t pc; + uint32_t pr; // Caller return address + uint32_t count; + struct Arc* next; +} Arc; + +static Arc ARCS[BUCKET_SIZE]; + +/* Hashing function for two uint32_ts */ +#define HASH_PAIR(x, y) ((x * 0x1f1f1f1f) ^ y) + +#define BUFFER_SIZE (1024 * 8) // 8K buffer + +const static size_t MAX_ARC_COUNT = BUFFER_SIZE / sizeof(Arc); +static size_t ARC_COUNT = 0; + +static bool WRITE_TO_STDOUT = false; + +static bool write_samples(const char* path); +static bool write_samples_to_stdout(); +static void clear_samples(); + +static Arc* new_arc(uint32_t PC, uint32_t PR) { + Arc* s = (Arc*) malloc(sizeof(Arc)); + s->count = 1; + s->pc = PC; + s->pr = PR; + s->next = NULL; + + ++ARC_COUNT; + + return s; +} + +static void record_thread(uint32_t PC, uint32_t PR) { + uint32_t bucket = HASH_PAIR(PC, PR) % BUCKET_SIZE; + + Arc* s = &ARCS[bucket]; + + if(s->pc) { + /* Initialized sample in this bucket, + * does it match though? */ + while(s->pc != PC || s->pr != PR) { + if(s->next) { + s = s->next; + } else { + s->next = new_arc(PC, PR); + return; // We're done + } + } + + s->count++; + } else { + /* Initialize this sample */ + s->count = 1; + s->pc = PC; + s->pr = PR; + s->next = NULL; + ++ARC_COUNT; + } +} + +static int thd_each_cb(kthread_t* thd, void* data) { + (void) data; + + + /* Only record the main thread (for now) */ + if(strcmp(thd->label, "[kernel]") != 0) { + return 0; + } + + /* The idea is that if this code right here is running in the profiling + * thread, then all the PCs from the other threads are + * current. 
Obviouly thought between iterations the + * PC will change so it's not like this is a true snapshot + * in time across threads */ + int old = irq_disable(); + uint32_t PC = thd->context.pc; + uint32_t PR = thd->context.pr; + irq_restore(old); + + record_thread(PC, PR); + return 0; +} + +static void record_samples() { + /* Go through all the active threads and increase + * the sample count for the PC for each of them */ + + size_t initial = ARC_COUNT; + + /* Note: This is a function added to kallistios-nitro that's + * not yet available upstream */ + thd_each(&thd_each_cb, NULL); + + if(ARC_COUNT >= MAX_ARC_COUNT) { + /* TIME TO FLUSH! */ + if(!write_samples(OUTPUT_FILENAME)) { + fprintf(stderr, "Error writing samples\n"); + } + } + + /* We log when the number of PCs recorded hits a certain increment */ + if((initial != ARC_COUNT) && ((ARC_COUNT % 1000) == 0)) { + printf("-- %d arcs recorded...\n", ARC_COUNT); + } +} + +/* Declared in KOS in fs_dcload.c */ +int fs_dcload_detected(); +extern int dcload_type; + + +#define GMON_COOKIE "gmon" +#define GMON_VERSION 1 + +typedef struct { + char cookie[4]; // 'g','m','o','n' + int32_t version; // 1 + char spare[3 * 4]; // Padding +} GmonHeader; + +typedef struct { + uint32_t low_pc; + uint32_t high_pc; + uint32_t hist_size; + uint32_t prof_rate; + char dimen[15]; /* phys. dim., usually "seconds" */ + char dimen_abbrev; /* usually 's' for "seconds" */ +} GmonHistHeader; + +typedef struct { + unsigned char tag; // GMON_TAG_TIME_HIST = 0, GMON_TAG_CG_ARC = 1, GMON_TAG_BB_COUNT = 2 + size_t ncounts; // Number of address/count pairs in this sequence +} GmonBBHeader; + +typedef struct { + uint32_t from_pc; /* address within caller's body */ + uint32_t self_pc; /* address within callee's body */ + uint32_t count; /* number of arc traversals */ +} GmonArc; + +static bool init_sample_file(const char* path) { + printf("Detecting dcload... 
"); + + if(!fs_dcload_detected() && dcload_type != DCLOAD_TYPE_NONE) { + printf("[Not Found]\n"); + WRITE_TO_STDOUT = true; + return false; + } else { + printf("[Found]\n"); + } + + FILE* out = fopen(path, "w"); + if(!out) { + WRITE_TO_STDOUT = true; + return false; + } + + /* Write the GMON header */ + + GmonHeader header; + memcpy(&header.cookie[0], GMON_COOKIE, sizeof(header.cookie)); + header.version = 1; + memset(header.spare, '\0', sizeof(header.spare)); + + fwrite(&header, sizeof(header), 1, out); + + fclose(out); + return true; +} + +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +static bool write_samples(const char* path) { + /* Appends the samples to the output file in gmon format + * + * We iterate the data twice, first generating arcs, then generating + * basic block counts. While we do that though we calculate the data + * for the histogram so we don't need a third iteration */ + + if(WRITE_TO_STDOUT) { + write_samples_to_stdout(); + return true; + } + + extern char _etext; + + const uint32_t HISTFRACTION = 8; + + /* We know the lowest address, it's the same for all DC games */ + uint32_t lowest_address = ROUNDDOWN(BASE_ADDRESS, HISTFRACTION); + + /* We need to calculate the highest address though */ + uint32_t highest_address = ROUNDUP((uint32_t) &_etext, HISTFRACTION); + + /* Histogram data */ + const int BIN_COUNT = ((highest_address - lowest_address) / HISTFRACTION); + uint16_t* bins = (uint16_t*) malloc(BIN_COUNT * sizeof(uint16_t)); + memset(bins, 0, sizeof(uint16_t) * BIN_COUNT); + + FILE* out = fopen(path, "r+"); /* Append, as init_sample_file would have created the file */ + if(!out) { + fprintf(stderr, "-- Error writing samples to output file\n"); + return false; + } + + // Seek to the end of the file + fseek(out, 0, SEEK_END); + + printf("-- Writing %d arcs\n", ARC_COUNT); + + uint8_t tag = 1; + +#ifndef NDEBUG + size_t written = 0; +#endif + + /* Write arcs */ + Arc* root = ARCS; + for(int i = 0; i < 
BUCKET_SIZE; ++i) { + if(root->pc) { + GmonArc arc; + arc.from_pc = root->pr; + arc.self_pc = root->pc; + arc.count = root->count; + + /* Write the root sample if it has a program counter */ + fwrite(&tag, sizeof(tag), 1, out); + fwrite(&arc, sizeof(GmonArc), 1, out); + +#ifndef NDEBUG + ++written; +#endif + + /* If there's a next pointer, traverse the list */ + Arc* s = root->next; + while(s) { + arc.from_pc = s->pr; + arc.self_pc = s->pc; + arc.count = s->count; + + /* Write the root sample if it has a program counter */ + fwrite(&tag, sizeof(tag), 1, out); + fwrite(&arc, sizeof(GmonArc), 1, out); + +#ifndef NDEBUG + ++written; +#endif + + s = s->next; + } + } + + root++; + } + + uint32_t histogram_range = highest_address - lowest_address; + uint32_t bin_size = histogram_range / BIN_COUNT; + + root = ARCS; + for(int i = 0; i < BUCKET_SIZE; ++i) { + if(root->pc) { + printf("Incrementing %d for %x. ", (root->pc - lowest_address) / bin_size, (unsigned int) root->pc); + bins[(root->pc - lowest_address) / bin_size]++; + printf("Now: %d\n", (int) bins[(root->pc - lowest_address) / bin_size]); + + /* If there's a next pointer, traverse the list */ + Arc* s = root->next; + while(s) { + assert(s->pc); + bins[(s->pc - lowest_address) / bin_size]++; + s = s->next; + } + } + + root++; + } + + + /* Write histogram now that we have all the information we need */ + GmonHistHeader hist_header; + hist_header.low_pc = lowest_address; + hist_header.high_pc = highest_address; + hist_header.hist_size = BIN_COUNT; + hist_header.prof_rate = INTERVAL_IN_MS; + strcpy(hist_header.dimen, "seconds"); + hist_header.dimen_abbrev = 's'; + + unsigned char hist_tag = 0; + fwrite(&hist_tag, sizeof(hist_tag), 1, out); + fwrite(&hist_header, sizeof(hist_header), 1, out); + fwrite(bins, sizeof(uint16_t), BIN_COUNT, out); + + fclose(out); + free(bins); + + /* We should have written all the recorded samples */ + assert(written == ARC_COUNT); + + clear_samples(); + + return true; +} + +static bool 
write_samples_to_stdout() { + /* Write samples to stdout as a CSV file + * for processing */ + + printf("--------------\n"); + printf("\"PC\", \"PR\", \"COUNT\"\n"); + + Arc* root = ARCS; + for(int i = 0; i < BUCKET_SIZE; ++i) { + Arc* s = root; + while(s->next) { + printf("\"%x\", \"%x\", \"%d\"\n", (unsigned int) s->pc, (unsigned int) s->pr, (unsigned int) s->count); + s = s->next; + } + + root++; + } + + printf("--------------\n"); + + return true; +} + + +static void* run(void* args) { + printf("-- Entered profiler thread!\n"); + + while(PROFILER_RUNNING){ + if(PROFILER_RECORDING) { + record_samples(); + usleep(INTERVAL_IN_MS * 1000); //usleep takes milliseconds + } + } + + printf("-- Profiler thread finished!\n"); + + return NULL; +} + +void profiler_init(const char* output) { + /* Store the filename */ + strncpy(OUTPUT_FILENAME, output, sizeof(OUTPUT_FILENAME)); + + /* Initialize the file */ + printf("Creating samples file...\n"); + if(!init_sample_file(OUTPUT_FILENAME)) { + printf("Read-only filesytem. Writing samples to stdout\n"); + } + + printf("Creating profiler thread...\n"); + // Initialize the samples to zero + memset(ARCS, 0, sizeof(ARCS)); + + PROFILER_RUNNING = true; + THREAD = thd_create(0, run, NULL); + + /* Lower priority is... 
er, higher */ + thd_set_prio(THREAD, PRIO_DEFAULT / 2); + + printf("Thread started.\n"); +} + +void profiler_start() { + assert(PROFILER_RUNNING); + + if(PROFILER_RECORDING) { + return; + } + + PROFILER_RECORDING = true; + printf("Starting profiling...\n"); +} + +static void clear_samples() { + /* Free the samples we've collected to start again */ + Arc* root = ARCS; + for(int i = 0; i < BUCKET_SIZE; ++i) { + Arc* s = root; + Arc* next = s->next; + + // While we have a next pointer + while(next) { + s = next; // Point S at it + next = s->next; // Store the new next pointer + free(s); // Free S + } + + // We've wiped the chain so we can now clear the root + // which is statically allocated + root->next = NULL; + root++; + } + + // Wipe the lot + memset(ARCS, 0, sizeof(ARCS)); + ARC_COUNT = 0; +} + +bool profiler_stop() { + if(!PROFILER_RECORDING) { + return false; + } + + printf("Stopping profiling...\n"); + + PROFILER_RECORDING = false; + if(!write_samples(OUTPUT_FILENAME)) { + printf("ERROR WRITING SAMPLES (RO filesystem?)! Outputting to stdout\n"); + return false; + } + + + return true; +} + +void profiler_clean_up() { + profiler_stop(); // Make sure everything is stopped + + PROFILER_RUNNING = false; + thd_join(THREAD, NULL); +} diff --git a/samples/dreamroq/profiler.h b/samples/dreamroq/profiler.h new file mode 100644 index 0000000..d2a7435 --- /dev/null +++ b/samples/dreamroq/profiler.h @@ -0,0 +1,18 @@ +#pragma once + +/* + * The Dreamcast doesn't have any kind of profiling support from GCC + * so this is a cumbersome sampling profiler that runs in a background thread + */ +#ifdef __cplusplus +extern "C" { +#endif + +void profiler_init(const char* output); +void profiler_start(); +void profiler_stop(); +void profiler_clean_up(); + +#ifdef __cplusplus +} +#endif diff --git a/samples/dreamroq/romdisk/movie.roq b/samples/dreamroq/romdisk/movie.roq new file mode 100644 index 0000000..d87f28f Binary files /dev/null and b/samples/dreamroq/romdisk/movie.roq differ