diff --git a/GL/flush.c b/GL/flush.c
index 0b9dd8b..e5e5274 100644
--- a/GL/flush.c
+++ b/GL/flush.c
@@ -2,6 +2,7 @@
 
 #include <kos.h>
 
+#include "../include/glkos.h"
 #include "../containers/aligned_vector.h"
 #include "private.h"
 #include "profiler.h"
@@ -38,14 +39,14 @@ static void pvr_list_submit(void *src, int n) {
     d[0] = d[8] = 0;
 }
 
-static void _glInitPVR() {
+static void _glInitPVR(GLboolean autosort) {
     pvr_init_params_t params = {
         /* Enable opaque and translucent polygons with size 32 and 32 */
         {PVR_BINSIZE_32, PVR_BINSIZE_0, PVR_BINSIZE_32, PVR_BINSIZE_0, PVR_BINSIZE_32},
         PVR_VERTEX_BUF_SIZE, /* Vertex buffer size */
         0, /* No DMA */
         0, /* No FSAA */
-        1 /* Disable translucent auto-sorting to match traditional GL */
+        (autosort) ? 0 : 1 /* Disable translucent auto-sorting to match traditional GL */
     };
 
     pvr_init(&params);
@@ -75,10 +76,16 @@ void APIENTRY glFinish() {
 }
 
 
-void APIENTRY glKosInit() {
+void APIENTRY glKosInitConfig(GLdcConfig* config) {
+    config->internal_palette_format = GL_RGBA4; /* default format for paletted textures */
+    config->initial_vbuf_capacity = 256;        /* vertices reserved up front in each list */
+    config->autosort_enabled = GL_FALSE;        /* PVR autosorting is off unless requested */
+}
+
+void APIENTRY glKosInitEx(GLdcConfig* config) {
     TRACE();
 
-    _glInitPVR();
+    _glInitPVR(config->autosort_enabled);
 
     _glInitMatrices();
     _glInitAttributePointers();
@@ -87,6 +94,8 @@ void APIENTRY glKosInit() {
     _glInitImmediateMode();
     _glInitFramebuffers();
 
+    _glSetInternalPaletteFormat(config->internal_palette_format); /* honour the caller-selected palette format instead of a fixed GL_RGBA4 */
+
     _glInitTextures();
 
     OP_LIST.list_type = PVR_LIST_OP_POLY;
@@ -96,6 +105,16 @@ void APIENTRY glKosInit() {
     aligned_vector_init(&OP_LIST.vector, sizeof(ClipVertex));
     aligned_vector_init(&PT_LIST.vector, sizeof(ClipVertex));
     aligned_vector_init(&TR_LIST.vector, sizeof(ClipVertex));
+
+    aligned_vector_reserve(&OP_LIST.vector, config->initial_vbuf_capacity);
+    aligned_vector_reserve(&PT_LIST.vector, config->initial_vbuf_capacity);
+    aligned_vector_reserve(&TR_LIST.vector, config->initial_vbuf_capacity);
+}
+
+void APIENTRY glKosInit() {
+    GLdcConfig defaults;            /* populated by glKosInitConfig below */
+    glKosInitConfig(&defaults);
+    glKosInitEx(&defaults);         /* initialise with the unmodified defaults */
 }
 
 #define QACRTA ((((unsigned int)0x10000000)>>26)<<2)&0x1c
diff --git a/GL/framebuffer.c b/GL/framebuffer.c
index c83edba..3fd44c2 100644
--- a/GL/framebuffer.c
+++ b/GL/framebuffer.c
@@ -18,6 +18,9 @@ static NamedArray FRAMEBUFFERS;
 
 void _glInitFramebuffers() {
     named_array_init(&FRAMEBUFFERS, sizeof(FrameBuffer), 32);
+
+    // Reserve zero so that it is never given to anyone as an ID!
+    named_array_reserve(&FRAMEBUFFERS, 0);
 }
 
 void _glWipeTextureOnFramebuffers(GLuint texture) {
diff --git a/GL/private.h b/GL/private.h
index 7465bc5..29392a1 100644
--- a/GL/private.h
+++ b/GL/private.h
@@ -147,6 +147,7 @@ TextureObject* _glGetBoundTexture();
 GLubyte _glGetActiveTexture();
 GLuint _glGetActiveClientTexture();
 TexturePalette* _glGetSharedPalette(GLshort bank);
+void _glSetInternalPaletteFormat(GLenum val);
 
 GLboolean _glIsSharedTexturePaletteEnabled();
 void _glApplyColorTable(TexturePalette *palette);
diff --git a/GL/texture.c b/GL/texture.c
index 6de433a..e4e60d4 100644
--- a/GL/texture.c
+++ b/GL/texture.c
@@ -24,9 +24,13 @@ static GLuint _determinePVRFormat(GLint internalFormat, GLenum type);
 
 #define PACK_ARGB8888(a,r,g,b) ( ((a & 0xFF) << 24) | ((r & 0xFF) << 16) | ((g & 0xFF) << 8) | (b & 0xFF) )
 
+
+#define _PACK4(v) (((v) * 0xF) / 0xFF) /* rescale an 8-bit channel (0..255) to 4 bits (0..15) */
+#define PACK_ARGB4444(a,r,g,b) ((_PACK4(a) << 12) | (_PACK4(r) << 8) | (_PACK4(g) << 4) | (_PACK4(b))) /* fully parenthesised: safe inside larger expressions */
+
 static GLboolean BANKS_USED[4];  // Each time a 256 colour bank is used, this is set to true
 static GLboolean SUBBANKS_USED[4][16]; // 4 counts of the used 16 colour banks within the 256 ones
-
+static GLenum INTERNAL_PALETTE_FORMAT = GL_RGBA4;
 
 static TexturePalette* _initTexturePalette() {
     TexturePalette* palette = (TexturePalette*) malloc(sizeof(TexturePalette));
@@ -101,6 +105,17 @@ TexturePalette* _glGetSharedPalette(GLshort bank) {
     return SHARED_PALETTES[bank];
 }
 
+void _glSetInternalPaletteFormat(GLenum val) {
+    /* Only GL_RGBA4 and GL_RGBA8 are valid; with asserts compiled out anything else falls back to GL_RGBA4 */
+    assert(val == GL_RGBA4 || val == GL_RGBA8);
+    INTERNAL_PALETTE_FORMAT = (val == GL_RGBA8) ? GL_RGBA8 : GL_RGBA4; /* stored value always matches the PVR mode set below */
+    if(INTERNAL_PALETTE_FORMAT == GL_RGBA8) {
+        pvr_set_pal_format(PVR_PAL_ARGB8888);
+    } else {
+        pvr_set_pal_format(PVR_PAL_ARGB4444);
+    }
+}
+
 void _glApplyColorTable(TexturePalette* src) {
     /*
      * FIXME:
@@ -111,13 +126,15 @@ void _glApplyColorTable(TexturePalette* src) {
         return;
     }
 
-    pvr_set_pal_format(PVR_PAL_ARGB8888);
-
     GLushort i;
     GLushort offset = src->size * src->bank;
     for(i = 0; i < src->width; ++i) {
         GLubyte* entry = &src->data[i * 4];
-        pvr_set_pal_entry(offset + i, PACK_ARGB8888(entry[3], entry[0], entry[1], entry[2]));
+        if(INTERNAL_PALETTE_FORMAT == GL_RGBA8) {
+            pvr_set_pal_entry(offset + i, PACK_ARGB8888(entry[3], entry[0], entry[1], entry[2]));
+        } else {
+            pvr_set_pal_entry(offset + i, PACK_ARGB4444(entry[3], entry[0], entry[1], entry[2]));
+        }
     }
 }
 
@@ -758,19 +775,12 @@ GLboolean _glIsMipmapComplete(const TextureObject* obj) {
     return GL_TRUE;
 }
 
+#define TWIDTAB(x) ( (x&1)|((x&2)<<1)|((x&4)<<2)|((x&8)<<3)|((x&16)<<4)| \
+                     ((x&32)<<5)|((x&64)<<6)|((x&128)<<7)|((x&256)<<8)|((x&512)<<9) )
 
-static inline GLuint morton_1by1(GLuint x) {
-    x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
-    x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
-    x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
-    x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
-    x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
-    return x;
-}
+#define TWIDOUT(x, y) ( TWIDTAB((y)) | (TWIDTAB((x)) << 1) )
+#define MIN(a, b) ( (a)<(b)? (a):(b) )
 
-static inline GLuint morton_index(GLuint x, GLuint y) {
-    return (morton_1by1(y) << 1) + morton_1by1(x);
-}
 
 void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
                            GLsizei width, GLsizei height, GLint border,
@@ -930,43 +940,18 @@ void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalFormat,
         assert(bytes);
 
         if(needsTwiddling) {
-            /*assert(type == GL_UNSIGNED_BYTE);  // Anything else needs this loop adjusting
-            GLuint x, y;
-            for(y = 0; y < height; ++y) {
-                for(x = 0; x < width; ++x) {
-                    GLuint src = (y * width) + x;
-                    GLuint dest = morton_index(x, y);
-
-                    targetData[dest] = ((GLubyte*) data)[src];
-                }
-            }
-			*/
-			/* Don't convert color indexes */
-            /* Linear/iterative twiddling algorithm from Marcus' tatest */
-            #define TWIDTAB(x) ( (x&1)|((x&2)<<1)|((x&4)<<2)|((x&8)<<3)|((x&16)<<4)| \
-                                ((x&32)<<5)|((x&64)<<6)|((x&128)<<7)|((x&256)<<8)|((x&512)<<9) )
-            #define TWIDOUT(x, y) ( TWIDTAB((y)) | (TWIDTAB((x)) << 1) )
-
-            #define MIN(a, b) ( (a)<(b)? (a):(b) )
-    
-            uint32 x, y, min, mask;
+            assert(type == GL_UNSIGNED_BYTE);  // Anything else needs this loop adjusting
+            GLuint x, y, min, min2, mask;
 
             min = MIN(w, h);
+            min2 = min * min;
             mask = min - 1;
-            
-            uint8 * pixels;
-            uint16 * vtex;
-            pixels = (uint8 *) data;
-            vtex = (uint16*)targetData;
 
-            for(y = 0; y < h; y += 2) {
+            for(y = 0; y < h; y++) {
                 for(x = 0; x < w; x++) {
-                    vtex[TWIDOUT((y & mask) / 2, x & mask) +
-                            (x / min + y / min)*min * min / 2] =
-                                pixels[y * w + x] | (pixels[(y + 1) * w + x] << 8);
+                    targetData[TWIDOUT(x & mask, y & mask) + (x / min + y / min) * min2] = ((GLubyte*) data)[y * w + x];
                 }
             }
-            data = NULL;
         } else {
             /* No conversion? Just copy the data, and the pvr_format is correct */
             sq_cpy(targetData, data, bytes);
diff --git a/include/gl.h b/include/gl.h
index 5872240..8b5b060 100644
--- a/include/gl.h
+++ b/include/gl.h
@@ -116,7 +116,6 @@ __BEGIN_DECLS
 #define GL_MODULATE         0x2100
 #define GL_DECAL            0x2101
 
-
 /* TextureMagFilter */
 #define GL_NEAREST                      0x2600
 #define GL_LINEAR                       0x2601
@@ -390,9 +389,6 @@ __BEGIN_DECLS
 #define GLAPI extern
 #define APIENTRY
 
-/* Initialize the GL pipeline. GL will initialize the PVR. */
-GLAPI void APIENTRY glKosInit();
-
 GLAPI void APIENTRY glFlush();
 GLAPI void APIENTRY glFinish();
 
diff --git a/include/glkos.h b/include/glkos.h
index b809f73..ba2b323 100644
--- a/include/glkos.h
+++ b/include/glkos.h
@@ -35,8 +35,34 @@ __BEGIN_DECLS
 #define GL_UNSIGNED_BYTE_TWID_KOS                   0xEEFB
 
 
-GLAPI void APIENTRY glKosSwapBuffers();
+/* Initialize the GL pipeline. GL will initialize the PVR. */
+GLAPI void APIENTRY glKosInit();
 
+typedef struct {
+    /* If GL_TRUE, enables PVR autosorting. Note: this *will* break glDepthFunc/glDepthTest */
+    GLboolean autosort_enabled;
+
+    /* The internal format for paletted textures, must be GL_RGBA4 (default) or GL_RGBA8 */
+    GLenum internal_palette_format;
+
+    /* Initial capacity of each of the OP, TR and PT lists in vertices */
+    GLuint initial_vbuf_capacity;
+} GLdcConfig;
+
+
+GLAPI void APIENTRY glKosInitConfig(GLdcConfig* config);
+
+/* Usage:
+ *
+ * GLdcConfig config;
+ * glKosInitConfig(&config);
+ *
+ * config.autosort_enabled = GL_TRUE;
+ *
+ * glKosInitEx(&config);
+ */
+GLAPI void APIENTRY glKosInitEx(GLdcConfig* config);
+GLAPI void APIENTRY glKosSwapBuffers();
 
 /*
  * CUSTOM EXTENSION multiple_shared_palette_KOS
diff --git a/samples/paletted_pcx/main.c b/samples/paletted_pcx/main.c
index 8cb52fc..b82a54a 100644
--- a/samples/paletted_pcx/main.c
+++ b/samples/paletted_pcx/main.c
@@ -329,7 +329,12 @@ void DrawGLScene()
 
 int main(int argc, char **argv)
 {
-    glKosInit();
+    GLdcConfig config;
+    glKosInitConfig(&config);
+
+    config.internal_palette_format = GL_RGBA8;
+
+    glKosInitEx(&config);
 
     InitGL(640, 480);
     ReSizeGLScene(640, 480);
diff --git a/samples/paletted_pcx/romdisk/NeHe.pcx b/samples/paletted_pcx/romdisk/NeHe.pcx
index 267225f..c234a72 100644
Binary files a/samples/paletted_pcx/romdisk/NeHe.pcx and b/samples/paletted_pcx/romdisk/NeHe.pcx differ