445 lines
12 KiB
C++
445 lines
12 KiB
C++
#pragma once
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <chrono>
|
|
#include <cmath>
|
|
#include <cstdint>
|
|
#include <algorithm>
|
|
|
|
// makes life easier
|
|
#if UF_USE_DC_TEXCONV
|
|
#include <uf/utils/memory/string.h>
|
|
#include <uf/utils/memory/vector.h>
|
|
#include <uf/utils/memory/unordered_map.h>
|
|
#else
|
|
#include <string>
|
|
#include <vector>
|
|
#include <unordered_map>
|
|
|
|
namespace uf {
|
|
namespace stl {
|
|
using unordered_map = std::unordered_map;
|
|
using vector = std::vector;
|
|
using string = std::string;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
typedef uint32_t uint;
|
|
|
|
enum FilterMode {
|
|
NEAREST, BILINEAR
|
|
};
|
|
|
|
struct RGBA {
|
|
uint8_t r, g, b, a;
|
|
};
|
|
|
|
// N-dimensional vectors, for input to a VectorQuantizer.
|
|
template <uint N>
|
|
class Vec {
|
|
public:
|
|
Vec(uint hval = 0) : hashVal(hval) {}
|
|
Vec(const Vec<N>& other);
|
|
void zero();
|
|
void operator= (const Vec<N>& other);
|
|
bool operator== (const Vec<N>& other) const;
|
|
void operator+= (const Vec<N>& other);
|
|
void operator-= (const Vec<N>& other);
|
|
Vec<N> operator+ (const Vec<N>& other) const;
|
|
Vec<N> operator- (const Vec<N>& other) const;
|
|
void addMultiplied(const Vec<N>& other, float x);
|
|
void operator/= (float x);
|
|
float& operator[] (int index);
|
|
const float& operator[] (int index) const;
|
|
void set(int index, float value);
|
|
float lengthSquared() const;
|
|
float length() const;
|
|
void setLength(float len);
|
|
void normalize();
|
|
static float distanceSquared(const Vec<N>& a, const Vec<N>& b);
|
|
uint hash() const;
|
|
void setHash(uint h) { hashVal = h; }
|
|
private:
|
|
float v[N];
|
|
uint hashVal; // Only used for the constant input vectors, so we only need to calc once.
|
|
uint64_t lololol; // Speeds up the average compression by a couple of seconds on my machine. Probably some alignment stuff.
|
|
};
|
|
|
|
template<uint N>
|
|
inline Vec<N>::Vec(const Vec<N> &other) {
|
|
*this = other;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::zero() {
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] = 0;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::operator= (const Vec<N>& other) {
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] = other.v[i];
|
|
hashVal = other.hashVal;
|
|
}
|
|
|
|
template<uint N>
|
|
inline bool Vec<N>::operator== (const Vec<N>& other) const {
|
|
for (uint i=0; i<N; ++i)
|
|
if (fabs(v[i] - other.v[i]) > 0.001f)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
template<uint N>
|
|
inline Vec<N> Vec<N>::operator+ (const Vec<N>& other) const {
|
|
Vec<N> ret;
|
|
for (uint i=0; i<N; ++i)
|
|
ret.v[i] = v[i] + other.v[i];
|
|
return ret;
|
|
}
|
|
|
|
template<uint N>
|
|
inline Vec<N> Vec<N>::operator- (const Vec<N>& other) const {
|
|
Vec<N> ret;
|
|
for (uint i=0; i<N; ++i)
|
|
ret.v[i] = v[i] - other.v[i];
|
|
return ret;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::addMultiplied(const Vec<N>& other, float x) {
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] += (other.v[i] * x);
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::operator+= (const Vec<N>& other) {
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] += other.v[i];
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::operator-= (const Vec<N>& other) {
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] -= other.v[i];
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::operator/= (float x) {
|
|
const float invx = 1.0f / x;
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] *= invx;
|
|
}
|
|
|
|
template<uint N>
|
|
inline float& Vec<N>::operator[] (int index) {
|
|
return v[index];
|
|
}
|
|
|
|
template<uint N>
|
|
inline const float& Vec<N>::operator[] (int index) const {
|
|
return v[index];
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::set(int index, float value) {
|
|
v[index] = value;
|
|
}
|
|
|
|
template<uint N>
|
|
inline float Vec<N>::length() const {
|
|
return sqrt(lengthSquared());
|
|
}
|
|
|
|
template<uint N>
|
|
inline float Vec<N>::lengthSquared() const {
|
|
float ret = 0;
|
|
for (uint i=0; i<N; ++i)
|
|
ret += (v[i] * v[i]);
|
|
return ret;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::setLength(float len) {
|
|
float x = (1.0f / length()) * len;
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] *= x;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void Vec<N>::normalize() {
|
|
const float invlen = 1.0f / length();
|
|
for (uint i=0; i<N; ++i)
|
|
v[i] *= invlen;
|
|
}
|
|
|
|
template<uint N>
|
|
float Vec<N>::distanceSquared(const Vec<N>& a, const Vec<N>& b) {
|
|
return (a - b).lengthSquared();
|
|
}
|
|
|
|
template<uint N>
|
|
inline uint Vec<N>::hash() const {
|
|
return hashVal;
|
|
}
|
|
|
|
template<uint N>
|
|
uint qHash(const Vec<N>& vec) {
|
|
return vec.hash();
|
|
}
|
|
|
|
namespace std {
|
|
template<uint N>
|
|
struct hash<Vec<N>> {
|
|
size_t operator()(const Vec<N>& v) const {
|
|
uint32_t h=2166136261u;
|
|
for(uint i=0;i<N;i++){
|
|
uint32_t bits;
|
|
memcpy(&bits,&v[i],sizeof(float));
|
|
h ^= bits;
|
|
h *= 16777619u;
|
|
}
|
|
return h ^ v.hash();
|
|
}
|
|
};
|
|
}
|
|
|
|
template<uint N>
|
|
class VectorQuantizer {
|
|
public:
|
|
struct Code {
|
|
Vec<N> codeVec;
|
|
Vec<N> vecSum;
|
|
int vecCount = 0;
|
|
float maxDistance = 0;
|
|
Vec<N> maxDistanceVec;
|
|
};
|
|
|
|
uf::stl::vector<Code> codes;
|
|
|
|
int codeCount() const { return (int)codes.size(); }
|
|
const Vec<N>& codeVector(int i) const { return codes[i].codeVec; }
|
|
|
|
int findClosest(const Vec<N>& vec) const;
|
|
int findBestSplitCandidate() const;
|
|
void removeUnusedCodes();
|
|
void place(const uf::stl::unordered_map<Vec<N>,int>& vecs);
|
|
void split();
|
|
void splitCode(int index);
|
|
void compress(const uf::stl::vector<Vec<N>>& vectors,int numCodes);
|
|
bool writeReportToFile(const uf::stl::string& filename);
|
|
};
|
|
|
|
inline uint32_t packColor(const RGBA& c) {
|
|
return (uint32_t(c.a)<<24)|(uint32_t(c.r)<<16)|(uint32_t(c.g)<<8)|uint32_t(c.b);
|
|
}
|
|
inline RGBA unpackColor(uint32_t argb) {
|
|
RGBA c;
|
|
c.a = (argb>>24)&0xFF;
|
|
c.r = (argb>>16)&0xFF;
|
|
c.g = (argb>>8)&0xFF;
|
|
c.b = argb&0xFF;
|
|
return c;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void rgb2vec(uint32_t rgb, Vec<N>& vec, uint offset=0) {
|
|
RGBA c = unpackColor(rgb);
|
|
vec[offset+0] = c.r/255.f;
|
|
vec[offset+1] = c.g/255.f;
|
|
vec[offset+2] = c.b/255.f;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void argb2vec(uint32_t argb, Vec<N>& vec, uint offset=0) {
|
|
RGBA c = unpackColor(argb);
|
|
vec[offset+0] = c.a/255.f;
|
|
vec[offset+1] = c.r/255.f;
|
|
vec[offset+2] = c.g/255.f;
|
|
vec[offset+3] = c.b/255.f;
|
|
}
|
|
|
|
template<uint N>
|
|
inline void vec2rgb(const Vec<N>& vec, uint32_t& rgb, uint offset=0) {
|
|
RGBA c;
|
|
c.r = (uint8_t)(vec[offset+0]*255);
|
|
c.g = (uint8_t)(vec[offset+1]*255);
|
|
c.b = (uint8_t)(vec[offset+2]*255);
|
|
c.a = 255;
|
|
rgb = packColor(c);
|
|
}
|
|
|
|
template<uint N>
|
|
inline void vec2argb(const Vec<N>& vec, uint32_t& argb, uint offset=0) {
|
|
RGBA c;
|
|
c.a = (uint8_t)(vec[offset+0]*255);
|
|
c.r = (uint8_t)(vec[offset+1]*255);
|
|
c.g = (uint8_t)(vec[offset+2]*255);
|
|
c.b = (uint8_t)(vec[offset+3]*255);
|
|
argb = packColor(c);
|
|
}
|
|
|
|
template<uint N>
|
|
int VectorQuantizer<N>::findClosest(const Vec<N>& vec) const {
|
|
if (codes.size() <= 1) return 0;
|
|
int closestIndex = 0;
|
|
float closestDist = Vec<N>::distanceSquared(codes[0].codeVec, vec);
|
|
for(size_t i=1; i<codes.size(); i++) {
|
|
float d = Vec<N>::distanceSquared(codes[i].codeVec, vec);
|
|
if(d < closestDist){
|
|
closestDist=d;
|
|
closestIndex=(int)i;
|
|
if (closestDist < 0.0001f) return closestIndex;
|
|
}
|
|
}
|
|
return closestIndex;
|
|
}
|
|
|
|
template<uint N>
|
|
int VectorQuantizer<N>::findBestSplitCandidate() const {
|
|
int idx=-1;
|
|
float furthest=0;
|
|
for(size_t i=0;i<codes.size();i++){
|
|
if(codes[i].vecCount>1 && codes[i].maxDistance>furthest){
|
|
furthest=codes[i].maxDistance;
|
|
idx=(int)i;
|
|
}
|
|
}
|
|
return idx;
|
|
}
|
|
|
|
template<uint N>
|
|
void VectorQuantizer<N>::removeUnusedCodes() {
|
|
size_t oldSize=codes.size();
|
|
codes.erase(
|
|
std::remove_if(codes.begin(), codes.end(),
|
|
[](const Code& c){ return c.vecCount==0; }),
|
|
codes.end()
|
|
);
|
|
if(codes.size()<oldSize){
|
|
UF_MSG_DEBUG("Removed {} unused codes", (oldSize-codes.size()));
|
|
}
|
|
}
|
|
|
|
template<uint N>
|
|
void VectorQuantizer<N>::place(const uf::stl::unordered_map<Vec<N>,int>& vecs) {
|
|
for(auto& code:codes){
|
|
code.vecCount=0;
|
|
code.vecSum.zero();
|
|
code.maxDistance=0;
|
|
code.maxDistanceVec.zero();
|
|
}
|
|
|
|
for(const auto& kv:vecs){
|
|
const Vec<N>& vec=kv.first;
|
|
int count=kv.second;
|
|
Code& code = codes[findClosest(vec)];
|
|
|
|
code.vecSum.addMultiplied(vec,count);
|
|
code.vecCount+=count;
|
|
|
|
float dist=Vec<N>::distanceSquared(code.codeVec,vec);
|
|
if(dist>code.maxDistance){
|
|
code.maxDistance=dist;
|
|
code.maxDistanceVec=vec;
|
|
}
|
|
}
|
|
|
|
for(auto& code:codes){
|
|
if(code.vecCount>0){
|
|
code.vecSum /= (float)code.vecCount;
|
|
code.codeVec=code.vecSum;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<uint N>
|
|
void VectorQuantizer<N>::split() {
|
|
int SIZE=(int)codes.size();
|
|
for(int i=0;i<SIZE;i++){
|
|
if(codes[i].vecCount>1){
|
|
splitCode(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<uint N>
|
|
void VectorQuantizer<N>::splitCode(int index) {
|
|
Code& code=codes[index];
|
|
Vec<N> diff=code.maxDistanceVec-code.codeVec;
|
|
diff.setLength(0.01f);
|
|
Vec<N> newVec=code.codeVec;
|
|
for(uint i=0;i<N;i++) newVec[i]+=diff[i];
|
|
for(uint i=0;i<N;i++) code.codeVec[i]-=diff[i];
|
|
Code newCode;
|
|
newCode.codeVec=newVec;
|
|
codes.push_back(newCode);
|
|
}
|
|
|
|
template<uint N>
|
|
void VectorQuantizer<N>::compress(const uf::stl::vector<Vec<N>>& vectors,int numCodes) {
|
|
using clock=std::chrono::steady_clock;
|
|
auto start=clock::now();
|
|
|
|
uf::stl::unordered_map<Vec<N>,int> rle;
|
|
for(const auto& v:vectors) rle[v]++;
|
|
|
|
// UF_MSG_DEBUG("RLE result: {} => {}", vectors.size(), rle.size());
|
|
|
|
codes.clear();
|
|
codes.resize(1);
|
|
codes.reserve(numCodes);
|
|
place(rle);
|
|
|
|
int splits=0, repairs=0;
|
|
while((int)(codes.size()*2)<=numCodes){
|
|
size_t before=codes.size();
|
|
split();
|
|
place(rle); place(rle); place(rle);
|
|
removeUnusedCodes();
|
|
|
|
if(codes.size()==before){
|
|
//UF_MSG_DEBUG("No further improvement by splitting");
|
|
break;
|
|
}
|
|
splits++;
|
|
//UF_MSG_DEBUG("Split {} done. COdes: {}", splits, codes.size());
|
|
}
|
|
|
|
while((int)codes.size()<numCodes){
|
|
size_t before=codes.size();
|
|
int n=numCodes-before;
|
|
for(int i=0;i<n;i++){
|
|
int idx=findBestSplitCandidate();
|
|
if(idx==-1) break;
|
|
splitCode(idx);
|
|
codes[idx].maxDistance=0;
|
|
}
|
|
if(codes.size()==before){
|
|
//UF_MSG_DEBUG("No further improvement by repairing");
|
|
break;
|
|
}
|
|
place(rle); place(rle); place(rle);
|
|
removeUnusedCodes();
|
|
repairs++;
|
|
//UF_MSG_DEBUG("Repair {} done. Codes: {}", repairs, codes.size());
|
|
}
|
|
auto ms=std::chrono::duration_cast<std::chrono::milliseconds>(clock::now()-start).count();
|
|
//UF_MSG_DEBUG("Compression completed in {} ms", ms);
|
|
}
|
|
|
|
template<uint N>
|
|
bool VectorQuantizer<N>::writeReportToFile(const uf::stl::string& fname){
|
|
std::ofstream f(fname);
|
|
if(!f.is_open()){
|
|
UF_MSG_ERROR("Failed to open: {}", fname);
|
|
return false;
|
|
}
|
|
for(int i=0;i<(int)codes.size();i++){
|
|
f<<"Code: "<<i<<"\tUses: "<<codes[i].vecCount<<"\tError: "<<codes[i].maxDistance<<"\n";
|
|
}
|
|
return true;
|
|
} |