thurtle: Add lzw & ulz compression

This commit is contained in:
Remko Tronçon 2024-01-20 18:12:44 +01:00
parent 4b78c50ee4
commit 95243c368c
2 changed files with 150 additions and 0 deletions

46
src/web/thurtle/lzw.ts Normal file
View file

@@ -0,0 +1,46 @@
/**
 * Compresses `data` with LZW and serializes the resulting code sequence as
 * UTF-8 text, one code point per code.
 *
 * Codes 0–255 stand for single input bytes; codes from 256 upward refer to
 * phrases learned while scanning the input.
 *
 * @param data Bytes to compress. An empty input yields an empty output.
 * @returns The UTF-8 bytes of the string whose code points are the LZW codes.
 */
export function lzwEncode(data: Uint8Array) {
  // Codes are written out as Unicode code points. 0xD800–0xDFFF are
  // surrogates, which TextEncoder replaces with U+FFFD, so the dictionary
  // must stop growing before it would hand out such a code. A decoder that
  // keeps extending its own dictionary stays in sync, because the extra
  // entries are simply never referenced by the stream.
  const CODE_LIMIT = 0xd800;

  const dict = new Map<string, number>();
  const out: number[] = [];

  // Single-character phrases are literals; longer ones are always present in
  // the dictionary because a phrase is only extended after a successful lookup.
  const emit = (phrase: string): void => {
    out.push(phrase.length > 1 ? dict.get(phrase)! : phrase.charCodeAt(0));
  };

  if (data.length > 0) {
    let phrase = String.fromCharCode(data[0]);
    let code = 256;
    for (let i = 1; i < data.length; i++) {
      const chr = String.fromCharCode(data[i]);
      const nphrase = phrase + chr;
      if (dict.has(nphrase)) {
        phrase = nphrase;
      } else {
        emit(phrase);
        if (code < CODE_LIMIT) {
          dict.set(nphrase, code);
          code++;
        }
        phrase = chr;
      }
    }
    emit(phrase);
  }

  return new TextEncoder().encode(
    out.map((c) => String.fromCharCode(c)).join("")
  );
}
/**
 * Decompresses an LZW stream whose codes are stored as the code points of a
 * UTF-8 encoded string.
 *
 * Codes 0–255 are literal bytes; codes from 256 upward refer to phrases that
 * are rebuilt here in the order they appear in the stream.
 *
 * @param rdata UTF-8 bytes of the code string. An empty input yields an empty
 *   output.
 * @returns The decompressed bytes.
 * @throws Error if the first code is not a literal, or if a code refers to a
 *   phrase that cannot have been defined yet.
 */
export function lzwDecode(rdata: Uint8Array) {
  // Iterating a string yields whole code points, so codePointAt(0) is always
  // defined and returns the full code even above 0xFFFF.
  const codes = Array.from(
    new TextDecoder().decode(rdata),
    (c) => c.codePointAt(0)!
  );
  const out: number[] = [];

  if (codes.length > 0) {
    const first = codes[0];
    if (first >= 256) {
      throw new Error(`invalid LZW stream: first code ${first} is not a literal`);
    }
    const dict = new Map<number, number[]>();
    let prevPhrase = [first];
    let nextCode = 256;
    out.push(first);

    for (let i = 1; i < codes.length; i++) {
      const c = codes[i];
      let phrase: number[];
      if (c < 256) {
        phrase = [c];
      } else {
        const known = dict.get(c);
        if (known !== undefined) {
          phrase = known;
        } else if (c === nextCode) {
          // The code being defined right now: it can only be the previous
          // phrase followed by that phrase's own first byte.
          phrase = prevPhrase.concat(prevPhrase[0]);
        } else {
          throw new Error(
            `invalid LZW stream: code ${c} at index ${i} is not defined yet`
          );
        }
      }
      // Plain loop instead of push(...phrase) so long phrases cannot hit the
      // engine's argument-count limit.
      for (const b of phrase) {
        out.push(b);
      }
      dict.set(nextCode, prevPhrase.concat(phrase[0]));
      nextCode++;
      prevPhrase = phrase;
    }
  }

  return new Uint8Array(out);
}

104
src/web/thurtle/ulz.ts Normal file
View file

@@ -0,0 +1,104 @@
/**
 * Expands a ULZ-compressed byte stream.
 *
 * The stream is a sequence of instructions, each introduced by a control byte:
 * - high bit clear (LIT): the control byte plus one literal bytes follow and
 *   are copied to the output verbatim;
 * - high bit set (CPY): the low 6 bits hold a length; if bit 0x40 is also set
 *   the length is 14 bits wide and continues in the next byte. One more byte
 *   holds the back-reference distance minus one. Length plus four bytes are
 *   then copied from the already produced output.
 *
 * @param src Compressed input.
 * @returns The decompressed bytes.
 * @throws Error when an instruction is truncated or a back-reference points
 *   before the start of the output.
 */
export function ulzDecode(src: Uint8Array) {
  const output: number[] = [];
  const end = src.length;
  let pos = 0;

  while (pos < end) {
    const ctl = src[pos];
    pos += 1;

    if ((ctl & 0x80) === 0) {
      // LIT
      if (pos + ctl >= end) {
        throw new Error(`LIT out of bounds: ${pos} + ${ctl} >= ${end}`);
      }
      const stop = pos + ctl + 1;
      while (pos < stop) {
        output.push(src[pos]);
        pos += 1;
      }
      continue;
    }

    // CPY
    let count = ctl & 0x3f;
    if ((ctl & 0x40) !== 0) {
      if (pos >= end) {
        throw new Error(`incomplete CPY2`);
      }
      count = (count << 8) | src[pos];
      pos += 1;
    }
    if (pos >= end) {
      throw new Error(`incomplete CPY`);
    }
    const distance = src[pos] + 1;
    pos += 1;
    let from = output.length - distance;
    if (from < 0) {
      throw new Error(`CPY underflow`);
    }
    // Copy byte by byte: the source range may overlap the bytes being
    // appended, which is how short patterns get repeated.
    for (let remaining = count + 4; remaining > 0; remaining--) {
      output.push(output[from]);
      from += 1;
    }
  }

  return new Uint8Array(output);
}
// Despite its name, this is used as the minimum match length: ulzEncode only
// emits a CPY instruction for matches at least this long, and stores the match
// length minus this value in the control field. It is also added to 0x3fff to
// cap the look-ahead length.
const MIN_MAX_LENGTH = 4;
/**
 * Searches the `dlen` bytes preceding `sp` for the start position whose bytes
 * match the longest prefix of the data beginning at `sp`.
 *
 * A candidate is compared cyclically: once the comparison reaches `sp`, it
 * wraps back to the candidate's start, so a match may be longer than its
 * distance from `sp`.
 *
 * @param src Buffer holding both the history and the data to match.
 * @param sp Index in `src` where the data to match begins.
 * @param dlen Number of bytes before `sp` to consider as candidate starts.
 * @param slen Maximum match length; reaching it ends the search at once.
 * @returns `[start, length]` of the best match. The earliest candidate wins
 *   among equally long matches, and `[0, 0]` is returned when nothing matches.
 */
function findBestMatch(
  src: Uint8Array,
  sp: number,
  dlen: number,
  slen: number
) {
  let bestStart = 0;
  let bestLength = 0;
  let start = sp - dlen;
  let distance = dlen;

  while (distance) {
    let matched = 0;
    while (
      matched !== slen &&
      src[sp + matched] === src[start + (matched % distance)]
    ) {
      matched++;
    }
    if (matched === slen) {
      // Nothing can beat a match of the maximum length.
      return [start, matched];
    }
    if (matched > bestLength) {
      bestLength = matched;
      bestStart = start;
    }
    start++;
    distance--;
  }

  return [bestStart, bestLength];
}
/**
 * Compresses `src` into a ULZ byte stream.
 *
 * At every position the preceding bytes (at most 256) are searched for the
 * longest match. Matches of at least `MIN_MAX_LENGTH` bytes become a CPY
 * instruction: one control byte (`0x80 | n`) or two (`0xc0 | n >> 8`,
 * `n & 0xff`), where `n` is the match length minus `MIN_MAX_LENGTH`, followed
 * by the match distance minus one. All other bytes are gathered into LIT runs
 * whose control byte holds the run length minus one.
 *
 * @param src Bytes to compress.
 * @returns The compressed stream.
 */
export function ulzEncode(src: Uint8Array) {
  const encoded: number[] = [];
  // Index in `encoded` of the control byte of the LIT run still being
  // extended, or -1 when the next literal has to open a new run.
  let openLiteral = -1;
  let pos = 0;

  while (pos < src.length) {
    const historyLength = Math.min(pos, 256);
    const lookahead = Math.min(src.length - pos, 0x3fff + MIN_MAX_LENGTH);
    const [matchStart, matchLength] = findBestMatch(
      src,
      pos,
      historyLength,
      lookahead
    );

    if (matchLength < MIN_MAX_LENGTH) {
      // LIT
      if (openLiteral < 0) {
        encoded.push(0);
        openLiteral = encoded.length - 1;
      } else {
        encoded[openLiteral] += 1;
        if (encoded[openLiteral] === 127) {
          // The run is full; the next literal starts a new one.
          openLiteral = -1;
        }
      }
      encoded.push(src[pos]);
      pos += 1;
      continue;
    }

    // CPY
    const control = matchLength - MIN_MAX_LENGTH;
    if (control <= 0x3f) {
      encoded.push(control | 0x80);
    } else {
      // CPY2
      encoded.push((control >> 8) | 0xc0, control & 0xff);
    }
    encoded.push(pos - matchStart - 1);
    pos += matchLength;
    openLiteral = -1;
  }

  return new Uint8Array(encoded);
}