diff --git a/Makefile.vc b/Makefile.vc index 408b0f6e..76ea1424 100644 --- a/Makefile.vc +++ b/Makefile.vc @@ -175,8 +175,11 @@ X_OBJS= \ $(DIROBJ)\enc\tree.obj \ $(DIROBJ)\enc\webpenc.obj \ $(DIROBJ)\mux\mux.obj \ + $(DIROBJ)\utils\alpha.obj \ $(DIROBJ)\utils\bit_reader.obj \ $(DIROBJ)\utils\bit_writer.obj \ + $(DIROBJ)\utils\quant_levels.obj \ + $(DIROBJ)\utils\tcoder.obj \ $(DIROBJ)\utils\thread.obj \ $(RESOURCE) \ diff --git a/makefile.unix b/makefile.unix index e828ebe6..104c89f0 100644 --- a/makefile.unix +++ b/makefile.unix @@ -54,6 +54,8 @@ ARFLAGS = r CC = gcc -Isrc/ -Iexamples/ -Wall CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS) INSTALL = install +GROFF = /usr/bin/groff +COL = /usr/bin/col LDFLAGS = $(EXTRA_LIBS) -lm DEC_OBJS = src/dec/frame.o src/dec/webp.o src/dec/quant.o src/dec/tree.o \ @@ -68,7 +70,8 @@ DSP_OBJS = src/dsp/cpu.o src/dsp/enc.o \ src/dsp/enc_sse2.o src/dsp/dec.o src/dsp/dec_sse2.o \ src/dsp/dec_neon.o src/dsp/upsampling.o src/dsp/upsampling_sse2.o \ src/dsp/yuv.o -UTILS_OBJS = src/utils/bit_reader.o src/utils/bit_writer.o src/utils/thread.o +UTILS_OBJS = src/utils/alpha.o src/utils/bit_reader.o src/utils/bit_writer.o \ + src/utils/quant_levels.o src/utils/thread.o src/utils/tcoder.o OBJS = $(DEC_OBJS) $(ENC_OBJS) $(DSP_OBJS) $(UTILS_OBJS) @@ -77,10 +80,13 @@ MUX_OBJS = src/mux/mux.o HDRS = src/webp/encode.h src/enc/vp8enci.h src/enc/cost.h src/webp/mux.h \ src/dec/vp8i.h \ src/dsp/yuv.h src/dsp/dsp.h \ - src/utils/bit_writer.h src/utils/bit_reader.h src/utils/thread.h + src/utils/alpha.h src/utils/bit_writer.h src/utils/bit_reader.h \ + src/utils/thread.h src/utils/tcoder.h -OUTPUT = examples/cwebp examples/dwebp examples/webpmux \ - src/libwebp.a src/mux/libwebpmux.a +OUT_LIBS = src/libwebp.a src/mux/libwebpmux.a +OUT_EXAMPLES = examples/cwebp examples/dwebp examples/webpmux + +OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES) all:ex @@ -93,29 +99,29 @@ src/libwebp.a: $(OBJS) src/mux/libwebpmux.a: $(MUX_OBJS) $(AR) $(ARFLAGS) $@ $^ -ex: examples/cwebp 
examples/dwebp examples/webpmux +ex: $(OUT_EXAMPLES) examples/cwebp: examples/cwebp.o src/libwebp.a examples/dwebp: examples/dwebp.o src/libwebp.a examples/webpmux: examples/webpmux.o src/mux/libwebpmux.a src/libwebp.a -examples/cwebp examples/dwebp examples/webpmux: + +$(OUT_EXAMPLES): $(CC) -o $@ $^ $(LDFLAGS) dist: DESTDIR := dist dist: all $(INSTALL) -m755 -d $(DESTDIR)/include/webp \ - $(DESTDIR)/doc $(DESTDIR)/lib - $(INSTALL) -m755 -s examples/cwebp examples/dwebp examples/webpmux \ - $(DESTDIR) + $(DESTDIR)/doc $(DESTDIR)/lib + $(INSTALL) -m755 -s $(OUT_EXAMPLES) $(DESTDIR) $(INSTALL) -m644 src/webp/*.h $(DESTDIR)/include/webp $(INSTALL) -m644 src/libwebp.a $(DESTDIR)/lib umask 022; \ for m in man/[cd]webp.1; do \ basenam=$$(basename $$m .1); \ - /usr/bin/groff -t -e -man -T utf8 $$m \ - | col -bx >$(DESTDIR)/doc/$${basenam}.txt; \ - /usr/bin/groff -t -e -man -T html $$m \ - | col -bx >$(DESTDIR)/doc/$${basenam}.html; \ + $(GROFF) -t -e -man -T utf8 $$m \ + | $(COL) -bx >$(DESTDIR)/doc/$${basenam}.txt; \ + $(GROFF) -t -e -man -T html $$m \ + | $(COL) -bx >$(DESTDIR)/doc/$${basenam}.html; \ done clean: diff --git a/src/utils/Makefile.am b/src/utils/Makefile.am index bb35c633..e832b390 100644 --- a/src/utils/Makefile.am +++ b/src/utils/Makefile.am @@ -1,10 +1,12 @@ AM_CPPFLAGS = -I$(top_srcdir)/src -libwebputils_la_SOURCES = bit_reader.h bit_reader.c \ +libwebputils_la_SOURCES = alpha.h alpha.c \ + bit_reader.h bit_reader.c \ bit_writer.h bit_writer.c \ + quant_levels.c \ + tcoder.h tcoderi.h tcoder.c \ thread.h thread.c libwebputils_la_LDFLAGS = -version-info 0:0:0 -libwebputils_la_CPPFLAGS = $(USE_EXPERIMENTAL_CODE) libwebputilsinclude_HEADERS = ../webp/types.h libwebputilsincludedir = $(includedir)/webp diff --git a/src/utils/alpha.c b/src/utils/alpha.c new file mode 100644 index 00000000..65b30de2 --- /dev/null +++ b/src/utils/alpha.c @@ -0,0 +1,432 @@ +// Copyright 2011 Google Inc. All Rights Reserved. 
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha plane encoding and decoding library.
+//
+// Author: vikasa@google.com (Vikas Arora)
+
+#include <assert.h>
+#include <stdlib.h>  // for malloc() / free()
+#include <string.h>  // for memcpy()
+#include "./alpha.h"
+
+#include "./bit_reader.h"
+#include "./bit_writer.h"
+#include "./tcoder.h"
+
+#define MAX_SYMBOLS      255
+#define ALPHA_HEADER_LEN 2
+
+// -----------------------------------------------------------------------------
+// Alpha Encode.
+
+// Identity ("no compression") coder: stores in '*output' a freshly malloc'ed
+// copy of the 'width' x 'height' plane 'data' and its size in '*output_size'.
+// Returns 1 on success, 0 if 'data' is NULL or the allocation fails.
+static int EncodeIdent(const uint8_t* data, int width, int height,
+                       uint8_t** output, size_t* output_size) {
+  // Widen before multiplying so the product is computed in size_t.
+  const size_t data_size = (size_t)height * width;
+  uint8_t* alpha = NULL;
+  assert((output != NULL) && (output_size != NULL));
+
+  if (data == NULL) {
+    return 0;
+  }
+
+  alpha = (uint8_t*)malloc(data_size);
+  if (alpha == NULL) {
+    return 0;
+  }
+  memcpy(alpha, data, data_size);
+  *output_size = data_size;
+  *output = alpha;
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Zlib-like encoding using TCoder
+
+typedef struct {
+  int dist;          // backward distance (=0 means: literal)
+  int literal;       // literal value (if dist = 0)
+  size_t len;        // length of matched string for non-literal
+} Token;
+
+#define MIN_LEN 2
+#define DEFER_SKIP 1   // for deferred evaluation (0 = off)
+
+// Cost of coding literal 'c', memoized in the caller's local 'cost_cache[]'
+// (a 0. entry means "not computed yet"). Also relies on the caller's local
+// 'lit_mode_cost'.
+#define CACHED_COST(coder, c) ((cost_cache[(c)] == 0.) ? \
+  (cost_cache[(c)] = lit_mode_cost + TCoderSymbolCost((coder), (c))) \
+  : cost_cache[(c)])
+
+// Record symbol: updates the coders' statistics only (NULL bit-writer).
+// Relies on the caller's locals 'coder', 'coderd' and 'coderl'.
+#define RECORD(TOKEN) {                                   \
+  TCoderEncode(coderd, (TOKEN)->dist, NULL);              \
+  if ((TOKEN)->dist == 0) {                               \
+    TCoderEncode(coder, (TOKEN)->literal, NULL);          \
+  } else {                                                \
+    TCoderEncode(coderl, (TOKEN)->len - MIN_LEN, NULL);   \
+  }                                                       \
+}
+
+// Returns the number of leading bytes (at most 'max_len') for which 'data'
+// and 'ref' are equal.
+static size_t GetLongestMatch(const uint8_t* const data,
+                              const uint8_t* const ref, size_t max_len) {
+  size_t n;
+  for (n = 0; n < max_len && (data[n] == ref[n]); ++n) { /* do nothing */ }
+  return n;
+}
+
+// Compresses the 'width' x 'height' plane 'data' into a stream of
+// literal / (distance, length) tokens coded with three adaptive TCoders.
+// A first pass selects the tokens (updating the coders' statistics through
+// RECORD), a second pass re-initializes the coders and writes the bitstream.
+// On success returns 1 and hands the bit-writer's buffer over through
+// '*output' / '*output_size'. Returns 0 if an allocation fails.
+static int EncodeZlibTCoder(uint8_t* data, int width, int height,
+                            uint8_t** output, size_t* output_size) {
+  int ok = 0;
+  const size_t data_size = (size_t)width * height;
+  const size_t MAX_DIST = 3 * width;
+  const size_t MAX_LEN = 2 * width;
+  Token* const msg = (Token*)malloc(data_size * sizeof(*msg));
+  int num_tokens;
+  TCoder* const coder = TCoderNew(MAX_SYMBOLS);
+  TCoder* const coderd = TCoderNew(MAX_DIST);
+  TCoder* const coderl = TCoderNew(MAX_LEN - MIN_LEN);
+
+  if (coder == NULL || coderd == NULL || coderl == NULL) {
+    goto End;
+  }
+  if (msg == NULL) {
+    goto End;
+  }
+
+  {
+    int deferred_eval = 0;
+    size_t n = 0;
+    num_tokens = 0;
+    while (n < data_size) {
+      const double lit_mode_cost = TCoderSymbolCost(coderd, 0);
+      double cost_cache[MAX_SYMBOLS + 1] = { 0. };
+      Token best;
+      size_t dist = 0;
+      double best_cost = CACHED_COST(coder, data[n]);
+      size_t max_len = MAX_LEN;
+      if (max_len > data_size - n) {
+        max_len = data_size - n;
+      }
+      best.dist = 0;
+      best.literal = data[n];
+      best.len = 1;
+      for (dist = 1; dist <= MAX_DIST && dist <= n; ++dist) {
+        const int pos = n - dist;
+        const size_t min_len = best.len - 1;
+        size_t len;
+
+        // Early out: we probe at two locations for a quick match check
+        if (data[pos] != data[n] ||
+            data[pos + min_len] != data[n + min_len]) {
+          continue;
+        }
+
+        len = GetLongestMatch(data + pos, data + n, max_len);
+        if (len >= MIN_LEN && len >= best.len) {
+          // This is the cost of the coding proposal
+          const double cost = TCoderSymbolCost(coderl, len - MIN_LEN)
+                            + TCoderSymbolCost(coderd, dist);
+          // We're gaining an extra len-best.len coded message over the last
+          // known best. Compute how this would have cost if coded all literal.
+          // (TODO: we should fully re-evaluate at position best.len and not
+          // assume all is going be coded as literals. But it's at least an
+          // upper-bound (worst-case coding). Deferred evaluation used below
+          // partially addresses this.
+          double lit_cost = 0;
+          size_t i;
+          for (i = best.len; i < len; ++i) {
+            lit_cost += CACHED_COST(coder, data[n + i]);
+          }
+          // So, is it worth ?
+          if (best_cost + lit_cost >= cost) {
+            best_cost = cost;
+            best.len = len;
+            best.dist = dist;
+          }
+        }
+        if (len >= MAX_LEN) {
+          break;  // No need to search further. We already got a max-long match
+        }
+      }
+      // Deferred evaluation: before finalizing a choice we try to find
+      // best cost at position n + 1 and see if we get a longer
+      // match then current best. If so, we transform the current match
+      // into a literal, go to position n + 1, and try again.
+      {
+        Token* cur = &msg[num_tokens];
+        int forget = 0;
+        if (deferred_eval) {
+          --cur;
+          // If the next match isn't longer, keep previous match
+          if (best.len <= cur->len) {
+            deferred_eval = 0;
+            n += cur->len - DEFER_SKIP;
+            forget = 1;   // forget the new match
+            RECORD(cur)
+          } else {   // else transform previous match into a shorter one
+            cur->len = DEFER_SKIP;
+            if (DEFER_SKIP == 1) {
+              cur->dist = 0;   // literal
+            }
+            // TODO(later): RECORD() macro should be changed to take an extra
+            // "is_final" param, so that we could write the bitstream at once.
+            RECORD(cur)
+            ++cur;
+          }
+        }
+        if (!forget) {
+          *cur = best;
+          ++num_tokens;
+          if (DEFER_SKIP > 0) {
+            deferred_eval = (cur->len > 2) && (cur->len < MAX_LEN / 2);
+          }
+          if (deferred_eval) {
+            // will probe at a later position before finalizing.
+            n += DEFER_SKIP;
+          } else {
+            // Keep the current choice.
+            n += cur->len;
+            RECORD(cur)
+          }
+        }
+      }
+    }
+  }
+
+  // Final bitstream assembly.
+  {
+    int n;
+    VP8BitWriter bw;
+    VP8BitWriterInit(&bw, 0);
+    TCoderInit(coder);
+    TCoderInit(coderd);
+    TCoderInit(coderl);
+    for (n = 0; n < num_tokens; ++n) {
+      const Token* const t = &msg[n];
+      const int is_literal = (t->dist == 0);
+      TCoderEncode(coderd, t->dist, &bw);
+      if (is_literal) {  // literal
+        TCoderEncode(coder, t->literal, &bw);
+      } else {
+        TCoderEncode(coderl, t->len - MIN_LEN, &bw);
+      }
+    }
+
+    // clean up
+    VP8BitWriterFinish(&bw);
+    *output = VP8BitWriterBuf(&bw);
+    *output_size = VP8BitWriterSize(&bw);
+    ok = 1;
+  }
+
+ End:
+  if (coder) TCoderDelete(coder);
+  if (coderl) TCoderDelete(coderl);
+  if (coderd) TCoderDelete(coderd);
+  free(msg);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// Public entry point (see alpha.h). Copies the 'width' x 'height' alpha plane
+// out of the strided input, optionally quantizes it ('quality' < 100),
+// compresses it with 'method' (0 = raw copy, 1 = TCoder-based) and prepends
+// the ALPHA_HEADER_LEN-byte header. '*output' is malloc'ed; the caller frees.
+int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
+                int quality, int method,
+                uint8_t** output, size_t* output_size) {
+  const int kMaxImageDim = (1 << 14) - 1;
+  uint8_t* compressed_alpha = NULL;
+  uint8_t* quant_alpha = NULL;
+  uint8_t* out = NULL;
+  size_t compressed_size = 0;
+  size_t data_size;
+  float mse = 0.0;
+  int ok = 0;
+  int h;
+
+  if ((data == NULL) || (output == NULL) || (output_size == NULL)) {
+    return 0;
+  }
+
+  if (width <= 0 || width > kMaxImageDim ||
+      height <= 0 || height > kMaxImageDim || stride < width) {
+    return 0;
+  }
+
+  if (quality < 0 || quality > 100) {
+    return 0;
+  }
+
+  if (method < 0 || method > 1) {
+    return 0;
+  }
+
+  // Only computed once the dimensions are known to be positive and bounded.
+  data_size = (size_t)height * width;
+  quant_alpha = (uint8_t*)malloc(data_size);
+  if (quant_alpha == NULL) {
+    return 0;
+  }
+
+  // Extract the alpha data (WidthXHeight) from raw_data (StrideXHeight).
+  for (h = 0; h < height; ++h) {
+    memcpy(quant_alpha + (size_t)h * width, data + (size_t)h * stride,
+           width * sizeof(*data));
+  }
+
+  if (quality < 100) {  // No Quantization required for 'quality = 100'.
+    // 16 Alpha levels gives quite a low MSE w.r.t Original Alpha plane hence
+    // mapped to moderate quality 70. Hence Quality:[0, 70] -> Levels:[2, 16]
+    // and Quality:]70, 100] -> Levels:]16, 256].
+    const int alpha_levels = (quality <= 70) ?
+                             2 + quality / 5 :
+                             16 + (quality - 70) * 8;
+
+    ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &mse);
+    if (!ok) {
+      free(quant_alpha);
+      return 0;
+    }
+  }
+
+  if (method == 0) {
+    ok = EncodeIdent(quant_alpha, width, height,
+                     &compressed_alpha, &compressed_size);
+  } else if (method == 1) {
+    ok = EncodeZlibTCoder(quant_alpha, width, height,
+                          &compressed_alpha, &compressed_size);
+  }
+
+  free(quant_alpha);
+  if (!ok) {
+    return 0;
+  }
+
+  out = (uint8_t*)malloc(compressed_size + ALPHA_HEADER_LEN);
+  if (out == NULL) {
+    free(compressed_alpha);
+    return 0;
+  } else {
+    *output = out;
+  }
+
+  // Alpha bit-stream Header:
+  //  Byte0: Compression Method.
+  //  Byte1: Reserved for later extension.
+  out[0] = method & 0xff;
+  out[1] = 0;  // Reserved Byte.
+  out += ALPHA_HEADER_LEN;
+  memcpy(out, compressed_alpha, compressed_size);
+  free(compressed_alpha);
+  out += compressed_size;
+
+  *output_size = out - *output;
+
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Alpha Decode.
+
+// Identity decoder: copies 'data_size' bytes from 'data' to 'output'.
+// The caller must guarantee that both buffers hold at least 'data_size' bytes.
+static int DecodeIdent(const uint8_t* data, size_t data_size,
+                       uint8_t* output) {
+  assert((data != NULL) && (output != NULL));
+  memcpy(output, data, data_size);
+  return 1;
+}
+
+// Inverse of EncodeZlibTCoder(): decodes literal / (distance, length) tokens
+// from 'data' until 'output_size' bytes have been produced in 'output'.
+// Returns 1 on success. Returns 0 if a coder cannot be allocated or if a
+// back-reference would read before the start, or write past the end, of
+// 'output'.
+static int DecompressZlibTCoder(const uint8_t* data, size_t data_size,
+                                int width, int height,
+                                uint8_t* output, size_t output_size) {
+  int ok = 0;   // failure by default, so that every 'goto End' reports it
+  const size_t MAX_DIST = 3 * width;
+  const size_t MAX_LEN = 2 * width;
+  TCoder* const coder = TCoderNew(MAX_SYMBOLS);
+  TCoder* const coderd = TCoderNew(MAX_DIST);
+  TCoder* const coderl = TCoderNew(MAX_LEN - MIN_LEN);
+
+  if (coder == NULL || coderd == NULL || coderl == NULL) {
+    goto End;
+  }
+  (void)height;  // unused parameter
+
+  {
+    size_t pos = 0;
+    VP8BitReader br;
+    VP8InitBitReader(&br, data, data + data_size);
+    while (pos < output_size) {
+      const int dist = TCoderDecode(coderd, &br);
+      if (dist == 0) {
+        const int literal = TCoderDecode(coder, &br);
+        output[pos] = literal;
+        ++pos;
+      } else {
+        const int len = MIN_LEN + TCoderDecode(coderl, &br);
+        int k;
+        // The source of the copy must lie inside what was already decoded.
+        if (dist < 0 || (size_t)dist > pos) goto End;
+        if (pos + len > output_size) goto End;
+        for (k = 0; k < len; ++k) {
+          output[pos + k] = output[pos + k - dist];
+        }
+        pos += len;
+      }
+    }
+  }
+  ok = 1;
+
+ End:
+  if (coder) TCoderDelete(coder);
+  if (coderl) TCoderDelete(coderl);
+  if (coderd) TCoderDelete(coderd);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// Public entry point (see alpha.h). Parses the ALPHA_HEADER_LEN-byte header,
+// decodes the 'width' x 'height' plane with the signalled method into a
+// temporary buffer and, on success, copies it row by row into the strided
+// 'output'. Returns 1 on success, 0 on invalid arguments, unknown method,
+// allocation failure or undecodable payload.
+int DecodeAlpha(const uint8_t* data, size_t data_size,
+                int width, int height, int stride,
+                uint8_t* output) {
+  uint8_t* decoded_data = NULL;
+  int ok = 0;
+  int method;
+  size_t decoded_size;
+
+  if (data == NULL || output == NULL) {
+    return 0;
+  }
+
+  if (data_size <= ALPHA_HEADER_LEN) {
+    return 0;
+  }
+
+  if (width <= 0 || height <= 0 || stride < width) {
+    return 0;
+  }
+
+  method = data[0];
+  if (method < 0 || method > 1) {
+    return 0;
+  }
+
+  // Only computed once the dimensions are known to be positive.
+  decoded_size = (size_t)height * width;
+  decoded_data = (uint8_t*)malloc(decoded_size);
+  if (decoded_data == NULL) {
+    return 0;
+  }
+
+  data_size -= ALPHA_HEADER_LEN;
+  data += ALPHA_HEADER_LEN;
+
+  if (method == 0) {
+    // The raw payload must cover the whole plane; never copy more than the
+    // temporary buffer can hold.
+    ok = (data_size >= decoded_size) &&
+         DecodeIdent(data, decoded_size, decoded_data);
+  } else if (method == 1) {
+    ok = DecompressZlibTCoder(data, data_size, width, height,
+                              decoded_data, decoded_size);
+  }
+
+  if (ok) {
+    // Construct raw_data (HeightXStride) from the alpha data (HeightXWidth).
+    int h;
+    for (h = 0; h < height; ++h) {
+      memcpy(output + (size_t)h * stride, decoded_data + (size_t)h * width,
+             width * sizeof(*data));
+    }
+  }
+  free(decoded_data);
+
+  return ok;
+}
diff --git a/src/utils/alpha.h b/src/utils/alpha.h
new file mode 100644
index 00000000..e0df9cc2
--- /dev/null
+++ b/src/utils/alpha.h
@@ -0,0 +1,68 @@
+// Copyright 2011 Google Inc.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha plane encoding and decoding library.
+//
+// Author: vikasa@google.com (Vikas Arora)
+
+#ifndef WEBP_UTILS_ALPHA_H_
+#define WEBP_UTILS_ALPHA_H_
+
+#include <stdlib.h>
+
+#include "../webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Encodes the given Alpha data 'data' of size 'stride'x'height' via specified
+// compression method 'method'. The pre-processing (Quantization) is
+// performed if 'quality' is less than 100. For such cases, the encoding is
+// lossy. Valid ranges for 'quality' is [0, 100] and 'method' is [0, 1]:
+//   'method = 0' - No compression;
+//   'method = 1' - zlib-like compression (TCoder);
+// 'output' corresponds to the buffer containing compressed Alpha data.
+// This buffer is allocated by this method and caller should call
+// free(*output) when done.
+// 'output_size' corresponds to size of this compressed Alpha buffer.
+//
+// Returns 1 on successfully encoding the Alpha and
+//         0 if either:
+//           data, output or output_size is NULL, or
+//           inappropriate width, height or stride, or
+//           invalid quality or method, or
+//           Memory allocation for the compressed data fails.
+
+int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
+                int quality, int method,
+                uint8_t** output, size_t* output_size);
+
+// Decodes the compressed data 'data' of size 'data_size' into the 'output'.
+// The 'output' buffer should be pre-allocated and must be of the same
+// dimension 'height'x'stride', as that of the image.
+//
+// Returns 1 on successfully decoding the compressed Alpha and
+//         0 if either:
+//           data or output is NULL, or
+//           Error in bit-stream header (invalid compression method), or
+//           Error returned by appropriate compression method.
+int DecodeAlpha(const uint8_t* data, size_t data_size,
+                int width, int height, int stride, uint8_t* output);
+
+// Replace the input 'data' of size 'width'x'height' with 'num_levels'
+// quantized values. If not NULL, 'mse' receives sqrt(mean-squared error).
+// Valid range for 'num_levels' is [2, 256].
+// Returns false in case of error (data is NULL, or parameters are invalid).
+int QuantizeLevels(uint8_t* data, int width, int height, int num_levels,
+                   float* mse);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_ALPHA_H_ */
diff --git a/src/utils/quant_levels.c b/src/utils/quant_levels.c
new file mode 100644
index 00000000..89257d7b
--- /dev/null
+++ b/src/utils/quant_levels.c
@@ -0,0 +1,143 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Quantize levels for specified number of quantization-levels ([2, 256]).
+// Min and max values are preserved (usual 0 and 255 for alpha plane).
+//
+// Author: skal@google.com (Pascal Massimino)
+
+#include <assert.h>
+#include <math.h>  // for sqrt()
+
+#include "./alpha.h"
+
+#define NUM_SYMBOLS     256
+
+#define MAX_ITER  6             // Maximum number of convergence steps.
+#define ERROR_THRESHOLD 1e-4    // MSE stopping criterion.
+
+// -----------------------------------------------------------------------------
+// Quantize levels.
+
+// Replaces, in place, the 'width' x 'height' bytes of 'data' by at most
+// 'num_levels' distinct values, chosen by a few k-means iterations over the
+// histogram of 'data'. The smallest and largest input values are kept as
+// fixed representatives.
+// If 'mse' is not NULL it receives sqrt() of the last mean-squared error
+// computed, or 0 when 'data' already uses no more than 'num_levels' values
+// (in which case 'data' is left untouched).
+// Returns 1 on success, 0 if 'data' is NULL, a dimension is not positive or
+// 'num_levels' is outside [2, 256].
+int QuantizeLevels(uint8_t* data, int width, int height,
+                   int num_levels, float* mse) {
+  int freq[NUM_SYMBOLS] = { 0 };
+  int q_level[NUM_SYMBOLS] = { 0 };
+  double inv_q_level[NUM_SYMBOLS] = { 0 };
+  int min_s = 255, max_s = 0;
+  size_t data_size;
+  size_t n = 0;
+  int s, num_levels_in, iter;
+  double last_err = 1.e38, err = 0.;
+
+  if (data == NULL) {
+    return 0;
+  }
+
+  if (width <= 0 || height <= 0) {
+    return 0;
+  }
+
+  if (num_levels < 2 || num_levels > 256) {
+    return 0;
+  }
+
+  // Computed only after validation, and in size_t rather than int.
+  data_size = (size_t)height * width;
+
+  // Histogram, value range and number of distinct input values.
+  num_levels_in = 0;
+  for (n = 0; n < data_size; ++n) {
+    num_levels_in += (freq[data[n]] == 0);
+    if (min_s > data[n]) min_s = data[n];
+    if (max_s < data[n]) max_s = data[n];
+    ++freq[data[n]];
+  }
+
+  if (num_levels_in <= num_levels) {
+    if (mse) *mse = 0.;
+    return 1;   // nothing to do !
+  }
+
+  // Start with uniformly spread centroids.
+  for (s = 0; s < num_levels; ++s) {
+    inv_q_level[s] = min_s + (double)(max_s - min_s) * s / (num_levels - 1);
+  }
+
+  // Fixed values. Won't be changed.
+  q_level[min_s] = 0;
+  q_level[max_s] = num_levels - 1;
+  assert(inv_q_level[0] == min_s);
+  assert(inv_q_level[num_levels - 1] == max_s);
+
+  // k-Means iterations.
+  for (iter = 0; iter < MAX_ITER; ++iter) {
+    double err_count;
+    double q_sum[NUM_SYMBOLS] = { 0 };
+    double q_count[NUM_SYMBOLS] = { 0 };
+    int slot = 0;
+
+    // Assign classes to representatives.
+    for (s = min_s; s <= max_s; ++s) {
+      // Keep track of the nearest neighbour 'slot'
+      while (slot < num_levels - 1 &&
+             2 * s > inv_q_level[slot] + inv_q_level[slot + 1]) {
+        ++slot;
+      }
+      if (freq[s] > 0) {
+        q_sum[slot] += s * freq[s];
+        q_count[slot] += freq[s];
+      }
+      q_level[s] = slot;
+    }
+
+    // Assign new representatives to classes.
+    // (the first and last slots stay pinned to min_s / max_s)
+    if (num_levels > 2) {
+      for (slot = 1; slot < num_levels - 1; ++slot) {
+        const double count = q_count[slot];
+        if (count > 0.) {
+          inv_q_level[slot] = q_sum[slot] / count;
+        }
+      }
+    }
+
+    // Compute convergence error.
+    err = 0.;
+    err_count = 0.;
+    for (s = min_s; s <= max_s; ++s) {
+      const double error = s - inv_q_level[q_level[s]];
+      err += freq[s] * error * error;
+      err_count += freq[s];
+    }
+    if (err_count > 0.) err /= err_count;
+
+    // Check for convergence: we stop as soon as the error is no
+    // longer improving.
+    if (last_err - err < ERROR_THRESHOLD) break;
+    last_err = err;
+  }
+
+  // Remap the alpha plane to quantized values.
+  {
+    // double->int rounding operation can be costly, so we do it
+    // once for all before remaping. We also perform the data[] -> slot
+    // mapping, while at it (avoid one indirection in the final loop).
+    uint8_t map[NUM_SYMBOLS];
+    for (s = min_s; s <= max_s; ++s) {
+      const int slot = q_level[s];
+      map[s] = (uint8_t)(inv_q_level[slot] + .5);
+    }
+    // Final pass.
+    for (n = 0; n < data_size; ++n) {
+      data[n] = map[data[n]];
+    }
+  }
+
+  // Report the final error if needed.
+  if (mse) {
+    *mse = (float)sqrt(err);
+  }
+
+  return 1;
+}
diff --git a/src/utils/tcoder.c b/src/utils/tcoder.c
new file mode 100644
index 00000000..45fcc8a3
--- /dev/null
+++ b/src/utils/tcoder.c
@@ -0,0 +1,460 @@
+// Copyright 2011 Google Inc.
+// +// This code is licensed under the same terms as WebM: +// Software License Agreement: http://www.webmproject.org/license/software/ +// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// ----------------------------------------------------------------------------- +// +// Tree-coder using VP8's boolean coder +// +// Author: Skal (pascal.massimino@gmail.com) +// +// Rationale: +// We extend the boolean (binary) coder to handle arbitrary-sized alphabets, +// and not just binary ones. +// We dynamically maintain the population count and use the locally-optimal +// probability distribution for coding every symbol. Every symbol can be +// coded using _any_ binary tree. The boolean coder would traverse it and +// branch each nodes left and right with the accumulated probability. +// +// E.g. with 3 symbols A, B, C already coded 30, 50 and 120 times respectively: +// +/* Root Node #0 (count=30+50+120=200) + | \ + | A (count=30) + Inner-Node #1 (count=50+120=170) + | \ + | C (count=120) + B (count=50) +*/ +// If the next symbol to code is "C", we'll first code '0' with probability +// p0 = 170/200 (which is the probability of taking the left branch at the +// Root Node #0) and then code '1' with a probability p1 = 120/170 (which +// is the probability of taking the right branch at the Inner-Node #1). The +// total probability p0 * p1 = 120 / 200 is the correct one for symbol 'C' +// (up to small rounding differences in the boolean coder). +// The alphabet could be coded with _any_ tree, provided the count at the +// inner nodes are updated appropriately. Put otherwise, the binary tree +// is only used to efficiently update the frequency counts in O(ln(N)) time +// instead of O(N). +// For instance, we could use the equivalent tree: +/* Root (count=200) + | \ + | C (count=120) + Inner (count=50+30=80) + | \ + | B (count=50) + A (count=30) +*/ +// The frequency distribution would still be respected when coding the symbols. +// But! 
There's a noticeable difference: it only takes _one_ call to VP8PutBit() +// when coding the letter 'C' (with probability 120/200), which is the most +// frequent symbol. This has an impact on speed, considering that each call +// to VP8PutBit/VP8GetBit is costly. Hence, in order to minimize the number +// of binary coding steps, the frequent symbols should be up in the tree. +// Using a Huffman tree is a solution, but the management and updating can be +// quite complicated. Here we opt for a simpler option: we use a _ternary_ +// tree instead, where each inner node can be associated with a symbol, in +// addition to the regular left/right branches. When we traverse down +// the tree, a stop bit is used to signal whether the traversal is finished +// or not. Its probability is proportional to the frequency with which the +// node's symbol has been seen (see probaS_). If the traversal is not +// finished, we keep branching right or left according to a probability +// proportional to each branch's use count (see probaL_). +// When a symbol is seen more frequently than its parent, we simply +// exchange the two symbols without changing the tree structure or the +// left/right branches. +// Hence, both tree examples above can be coded using this ternary tree: +/* Root #0 (count=200) + / | \ + / C \ + Node #1 Node #2 + / | \ / | \ + x A x x B x <- where 'x' means un-assigned branches. +*/ +// Here, if the symbol 'A' becomes more frequent afterward, we'll just swap it +// with 'C' (cf ExchangeSymbol()) without reorganizing the tree. +// +// Using this simple maintenance, we observed a typical 10-20% reduction +// in the number of calls to VP8PutBit(), leading to a 3-5% speed gain. 
+//
+
+#include "./tcoderi.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+// Local log2() replacement for this compiler, built on log().
+static double log2(double d) {
+  const double kLog2Reciprocal = 1.442695040888963;
+  return log(d) * kLog2Reciprocal;
+}
+#endif
+
+// For code=00001xxx..., returns the position of the leftmost leading '1' bit.
+// Returns 0 for code <= 0.
+static WEBP_INLINE int CodeLength(int code) {
+  int length = 0;
+  if (code > 0) {
+    while ((code >> length) != 1) ++length;
+  }
+  return length;
+}
+
+// -----------------------------------------------------------------------------
+
+// Allocates, in one malloc'ed chunk, a coder for symbols in [0, max_symbol]
+// together with its node and symbol-map arrays, then resets it with
+// TCoderInit(). Returns NULL if 'max_symbol' is negative or the allocation
+// fails. Release with TCoderDelete().
+TCoder* TCoderNew(int max_symbol) {
+  TCoder* c;
+  uint8_t* memory;
+  int num_nodes;
+  size_t size;
+  if (max_symbol < 0) return NULL;
+  num_nodes = max_symbol + 1;
+  // Sized in size_t: an 'int' total could truncate for large alphabets.
+  size = sizeof(*c)
+       + (size_t)num_nodes * sizeof(*c->nodes_)
+       + (size_t)num_nodes * sizeof(*c->symbols_);
+  memory = (uint8_t*)malloc(size);
+  if (memory == NULL) return NULL;
+
+  c = (TCoder*)memory;
+  memory += sizeof(*c);
+  // Nodes are addressed with 1-based positions, hence the '- 1' bias.
+  c->nodes_ = (Node*)memory - 1;
+  memory += num_nodes * sizeof(*c->nodes_);
+  c->symbols_ = (int*)memory;
+
+  c->num_nodes_ = num_nodes;
+  c->frozen_ = 0;
+
+  TCoderInit(c);
+  return c;
+}
+
+// Clears the counters of 'node', sets both probabilities to HALF_PROBA and
+// attaches 'symbol' to it.
+static WEBP_INLINE void ResetNode(Node* const node, Symbol_t symbol) {
+  assert(node);
+  node->countS_ = (Count_t)0;
+  node->count_  = (Count_t)0;
+  node->probaS_ = HALF_PROBA;
+  node->probaL_ = HALF_PROBA;
+  node->symbol_ = symbol;
+}
+
+// Wipe the tree clean.
+static void ResetTree(TCoder* const c) {
+  int pos;
+  assert(c);
+  c->num_symbols_ = 0;
+  c->total_coded_ = 0;
+  c->probaN_ = HALF_PROBA;
+  for (pos = 1; pos <= c->num_nodes_; ++pos) {
+    ResetNode(&c->nodes_[pos], INVALID_SYMBOL);
+  }
+  c->fixed_symbols_ = 0;
+  c->symbol_bit_cost_ = 5 + CodeLength(c->num_nodes_);
+}
+
+// Marks every symbol as not yet mapped to a node.
+static void ResetSymbolMap(TCoder* const c) {
+  Symbol_t s;
+  assert(c);
+  c->num_symbols_ = 0;
+  c->probaN_ = HALF_PROBA;
+  for (s = 0; s < c->num_nodes_; ++s) {
+    c->symbols_[s] = INVALID_POS;
+  }
+}
+
+// Resets the tree and the symbol map, unless the coder is frozen.
+void TCoderInit(TCoder* const c) {
+  assert(c);
+  if (!c->frozen_) {      // Reset counters
+    ResetTree(c);
+    ResetSymbolMap(c);
+  }
+}
+
+// Releases a coder obtained from TCoderNew().
+void TCoderDelete(TCoder* const c) {
+  free(c);
+}
+
+// -----------------------------------------------------------------------------
+// Tree utils around nodes
+
+// Total number of visits on this nodes
+static WEBP_INLINE Count_t TotalCount(const Node* const n) {
+  return n->countS_ + n->count_;
+}
+
+// Returns true if node has no child.
+static WEBP_INLINE int IsLeaf(const TCoder* const c, int pos) {
+  return (2 * pos > c->num_symbols_);
+}
+
+// Returns true if node has exactly one child, the one at position 2 * pos
+// (position 2 * pos + 1 is not in use yet). No branching bit is coded in
+// that case: TCoderEncode() goes straight to 'parent << 1'.
+static WEBP_INLINE int HasOnlyRightChild(const TCoder* const c, int pos) {
+  return (2 * pos == c->num_symbols_);
+}
+
+// -----------------------------------------------------------------------------
+// Node management
+
+// Creates the node for the not-yet-seen symbol 's' and returns its position.
+static int NewNode(TCoder* const c, int s) {
+  // For an initial new symbol position, we pick the slot that is the
+  // closest to the top of the tree. It shortens the paths' length.
+  int pos;
+  assert(c);
+  assert(c->num_symbols_ < c->num_nodes_);
+  pos = 1 + c->num_symbols_;
+  c->symbols_[s] = pos;
+  ResetNode(&c->nodes_[pos], s);
+  ++c->num_symbols_;
+  return pos;
+}
+
+// trivial method, mainly for debug
+static WEBP_INLINE int SymbolToNode(const TCoder* const c, int s) {
+  int pos;
+  assert(c);
+  // Validate 's' before it is used as an index.
+  assert(s >= 0 && s < c->num_nodes_ && s != INVALID_SYMBOL);
+  pos = c->symbols_[s];
+  assert(pos != INVALID_POS);
+  assert(c->nodes_[pos].symbol_ == s);
+  return pos;
+}
+
+#define SWAP(T, a, b) do {  \
+  const T tmp = (a);        \
+  (a) = (b);                \
+  (b) = tmp;                \
+} while (0)
+
+// Make child symbol bubble up one level
+static void ExchangeSymbol(const TCoder* const c, const int pos) {
+  const int parent = pos >> 1;
+  Node* const node0 = &c->nodes_[parent];   // parent node
+  Node* const node1 = &c->nodes_[pos];      // child node
+  const Symbol_t S0 = node0->symbol_;
+  const Symbol_t S1 = node1->symbol_;
+  c->symbols_[S1] = parent;
+  c->symbols_[S0] = pos;
+  assert(node1->countS_ >= node0->countS_);
+  // The parent's subtree count loses what the promoted symbol brings along.
+  node0->count_ -= (node1->countS_ - node0->countS_);
+  assert(node0->count_ > 0);
+  SWAP(Count_t, node0->countS_, node1->countS_);
+  SWAP(Symbol_t, node0->symbol_, node1->symbol_);
+  // Note: probaL_ and probaS_ are recomputed. No need to SWAP them.
+}
+#undef SWAP
+
+// -----------------------------------------------------------------------------
+// probability computation
+
+// Returns MAX_PROBA - (num * max_proba + round) / total. 'total' must be > 0.
+static WEBP_INLINE int CalcProba(Count_t num, Count_t total,
+                                 int max_proba, int round) {
+  int p;
+  assert(total > 0);
+  p = (num * max_proba + round) / total;
+  assert(p >= 0 && p <= MAX_PROBA);
+  return MAX_PROBA - p;
+}
+
+// Recomputes probaS_ of node 'pos' and, if it has children, its probaL_.
+static WEBP_INLINE void UpdateNodeProbas(TCoder* const c, int pos) {
+  Node* const node = &c->nodes_[pos];
+  const Count_t total = TotalCount(node);
+  node->probaS_ = CalcProba(node->countS_, total, MAX_PROBA, 0);
+  if (!IsLeaf(c, pos)) {
+    const Count_t total_count = node->count_;
+    const Count_t left_count = TotalCount(&c->nodes_[2 * pos]);
+    node->probaL_ =
+        MAX_PROBA - CalcProba(left_count, total_count, MAX_PROBA, 0);
+  }
+}
+
+// Recomputes the probabilities from node 'pos' up to the root, then probaN_.
+static void UpdateProbas(TCoder* const c, int pos) {
+  for ( ; pos >= 1; pos >>= 1) {
+    UpdateNodeProbas(c, pos);
+  }
+  c->probaN_ = CalcProba(c->num_symbols_, c->total_coded_, HALF_PROBA - 1, 0);
+}
+
+// -----------------------------------------------------------------------------
+
+// Accounts for 'incr' more occurrences of the symbol stored at node 'pos':
+// updates the counters up to the root, letting the symbol bubble up past
+// less frequent parents, and refreshes the probabilities along the path.
+// A frozen coder is only updated for symbols that have never been counted.
+static void UpdateTree(TCoder* const c, int pos, Count_t incr) {
+  Node* node;
+  int is_fresh_new_symbol;
+  // Check the arguments before they are dereferenced / used as an index.
+  assert(c);
+  assert(pos >= 1 && pos <= c->num_nodes_);
+  node = &c->nodes_[pos];
+  assert(node->symbol_ != INVALID_SYMBOL);
+  is_fresh_new_symbol = (node->countS_ == 0);
+  if (!c->frozen_ || is_fresh_new_symbol) {
+    const int starting_pos = pos;   // save for later
+    // Update the counters up the tree, possibly exchanging some nodes
+    node->countS_ += incr;
+    while (pos > 1) {
+      Node* const parent = &c->nodes_[pos >> 1];
+      parent->count_ += incr;
+      if (parent->countS_ < node->countS_) {
+        ExchangeSymbol(c, pos);
+      }
+      pos >>= 1;
+      node = parent;
+    }
+    c->total_coded_ += incr;
+    UpdateProbas(c, starting_pos);  // Update the probas along the modified path
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Fixed-length symbol coding
+// Note: the symbol will be coded exactly once at
most, so using a fixed length +// code is better than Golomb-code (e.g.) on average. + +// We use the exact bit-distribution probability considering the upper-bound +// supplied: +// Written in binary, a symbol 's' has a probability of having its k-th bit +// set to 1 which is given by: +// If the k-th bit of max_value is 0: +// P0(k) = [(max_value >> (k + 1)) << k] / max_value +// If the k-th bit of max_value is 1: +// P1(k) = P0(k) + [max_value & ((1 << k) - 1)] / max_value + +static WEBP_INLINE void CodeSymbol(VP8BitWriter* const bw, int s, + int max_value) { + int i, up = 1; + assert(bw); + for (i = 0; up < max_value; up <<= 1, ++i) { + int den = (max_value >> 1) & ~(up - 1); + if (max_value & up) den |= max_value & (up - 1); + VP8PutBit(bw, (s >> i) & 1, MAX_PROBA - MAX_PROBA * den / max_value); + } +} + +static WEBP_INLINE int DecodeSymbol(VP8BitReader* const br, int max_value) { + int i, up = 1, v = 0; + assert(br); + for (i = 0; up < max_value; ++i) { + int den = (max_value >> 1) & ~(up - 1); + if (max_value & up) den |= max_value & (up - 1); + v |= VP8GetBit(br, MAX_PROBA - MAX_PROBA * den / max_value) << i; + up <<= 1; + } + return v; +} + +// ----------------------------------------------------------------------------- +// Encoding + +void TCoderEncode(TCoder* const c, int s, VP8BitWriter* const bw) { + int pos; + const int is_new_symbol = (c->symbols_[s] == INVALID_POS); + assert(c); + if (!c->fixed_symbols_ && c->num_symbols_ < c->num_nodes_) { + if (c->num_symbols_ > 0) { + if (bw != NULL) { + VP8PutBit(bw, is_new_symbol, c->probaN_); + } + } else { + assert(is_new_symbol); + } + } else { + assert(!is_new_symbol); + } + if (is_new_symbol) { + if (bw != NULL) { + CodeSymbol(bw, s, c->num_nodes_); + } + pos = NewNode(c, s); + } else { + pos = SymbolToNode(c, s); + if (bw != NULL) { + const int length = CodeLength(pos); + int parent = 1; + int i; + for (i = 0; !IsLeaf(c, parent); ++i) { + const Node* const node = &c->nodes_[parent]; + const int 
symbol_proba = node->probaS_; + const int is_stop = (i == length); + if (VP8PutBit(bw, is_stop, symbol_proba)) { + break; + } else if (!HasOnlyRightChild(c, parent)) { + const int left_proba = node->probaL_; + const int is_right = (pos >> (length - 1 - i)) & 1; // extract bits #i + VP8PutBit(bw, is_right, left_proba); + parent = (parent << 1) | is_right; + } else { + parent <<= 1; + break; + } + } + assert(parent == pos); + } + } + UpdateTree(c, pos, 1); +} + +// ----------------------------------------------------------------------------- +// Decoding + +int TCoderDecode(TCoder* const c, VP8BitReader* const br) { + int s; + int pos; + int is_new_symbol = 0; + assert(c); + assert(br); + // Check if we need to transmit the new symbol's value + if (!c->fixed_symbols_ && c->num_symbols_ < c->num_nodes_) { + if (c->num_symbols_ > 0) { + is_new_symbol = VP8GetBit(br, c->probaN_); + } else { + is_new_symbol = 1; + } + } + // Code either the raw value, or the path downward to its node. + if (is_new_symbol) { + s = DecodeSymbol(br, c->num_nodes_); + pos = NewNode(c, s); + } else { + pos = 1; + while (!IsLeaf(c, pos)) { + const Node* const node = &c->nodes_[pos]; + // Did we reach the stopping node? + const int symbol_proba = node->probaS_; + const int is_stop = VP8GetBit(br, symbol_proba); + if (is_stop) { + break; // reached the stopping node for the coded symbol. + } else { + // Not yet done, keep traversing and branching. 
+ if (!HasOnlyRightChild(c, pos)) { + const int left_proba = node->probaL_; + const int is_right = VP8GetBit(br, left_proba); + pos = (pos << 1) | is_right; + } else { + pos <<= 1; + break; + } + assert(pos <= c->num_nodes_); + } + } + s = c->nodes_[pos].symbol_; + assert(pos == SymbolToNode(c, s)); + } + assert(pos <= c->num_nodes_); + UpdateTree(c, pos, 1); + return s; +} + +// ----------------------------------------------------------------------------- + +double TCoderSymbolCost(const TCoder* const c, int symbol) { + const int pos = c->symbols_[symbol]; + assert(c); + assert(symbol >= 0 && symbol < c->num_nodes_); + if (pos != INVALID_POS) { + const Node* const node = &c->nodes_[pos]; + const Count_t count = node->countS_; + assert(count > 0); + assert(c->total_coded_ > 0); + // Note: we use 1 + total_coded_ as denominator because we most probably + // intend to code an extra symbol afterward. + // TODO(skal): is log2() too slow ? + return -log2(count / (1. + c->total_coded_)); + } + return c->symbol_bit_cost_; +} + +// ----------------------------------------------------------------------------- + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif diff --git a/src/utils/tcoder.h b/src/utils/tcoder.h new file mode 100644 index 00000000..158cb2eb --- /dev/null +++ b/src/utils/tcoder.h @@ -0,0 +1,84 @@ +// Copyright 2011 Google Inc. +// +// This code is licensed under the same terms as WebM: +// Software License Agreement: http://www.webmproject.org/license/software/ +// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ +// ----------------------------------------------------------------------------- +// +// Tree-coder using VP8's boolean coder +// +// Symbols are stored as nodes of a tree that records their frequencies and +// is dynamically updated. 
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//
+// Encoding example:
+/*
+static int Compress(const uint8_t* src, int src_length,
+                    uint8_t** output, size_t* output_size) {
+  int i;
+  TCoder* coder = TCoderNew(255);
+  VP8BitWriter bw;
+
+  VP8BitWriterInit(&bw, 0);
+  for (i = 0; i < src_length; ++i)
+    TCoderEncode(coder, src[i], &bw);
+  TCoderDelete(coder);
+  VP8BitWriterFinish(&bw);
+
+  *output = VP8BitWriterBuf(&bw);
+  *output_size = VP8BitWriterSize(&bw);
+  return !bw.error_;
+}
+*/
+//
+// Decoding example:
+/*
+static int Decompress(const uint8_t* src, size_t src_size,
+                      uint8_t* dst, int dst_length) {
+  int i;
+  TCoder* coder = TCoderNew(255);
+  VP8BitReader br;
+
+  VP8InitBitReader(&br, src, src + src_size);
+  for (i = 0; i < dst_length; ++i)
+    dst[i] = TCoderDecode(coder, &br);
+  TCoderDelete(coder);
+  return !br.eof_;
+}
+*/
+
+#ifndef WEBP_UTILS_TCODER_H_
+#define WEBP_UTILS_TCODER_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct VP8BitReader;
+struct VP8BitWriter;
+typedef struct TCoder TCoder;
+
+// Creates a tree-coder capable of coding symbols in
+// the [0, max_symbol] range. Returns NULL in case of memory error.
+TCoder* TCoderNew(int max_symbol);
+// Re-initializes an existing object, making it ready for a new encoding or
+// decoding cycle.
+void TCoderInit(TCoder* const c);
+// Destroys the tree-coder object and frees memory.
+void TCoderDelete(TCoder* const c);
+
+// Code next symbol 's'. If the bit-writer 'bw' is NULL, the function will
+// just record the symbol, and update the internal frequency counters.
+void TCoderEncode(TCoder* const c, int s, struct VP8BitWriter* const bw);
+// Decode and return next symbol.
+int TCoderDecode(TCoder* const c, struct VP8BitReader* const br);
+
+// Theoretical number of bits needed to code 'symbol' in the current state.
+double TCoderSymbolCost(const TCoder* const c, int symbol);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // WEBP_UTILS_TCODER_H_
diff --git a/src/utils/tcoderi.h b/src/utils/tcoderi.h
new file mode 100644
index 00000000..40f25855
--- /dev/null
+++ b/src/utils/tcoderi.h
@@ -0,0 +1,71 @@
+// Copyright 2011 Google Inc.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Internal header for tree-coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//
+
+#ifndef WEBP_UTILS_TCODERI_H_
+#define WEBP_UTILS_TCODERI_H_
+
+#include "./tcoder.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../utils/bit_reader.h"
+#include "../utils/bit_writer.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef int Symbol_t;
+typedef uint32_t Count_t;  // TODO(skal): check overflow during coding.
+
+#define INVALID_SYMBOL ((Symbol_t)(-1))
+#define INVALID_POS 0
+
+#define MAX_PROBA 255
+#define HALF_PROBA 128
+
+typedef struct {         // ternary node.
+  Symbol_t symbol_;
+  // Note: theoretically, one of these three fields is redundant and could be
+  // omitted, but it'd make the code quite complicated (having to look-up the
+  // parent's total count in order to deduce the missing field). Better not.
+  Count_t countS_;       // count for symbol
+  Count_t count_;        // count for non-symbol (derived from sub-tree)
+  int probaL_;           // cached left proba = TotalCount(left) / count_
+  int probaS_;           // cached approximate proba = countS_ / TotalCount
+} Node;
+
+struct TCoder {
+  // dynamic fields:
+  int num_symbols_;          // number of symbols actually used
+  Count_t total_coded_;      // total number of coded symbols
+  int frozen_;               // if true, frequencies are not updated
+  int fixed_symbols_;        // if true, symbols are not updated
+  int probaN_;               // cached new-symbol probability
+
+  // constants:
+  int num_nodes_;            // max number of symbols or nodes. Constant, > 0.
+  double symbol_bit_cost_;   // latest evaluation of the bit-cost per new symbol
+
+  Node* nodes_;              // nodes (1-based indexed)
+  int* symbols_;             // for each symbol, location of its node
+};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // WEBP_UTILS_TCODERI_H_