diff --git a/Makefile.vc b/Makefile.vc
index 408b0f6e..76ea1424 100644
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -175,8 +175,11 @@ X_OBJS= \
     $(DIROBJ)\enc\tree.obj \
     $(DIROBJ)\enc\webpenc.obj \
     $(DIROBJ)\mux\mux.obj \
+    $(DIROBJ)\utils\alpha.obj \
     $(DIROBJ)\utils\bit_reader.obj \
     $(DIROBJ)\utils\bit_writer.obj \
+    $(DIROBJ)\utils\quant_levels.obj \
+    $(DIROBJ)\utils\tcoder.obj \
     $(DIROBJ)\utils\thread.obj \
     $(RESOURCE) \
 
diff --git a/makefile.unix b/makefile.unix
index e828ebe6..104c89f0 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -54,6 +54,8 @@ ARFLAGS = r
 CC = gcc -Isrc/ -Iexamples/ -Wall
 CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS)
 INSTALL = install
+GROFF = /usr/bin/groff
+COL = /usr/bin/col
 LDFLAGS = $(EXTRA_LIBS) -lm
 
 DEC_OBJS = src/dec/frame.o src/dec/webp.o src/dec/quant.o src/dec/tree.o \
@@ -68,7 +70,8 @@ DSP_OBJS = src/dsp/cpu.o src/dsp/enc.o \
            src/dsp/enc_sse2.o src/dsp/dec.o src/dsp/dec_sse2.o \
            src/dsp/dec_neon.o src/dsp/upsampling.o src/dsp/upsampling_sse2.o \
            src/dsp/yuv.o
-UTILS_OBJS = src/utils/bit_reader.o src/utils/bit_writer.o src/utils/thread.o
+UTILS_OBJS = src/utils/alpha.o src/utils/bit_reader.o src/utils/bit_writer.o \
+             src/utils/quant_levels.o src/utils/thread.o src/utils/tcoder.o
 
 OBJS = $(DEC_OBJS) $(ENC_OBJS) $(DSP_OBJS) $(UTILS_OBJS)
 
@@ -77,10 +80,13 @@ MUX_OBJS = src/mux/mux.o
 HDRS = src/webp/encode.h src/enc/vp8enci.h src/enc/cost.h src/webp/mux.h \
        src/dec/vp8i.h  \
        src/dsp/yuv.h src/dsp/dsp.h \
-       src/utils/bit_writer.h src/utils/bit_reader.h src/utils/thread.h
+       src/utils/alpha.h src/utils/bit_writer.h src/utils/bit_reader.h \
+       src/utils/thread.h src/utils/tcoder.h src/utils/tcoderi.h
 
-OUTPUT = examples/cwebp examples/dwebp examples/webpmux \
-	 src/libwebp.a src/mux/libwebpmux.a
+OUT_LIBS = src/libwebp.a src/mux/libwebpmux.a
+OUT_EXAMPLES = examples/cwebp examples/dwebp examples/webpmux
+
+OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES)
 
 all:ex
 
@@ -93,29 +99,29 @@ src/libwebp.a:  $(OBJS)
 src/mux/libwebpmux.a:  $(MUX_OBJS)
 	$(AR) $(ARFLAGS) $@ $^
 
-ex: examples/cwebp examples/dwebp examples/webpmux
+ex: $(OUT_EXAMPLES)
 
 examples/cwebp: examples/cwebp.o src/libwebp.a
 examples/dwebp: examples/dwebp.o src/libwebp.a
 examples/webpmux: examples/webpmux.o src/mux/libwebpmux.a src/libwebp.a
-examples/cwebp examples/dwebp examples/webpmux:
+
+$(OUT_EXAMPLES):
 	$(CC) -o $@ $^ $(LDFLAGS)
 
 dist: DESTDIR := dist
 dist: all
 	$(INSTALL) -m755 -d $(DESTDIR)/include/webp \
-	    $(DESTDIR)/doc $(DESTDIR)/lib
-	$(INSTALL) -m755 -s examples/cwebp examples/dwebp examples/webpmux \
-	    $(DESTDIR)
+             $(DESTDIR)/doc $(DESTDIR)/lib
+	$(INSTALL) -m755 -s $(OUT_EXAMPLES) $(DESTDIR)
 	$(INSTALL) -m644 src/webp/*.h $(DESTDIR)/include/webp
 	$(INSTALL) -m644 src/libwebp.a $(DESTDIR)/lib
 	umask 022; \
 	for m in man/[cd]webp.1; do \
 	  basenam=$$(basename $$m .1); \
-	  /usr/bin/groff -t -e -man -T utf8 $$m \
-	    | col -bx >$(DESTDIR)/doc/$${basenam}.txt; \
-	  /usr/bin/groff -t -e -man -T html $$m \
-	    | col -bx >$(DESTDIR)/doc/$${basenam}.html; \
+	  $(GROFF) -t -e -man -T utf8 $$m \
+	    | $(COL) -bx >$(DESTDIR)/doc/$${basenam}.txt; \
+	  $(GROFF) -t -e -man -T html $$m \
+	    | $(COL) -bx >$(DESTDIR)/doc/$${basenam}.html; \
 	done
 
 clean:
diff --git a/src/utils/Makefile.am b/src/utils/Makefile.am
index bb35c633..e832b390 100644
--- a/src/utils/Makefile.am
+++ b/src/utils/Makefile.am
@@ -1,10 +1,12 @@
 AM_CPPFLAGS = -I$(top_srcdir)/src
 
-libwebputils_la_SOURCES = bit_reader.h bit_reader.c \
+libwebputils_la_SOURCES = alpha.h alpha.c \
+                          bit_reader.h bit_reader.c \
                           bit_writer.h bit_writer.c \
+                          quant_levels.c \
+                          tcoder.h tcoderi.h tcoder.c \
                           thread.h thread.c
 libwebputils_la_LDFLAGS = -version-info 0:0:0
-libwebputils_la_CPPFLAGS = $(USE_EXPERIMENTAL_CODE)
 libwebputilsinclude_HEADERS = ../webp/types.h
 libwebputilsincludedir = $(includedir)/webp
 
diff --git a/src/utils/alpha.c b/src/utils/alpha.c
new file mode 100644
index 00000000..65b30de2
--- /dev/null
+++ b/src/utils/alpha.c
@@ -0,0 +1,432 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha plane encoding and decoding library.
+//
+// Author: vikasa@google.com (Vikas Arora)
+
+#include <string.h>  // for memcpy()
+#include "./alpha.h"
+
+#include "./bit_reader.h"
+#include "./bit_writer.h"
+#include "./tcoder.h"
+
+#define MAX_SYMBOLS      255
+#define ALPHA_HEADER_LEN 2
+
+// -----------------------------------------------------------------------------
+// Alpha Encode.
+
+static int EncodeIdent(const uint8_t* data, int width, int height,
+                       uint8_t** output, size_t* output_size) {
+  const size_t data_size = height * width;
+  uint8_t* alpha = NULL;
+  assert((output != NULL) && (output_size != NULL));
+
+  if (data == NULL) {
+    return 0;
+  }
+
+  alpha = (uint8_t*)malloc(data_size);
+  if (alpha == NULL) {
+    return 0;
+  }
+  memcpy(alpha, data, data_size);
+  *output_size = data_size;
+  *output = alpha;
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Zlib-like encoding using TCoder
+
+typedef struct {
+  int dist;        // backward distance (=0 means: literal)
+  int literal;     // literal value (if dist = 0)
+  size_t len;      // length of matched string for non-literal
+} Token;
+
+#define MIN_LEN 2
+#define DEFER_SKIP 1      // for deferred evaluation (0 = off)
+
+#define CACHED_COST(coder, c) ((cost_cache[(c)] == 0.) ?             \
+  (cost_cache[(c)] = lit_mode_cost + TCoderSymbolCost((coder), (c))) \
+  : cost_cache[(c)])
+
+// Record symbol
+#define RECORD(TOKEN) {                                       \
+  TCoderEncode(coderd, (TOKEN)->dist, NULL);                  \
+  if ((TOKEN)->dist == 0) {                                   \
+    TCoderEncode(coder, (TOKEN)->literal, NULL);              \
+  } else {                                                    \
+    TCoderEncode(coderl, (TOKEN)->len - MIN_LEN, NULL);       \
+  }                                                           \
+}
+
+static size_t GetLongestMatch(const uint8_t* const data,
+                           const uint8_t* const ref, size_t max_len) {
+  size_t n;
+  for (n = 0; n < max_len && (data[n] == ref[n]); ++n) { /* do nothing */ }
+  return n;
+}
+
+static int EncodeZlibTCoder(uint8_t* data, int width, int height,
+                            uint8_t** output, size_t* output_size) {
+  int ok = 0;
+  const size_t data_size = width * height;
+  const size_t MAX_DIST = 3 * width;
+  const size_t MAX_LEN = 2 * width;
+  Token* const msg = (Token*)malloc(data_size * sizeof(*msg));
+  int num_tokens;
+  TCoder* const coder = TCoderNew(MAX_SYMBOLS);
+  TCoder* const coderd = TCoderNew(MAX_DIST);
+  TCoder* const coderl = TCoderNew(MAX_LEN - MIN_LEN);
+
+  if (coder == NULL || coderd == NULL || coderl == NULL) {
+    goto End;
+  }
+  if (msg == NULL) {
+    goto End;
+  }
+
+  {
+    int deferred_eval = 0;
+    size_t n = 0;
+    num_tokens = 0;
+    while (n < data_size) {
+      const double lit_mode_cost = TCoderSymbolCost(coderd, 0);
+      double cost_cache[MAX_SYMBOLS + 1] = { 0. };
+      Token best;
+      size_t dist = 0;
+      double best_cost = CACHED_COST(coder, data[n]);
+      size_t max_len = MAX_LEN;
+      if (max_len > data_size - n) {
+        max_len = data_size - n;
+      }
+      best.dist = 0;
+      best.literal = data[n];
+      best.len = 1;
+      for (dist = 1; dist <= MAX_DIST && dist <= n; ++dist) {
+        const int pos = n - dist;
+        const size_t min_len = best.len - 1;
+        size_t len;
+
+        // Early out: we probe at two locations for a quick match check
+        if (data[pos] != data[n] ||
+            data[pos + min_len] != data[n + min_len]) {
+          continue;
+        }
+
+        len = GetLongestMatch(data + pos, data + n, max_len);
+        if (len >= MIN_LEN && len >= best.len) {
+          // This is the cost of the coding proposal
+          const double cost = TCoderSymbolCost(coderl, len - MIN_LEN)
+                            + TCoderSymbolCost(coderd, dist);
+          // We're gaining an extra len-best.len coded message over the last
+          // known best. Compute how this would have cost if coded all literal.
+          // (TODO: we should fully re-evaluate at position best.len and not
+          // assume all is going to be coded as literals. But it's at least an
+          // upper-bound (worst-case coding). Deferred evaluation used below
+          // partially addresses this.)
+          double lit_cost = 0;
+          size_t i;
+          for (i = best.len; i < len; ++i) {
+            lit_cost += CACHED_COST(coder, data[n + i]);
+          }
+          // So, is it worth ?
+          if (best_cost + lit_cost >= cost) {
+            best_cost = cost;
+            best.len = len;
+            best.dist = dist;
+          }
+        }
+        if (len >= MAX_LEN) {
+          break;  // No need to search further. We already got a max-long match
+        }
+      }
+      // Deferred evaluation: before finalizing a choice we try to find
+      // best cost at position n + 1 and see if we get a longer
+      // match then current best. If so, we transform the current match
+      // into a literal, go to position n + 1, and try again.
+      {
+        Token* cur = &msg[num_tokens];
+        int forget = 0;
+        if (deferred_eval) {
+          --cur;
+          // If the next match isn't longer, keep previous match
+          if (best.len <= cur->len) {
+            deferred_eval = 0;
+            n += cur->len - DEFER_SKIP;
+            forget = 1;   // forget the new match
+            RECORD(cur)
+          } else {   // else transform previous match into a shorter one
+            cur->len = DEFER_SKIP;
+            if (DEFER_SKIP == 1) {
+              cur->dist = 0;    // literal
+            }
+            // TODO(later): RECORD() macro should be changed to take an extra
+            // "is_final" param, so that we could write the bitstream at once.
+            RECORD(cur)
+            ++cur;
+          }
+        }
+        if (!forget) {
+          *cur = best;
+          ++num_tokens;
+          if (DEFER_SKIP > 0) {
+            deferred_eval = (cur->len > 2) && (cur->len < MAX_LEN / 2);
+          }
+          if (deferred_eval) {
+            // will probe at a later position before finalizing.
+            n += DEFER_SKIP;
+          } else {
+            // Keep the current choice.
+            n += cur->len;
+            RECORD(cur)
+          }
+        }
+      }
+    }
+  }
+
+  // Final bitstream assembly.
+  {
+    int n;
+    VP8BitWriter bw;
+    VP8BitWriterInit(&bw, 0);
+    TCoderInit(coder);
+    TCoderInit(coderd);
+    TCoderInit(coderl);
+    for (n = 0; n < num_tokens; ++n) {
+      const Token* const t = &msg[n];
+      const int is_literal = (t->dist == 0);
+      TCoderEncode(coderd, t->dist, &bw);
+      if (is_literal) {  // literal
+        TCoderEncode(coder, t->literal, &bw);
+      } else {
+        TCoderEncode(coderl, t->len - MIN_LEN, &bw);
+      }
+    }
+
+    // clean up
+    VP8BitWriterFinish(&bw);
+    *output = VP8BitWriterBuf(&bw);
+    *output_size = VP8BitWriterSize(&bw);
+    ok = 1;
+  }
+
+ End:
+  if (coder) TCoderDelete(coder);
+  if (coderl) TCoderDelete(coderl);
+  if (coderd) TCoderDelete(coderd);
+  free(msg);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
+                int quality, int method,
+                uint8_t** output, size_t* output_size) {
+  const int kMaxImageDim = (1 << 14) - 1;
+  uint8_t* compressed_alpha = NULL;
+  uint8_t* quant_alpha = NULL;
+  uint8_t* out = NULL;
+  size_t compressed_size = 0;
+  size_t data_size = height * width;
+  float mse = 0.0;
+  int ok = 0;
+  int h;
+
+  if ((data == NULL) || (output == NULL) || (output_size == NULL)) {
+    return 0;
+  }
+
+  if (width <= 0 || width > kMaxImageDim ||
+      height <= 0 || height > kMaxImageDim || stride < width) {
+    return 0;
+  }
+
+  if (quality < 0 || quality > 100) {
+    return 0;
+  }
+
+  if (method < 0 || method > 1) {
+    return 0;
+  }
+
+  quant_alpha = (uint8_t*)malloc(data_size);
+  if (quant_alpha == NULL) {
+    return 0;
+  }
+
+  // Extract the alpha data (WidthXHeight) from raw_data (StrideXHeight).
+  for (h = 0; h < height; ++h) {
+    memcpy(quant_alpha + h * width, data + h * stride, width * sizeof(*data));
+  }
+
+  if (quality < 100) {  // No Quantization required for 'quality = 100'.
+    // 16 Alpha levels gives quite a low MSE w.r.t Original Alpha plane hence
+    // mapped to moderate quality 70. Hence Quality:[0, 70] -> Levels:[2, 16]
+    // and Quality:]70, 100] -> Levels:]16, 256].
+    const int alpha_levels = (quality <= 70) ?
+                             2 + quality / 5 :
+                             16 + (quality - 70) * 8;
+
+    ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &mse);
+    if (!ok) {
+      free(quant_alpha);
+      return 0;
+    }
+  }
+
+  if (method == 0) {
+    ok = EncodeIdent(quant_alpha, width, height,
+                     &compressed_alpha, &compressed_size);
+  } else if (method == 1) {
+    ok = EncodeZlibTCoder(quant_alpha, width, height,
+                          &compressed_alpha, &compressed_size);
+  }
+
+  free(quant_alpha);
+  if (!ok) {
+    return 0;
+  }
+
+  out = (uint8_t*)malloc(compressed_size + ALPHA_HEADER_LEN);
+  if (out == NULL) {
+    free(compressed_alpha);
+    return 0;
+  } else {
+    *output = out;
+  }
+
+  // Alpha bit-stream Header:
+  // Byte0: Compression Method.
+  // Byte1: Reserved for later extension.
+  out[0] = method & 0xff;
+  out[1] = 0;  // Reserved Byte.
+  out += ALPHA_HEADER_LEN;
+  memcpy(out, compressed_alpha, compressed_size);
+  free(compressed_alpha);
+  out += compressed_size;
+
+  *output_size = out - *output;
+
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Alpha Decode.
+
+static int DecodeIdent(const uint8_t* data, size_t data_size,
+                       uint8_t* output) {
+  assert((data != NULL) && (output != NULL));
+  memcpy(output, data, data_size);
+  return 1;
+}
+
+// Decodes the TCoder-compressed 'data' into 'output' ('output_size' bytes).
+// Returns 1 on success, 0 on allocation failure or invalid back-reference.
+static int DecompressZlibTCoder(const uint8_t* data, size_t data_size,
+                                int width, int height,
+                                uint8_t* output, size_t output_size) {
+  int ok = 0;   // stays 0 on every 'goto End' error path
+  const size_t MAX_DIST = 3 * width;
+  const size_t MAX_LEN = 2 * width;
+  TCoder* const coder = TCoderNew(MAX_SYMBOLS);
+  TCoder* const coderd = TCoderNew(MAX_DIST);
+  TCoder* const coderl = TCoderNew(MAX_LEN - MIN_LEN);
+
+  (void)height;     // unused parameter
+  if (coder == NULL || coderd == NULL || coderl == NULL) goto End;
+
+  {
+    size_t pos = 0;
+    VP8BitReader br;
+    VP8InitBitReader(&br, data, data + data_size);
+    while (pos < output_size) {
+      const int dist = TCoderDecode(coderd, &br);
+      if (dist == 0) {
+        const int literal = TCoderDecode(coder, &br);
+        output[pos++] = literal;
+      } else {
+        const int len = MIN_LEN + TCoderDecode(coderl, &br);
+        int k;
+        // A copy must start inside the decoded data and end inside 'output'.
+        if (dist < 0 || (size_t)dist > pos || pos + len > output_size) goto End;
+        for (k = 0; k < len; ++k) {
+          output[pos + k] = output[pos + k - dist];
+        }
+        pos += len;
+      }
+    }
+  }
+  ok = 1;
+
+ End:
+  if (coder) TCoderDelete(coder);
+  if (coderl) TCoderDelete(coderl);
+  if (coderd) TCoderDelete(coderd);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// Decodes the alpha bit-stream 'data' into the 'stride'-spaced rows of
+// 'output'; see alpha.h for the full contract. Returns 1 on success, else 0.
+int DecodeAlpha(const uint8_t* data, size_t data_size,
+                int width, int height, int stride,
+                uint8_t* output) {
+  uint8_t* decoded_data = NULL;
+  int ok = 0;
+  int method;
+  size_t decoded_size;
+
+  if (data == NULL || output == NULL) return 0;
+  if (data_size <= ALPHA_HEADER_LEN) return 0;   // header + payload needed
+  if (width <= 0 || height <= 0 || stride < width) return 0;
+
+  // Byte0 of the header selects the compression method; Byte1 is reserved.
+  method = data[0];
+  if (method < 0 || method > 1) {
+    return 0;
+  }
+
+  // Dimensions are validated above: do the product in size_t, not in int.
+  decoded_size = (size_t)width * height;
+  decoded_data = (uint8_t*)malloc(decoded_size);
+  if (decoded_data == NULL) {
+    return 0;
+  }
+
+  data_size -= ALPHA_HEADER_LEN;
+  data += ALPHA_HEADER_LEN;
+
+  if (method == 0) {
+    // The raw payload must cover the whole plane, and only 'decoded_size'
+    // bytes are copied so a longer payload cannot overflow 'decoded_data'.
+    ok = (data_size >= decoded_size) &&
+         DecodeIdent(data, decoded_size, decoded_data);
+  } else if (method == 1) {
+    ok = DecompressZlibTCoder(data, data_size, width, height,
+                              decoded_data, decoded_size);
+  }
+
+  if (ok) {
+    // Construct raw_data (HeightXStride) from the alpha data (HeightXWidth).
+    int h;
+    for (h = 0; h < height; ++h) {
+      memcpy(output + (size_t)h * stride, decoded_data + (size_t)h * width,
+             width * sizeof(*data));
+    }
+  }
+  free(decoded_data);
+
+  return ok;
+}
diff --git a/src/utils/alpha.h b/src/utils/alpha.h
new file mode 100644
index 00000000..e0df9cc2
--- /dev/null
+++ b/src/utils/alpha.h
@@ -0,0 +1,68 @@
+// Copyright 2011 Google Inc.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha plane encoding and decoding library.
+//
+// Author: vikasa@google.com (Vikas Arora)
+
+#ifndef WEBP_UTILS_ALPHA_H_
+#define WEBP_UTILS_ALPHA_H_
+
+#include <stdlib.h>
+
+#include "../webp/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Encodes the given Alpha data 'data' of size 'stride'x'height' via specified
+// compression method 'method'. The pre-processing (Quantization) is
+// performed if 'quality' is less than 100. For such cases, the encoding is
+// lossy. Valid range for 'quality' is [0, 100] and for 'method' is [0, 1]:
+//   'method = 0' - No compression;
+//   'method = 1' - zlib-like compression (TCoder);
+// 'output' corresponds to the buffer containing compressed Alpha data.
+//          This buffer is allocated by this method and caller should call
+//          free(*output) when done.
+// 'output_size' corresponds to size of this compressed Alpha buffer.
+//
+// Returns 1 on successfully encoding the Alpha and
+//         0 if either:
+//           data, output or output_size is NULL, or
+//           inappropriate width, height or stride, or
+//           invalid quality or method, or
+//           Memory allocation for the compressed data fails.
+
+int EncodeAlpha(const uint8_t* data, int width, int height, int stride,
+                int quality, int method,
+                uint8_t** output, size_t* output_size);
+
+// Decodes the compressed data 'data' of size 'data_size' into the 'output'.
+// The 'output' buffer should be pre-allocated and must be of the same
+// dimension 'height'x'stride', as that of the image.
+//
+// Returns 1 on successfully decoding the compressed Alpha and
+//         0 if either:
+//           data or output is NULL, or
+//           Error in bit-stream header (invalid compression method), or
+//           Error returned by appropriate compression method.
+int DecodeAlpha(const uint8_t* data, size_t data_size,
+                int width, int height, int stride, uint8_t* output);
+
+// Replace the input 'data' of size 'width'x'height' with 'num_levels'
+// quantized values. If not NULL, 'mse' receives the root mean-squared error.
+// Valid range for 'num_levels' is [2, 256].
+// Returns 0 in case of error (data is NULL, or parameters are invalid).
+int QuantizeLevels(uint8_t* data, int width, int height, int num_levels,
+                   float* mse);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_ALPHA_H_ */
diff --git a/src/utils/quant_levels.c b/src/utils/quant_levels.c
new file mode 100644
index 00000000..89257d7b
--- /dev/null
+++ b/src/utils/quant_levels.c
@@ -0,0 +1,143 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Quantize levels for specified number of quantization-levels ([2, 256]).
+// Min and max values are preserved (usual 0 and 255 for alpha plane).
+//
+// Author: skal@google.com (Pascal Massimino)
+
+#include <assert.h>
+#include <math.h>    // for sqrt()
+
+#include "./alpha.h"
+
+#define NUM_SYMBOLS     256
+
+#define MAX_ITER  6             // Maximum number of convergence steps.
+#define ERROR_THRESHOLD 1e-4    // MSE stopping criterion.
+
+// -----------------------------------------------------------------------------
+// Quantize levels.
+
+// Replaces each sample of 'data' ('width' x 'height' bytes) by one of at most
+// 'num_levels' representative values found with a few k-means iterations.
+// The smallest and largest input values are kept as-is. If 'mse' is not NULL
+// it receives the square root of the final mean squared error (0 when the
+// input already has few enough levels). Returns 1 on success, 0 on bad input.
+int QuantizeLevels(uint8_t* data, int width, int height,
+                   int num_levels, float* mse) {
+  int freq[NUM_SYMBOLS] = { 0 };
+  int q_level[NUM_SYMBOLS] = { 0 };
+  double inv_q_level[NUM_SYMBOLS] = { 0 };
+  int min_s = 255, max_s = 0;
+  const size_t data_size = (size_t)height * width;
+  size_t n = 0;
+  int s, num_levels_in, iter;
+  double last_err = 1.e38, err = 0.;
+
+  if (data == NULL) return 0;
+  if (width <= 0 || height <= 0) return 0;
+  if (num_levels < 2 || num_levels > 256) return 0;
+
+  // Histogram of the input, with its value range and count of distinct levels.
+  num_levels_in = 0;
+  for (n = 0; n < data_size; ++n) {
+    num_levels_in += (freq[data[n]] == 0);
+    if (min_s > data[n]) min_s = data[n];
+    if (max_s < data[n]) max_s = data[n];
+    ++freq[data[n]];
+  }
+
+  if (num_levels_in <= num_levels) {
+    if (mse) *mse = 0.;
+    return 1;   // nothing to do !
+  }
+
+  // Start with uniformly spread centroids.
+  for (s = 0; s < num_levels; ++s) {
+    inv_q_level[s] = min_s + (double)(max_s - min_s) * s / (num_levels - 1);
+  }
+
+  // Fixed values. Won't be changed.
+  q_level[min_s] = 0;
+  q_level[max_s] = num_levels - 1;
+  assert(inv_q_level[0] == min_s);
+  assert(inv_q_level[num_levels - 1] == max_s);
+
+  // k-Means iterations.
+  for (iter = 0; iter < MAX_ITER; ++iter) {
+    double err_count;
+    double q_sum[NUM_SYMBOLS] = { 0 };
+    double q_count[NUM_SYMBOLS] = { 0 };
+    int slot = 0;
+
+    // Assign classes to representatives.
+    for (s = min_s; s <= max_s; ++s) {
+      // Keep track of the nearest neighbour 'slot'
+      while (slot < num_levels - 1 &&
+             2 * s > inv_q_level[slot] + inv_q_level[slot + 1]) {
+        ++slot;
+      }
+      if (freq[s] > 0) {
+        // Accumulate in double: 's * freq[s]' could overflow an int on large
+        // planes (freq[s] can be as large as width * height).
+        q_sum[slot] += (double)s * freq[s];
+        q_count[slot] += freq[s];
+      }
+      q_level[s] = slot;
+    }
+
+    // Assign new representatives to classes.
+    if (num_levels > 2) {
+      for (slot = 1; slot < num_levels - 1; ++slot) {
+        const double count = q_count[slot];
+        if (count > 0.) {
+          inv_q_level[slot] = q_sum[slot] / count;
+        }
+      }
+    }
+
+    // Compute convergence error.
+    err = 0.;
+    err_count = 0.;
+    for (s = min_s; s <= max_s; ++s) {
+      const double error = s - inv_q_level[q_level[s]];
+      err += freq[s] * error * error;
+      err_count += freq[s];
+    }
+    if (err_count > 0.) err /= err_count;
+
+    // Check for convergence: we stop as soon as the error is no
+    // longer improving.
+    if (last_err - err < ERROR_THRESHOLD) break;
+    last_err = err;
+  }
+
+  // Remap the alpha plane to quantized values.
+  {
+    // double->int rounding operation can be costly, so we do it
+    // once for all before remapping. We also perform the data[] -> slot
+    // mapping, while at it (avoid one indirection in the final loop).
+    uint8_t map[NUM_SYMBOLS];
+    int s;
+    for (s = min_s; s <= max_s; ++s) {
+      const int slot = q_level[s];
+      map[s] = (uint8_t)(inv_q_level[slot] + .5);
+    }
+    // Final pass.
+    for (n = 0; n < data_size; ++n) {
+      data[n] = map[data[n]];
+    }
+  }
+
+  // Report the square root of the final mean squared error if requested.
+  if (mse) {
+    *mse = sqrt(err);
+  }
+
+  return 1;
+}
diff --git a/src/utils/tcoder.c b/src/utils/tcoder.c
new file mode 100644
index 00000000..45fcc8a3
--- /dev/null
+++ b/src/utils/tcoder.c
@@ -0,0 +1,460 @@
+// Copyright 2011 Google Inc.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Tree-coder using VP8's boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//
+// Rationale:
+//   We extend the boolean (binary) coder to handle arbitrary-sized alphabets,
+// and not just binary ones.
+// We dynamically maintain the population count and use the locally-optimal
+// probability distribution for coding every symbol. Every symbol can be
+// coded using _any_ binary tree. The boolean coder would traverse it and
+// branch each nodes left and right with the accumulated probability.
+//
+// E.g. with 3 symbols A, B, C already coded 30, 50 and 120 times respectively:
+//
+/*  Root Node #0 (count=30+50+120=200)
+    |  \
+    |   A (count=30)
+   Inner-Node #1 (count=50+120=170)
+    | \
+    |  C (count=120)
+    B (count=50)
+*/
+// If the next symbol to code is "C", we'll first code '0' with probability
+// p0 = 170/200 (which is the probability of taking the left branch at the
+// Root Node #0) and then code '1' with a probability p1 = 120/170 (which
+// is the probability of taking the right branch at the Inner-Node #1). The
+// total probability p0 * p1  = 120 / 200 is the correct one for symbol 'C'
+// (up to small rounding differences in the boolean coder).
+// The alphabet could be coded with _any_ tree, provided the count at the
+// inner nodes are updated appropriately. Put otherwise, the binary tree
+// is only used to efficiently update the frequency counts in O(ln(N)) time
+// instead of O(N).
+// For instance, we could use the equivalent tree:
+/*  Root (count=200)
+     | \
+     |  C (count=120)
+    Inner (count=50+30=80)
+     |  \
+     |   B (count=50)
+     A (count=30)
+*/
+// The frequency distribution would still be respected when coding the symbols.
+// But! There's a noticeable difference: it only takes _one_ call to VP8PutBit()
+// when coding the letter 'C' (with probability 120/200), which is the most
+// frequent symbol. This has an impact on speed, considering that each call
+// to VP8PutBit/VP8GetBit is costly. Hence, in order to minimize the number
+// of binary coding, the frequent symbol should be up in the tree.
+// Using Huffman tree is a solution, but the management and updating can be
+// quite complicated. Here we opt for a simpler option: we use _ternary_
+// tree instead, where each inner node can be associated with a symbol, in
+// addition to the regular left/right branches. When we traverse down
+// the tree, a stop bit is used to signal whether the traversal is finished
+// or not. Its probability is proportional to the frequency with which the
+// node's symbol has been seen (see probaS_). If the traversal is not
+// finished, we keep branching right or left according with a probability
+// proportional to each branch's use count (see probaL_).
+// When a symbol is seen more frequently than its parent, we simply
+// exchange the two symbols without changing the tree structure or the
+// left/right branches.
+// Hence, both tree examples above can be coded using this ternary tree:
+/*       Root #0 (count=200)
+         / | \
+        /  C  \
+    Node #1   Node #2
+    / | \     / | \
+   x  A  x   x  B  x        <- where 'x' means un-assigned branches.
+*/
+// Here, if the symbol 'A' becomes more frequent afterward, we'll just swap it
+// with 'C' (cf ExchangeSymbol()) without reorganizing the tree.
+//
+// Using this simple maintenance, we observed a typical 10-20% reduction
+// in the number of calls to VP8PutBit(), leading to 3-5% speed gain.
+//
+
+#include "./tcoderi.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+static double log2(double d) {
+  const double kLog2Reciprocal = 1.442695040888963;
+  return log(d) * kLog2Reciprocal;
+}
+#endif
+
+// For code=00001xxx..., returns the position of the leftmost leading '1' bit.
+static WEBP_INLINE int CodeLength(int code) {
+  int length = 0;
+  if (code > 0) {
+    while ((code >> length) != 1) ++length;
+  }
+  return length;
+}
+
+// -----------------------------------------------------------------------------
+
+TCoder* TCoderNew(int max_symbol) {
+  const int num_nodes = max_symbol + 1;
+  TCoder* c;
+  uint8_t* memory;
+  const int size = sizeof(*c)
+                 + num_nodes * sizeof(*c->nodes_)
+                 + num_nodes * sizeof(*c->symbols_);
+  if (max_symbol < 0) return NULL;
+  memory = (uint8_t*)malloc(size);
+  if (memory == NULL) return NULL;
+
+  c = (TCoder*)memory;
+  memory += sizeof(*c);
+  c->nodes_ = (Node*)memory - 1;
+  memory += num_nodes * sizeof(*c->nodes_);
+  c->symbols_ = (int*)memory;
+
+  c->num_nodes_ = num_nodes;
+  c->frozen_ = 0;
+
+  TCoderInit(c);
+  return c;
+}
+
+static WEBP_INLINE void ResetNode(Node* const node, Symbol_t symbol) {
+  assert(node);
+  node->countS_ = (Count_t)0;
+  node->count_  = (Count_t)0;
+  node->probaS_ = HALF_PROBA;
+  node->probaL_ = HALF_PROBA;
+  node->symbol_ = symbol;
+}
+
+// Wipe the tree clean.
+static void ResetTree(TCoder* const c) {
+  int pos;
+  assert(c);
+  c->num_symbols_ = 0;
+  c->total_coded_ = 0;
+  c->probaN_ = HALF_PROBA;
+  for (pos = 1; pos <= c->num_nodes_; ++pos) {
+    ResetNode(&c->nodes_[pos], INVALID_SYMBOL);
+  }
+  c->fixed_symbols_ = 0;
+  c->symbol_bit_cost_ = 5 + CodeLength(c->num_nodes_);
+}
+
+static void ResetSymbolMap(TCoder* const c) {
+  Symbol_t s;
+  assert(c);
+  c->num_symbols_ = 0;
+  c->probaN_ = HALF_PROBA;
+  for (s = 0; s < c->num_nodes_; ++s) {
+    c->symbols_[s] = INVALID_POS;
+  }
+}
+
+void TCoderInit(TCoder* const c) {
+  assert(c);
+  if (!c->frozen_) {      // Reset counters
+    ResetTree(c);
+    ResetSymbolMap(c);
+  }
+}
+
+void TCoderDelete(TCoder* const c) {
+  free(c);
+}
+
+// -----------------------------------------------------------------------------
+// Tree utils around nodes
+
+// Total number of visits on this nodes
+static WEBP_INLINE Count_t TotalCount(const Node* const n) {
+  return n->countS_ + n->count_;
+}
+
+// Returns true if node has no child.
+static WEBP_INLINE int IsLeaf(const TCoder* const c, int pos) {
+  return (2 * pos > c->num_symbols_);
+}
+
+// Returns true if node has exactly one child (2 * pos is the last used node).
+static WEBP_INLINE int HasOnlyRightChild(const TCoder* const c, int pos) {
+  return (2 * pos == c->num_symbols_);
+}
+
+// -----------------------------------------------------------------------------
+// Node management
+
+// Assigns the next free node to symbol 's' and returns that node's position.
+static int NewNode(TCoder* const c, int s) {
+  // The new slot is the free one closest to the top: it keeps paths short.
+  int pos;
+  assert(c != NULL && c->num_symbols_ < c->num_nodes_);
+  pos = 1 + c->num_symbols_;   // read only after 'c' has been checked
+  c->symbols_[s] = pos;
+  ResetNode(&c->nodes_[pos], s);
+  ++c->num_symbols_;
+  return pos;
+}
+
+// Returns the node position of symbol 's' (sanity-checked in debug builds).
+static WEBP_INLINE int SymbolToNode(const TCoder* const c, int s) {
+  int pos;
+  assert(s >= 0 && s < c->num_nodes_ && s != INVALID_SYMBOL);
+  pos = c->symbols_[s];   // looked up only after 's' has been range-checked
+  assert(pos != INVALID_POS && c->nodes_[pos].symbol_ == s);
+  return pos;
+}
+
+#define SWAP(T, a, b) do {  \
+  const T tmp = (a);        \
+  (a) = (b);                \
+  (b) = tmp;                \
+} while (0)
+
+// Makes the symbol of child node 'pos' trade places with its parent's symbol.
+static void ExchangeSymbol(const TCoder* const c, const int pos) {
+  const int parent = pos >> 1;
+  Node* const node0 = &c->nodes_[parent];   // parent node
+  Node* const node1 = &c->nodes_[pos];      // child node
+  const Symbol_t S0 = node0->symbol_;
+  const Symbol_t S1 = node1->symbol_;
+  c->symbols_[S1] = parent;   // keep the symbol -> position table in sync
+  c->symbols_[S0] = pos;
+  assert(node1->countS_ >= node0->countS_);
+  node0->count_ -= (node1->countS_ - node0->countS_);
+  assert(node0->count_ > 0);
+  SWAP(Count_t,  node0->countS_, node1->countS_);
+  SWAP(Symbol_t, node0->symbol_, node1->symbol_);
+  // Note: probaL_ and probaS_ are not swapped; UpdateTree() recomputes them.
+}
+#undef SWAP
+
+// -----------------------------------------------------------------------------
+// probability computation
+
+static WEBP_INLINE int CalcProba(Count_t num, Count_t total,
+                                 int max_proba, int round) {
+  // Scaled numerator, computed with Count_t (unsigned) arithmetic.
+  const Count_t scaled = num * max_proba + round;
+  assert(total > 0);
+  assert(scaled / total <= MAX_PROBA);
+  return MAX_PROBA - (int)(scaled / total);
+}
+
+static WEBP_INLINE void UpdateNodeProbas(TCoder* const c, int pos) {
+  Node* const cur = &c->nodes_[pos];
+  // probaS_ is the complement of countS_ / total, scaled to MAX_PROBA.
+  cur->probaS_ = CalcProba(cur->countS_, TotalCount(cur), MAX_PROBA, 0);
+  if (!IsLeaf(c, pos)) {
+    // probaL_ is left_total / count_, scaled to MAX_PROBA.
+    const Count_t left_total = TotalCount(&c->nodes_[2 * pos]);
+    cur->probaL_ =
+        MAX_PROBA - CalcProba(left_total, cur->count_, MAX_PROBA, 0);
+  }
+}
+
+// Refreshes the cached probas from node 'pos' up to the root, then probaN_.
+static void UpdateProbas(TCoder* const c, int pos) {
+  const Count_t total = c->total_coded_;
+  for ( ; pos > 0; pos /= 2) UpdateNodeProbas(c, pos);
+  c->probaN_ = CalcProba(c->num_symbols_, total, HALF_PROBA - 1, 0);
+}
+
+// -----------------------------------------------------------------------------
+
+// Records 'incr' more occurrences of the symbol stored at node 'pos' and
+// propagates the change (counters, symbol order, probas) up to the root.
+static void UpdateTree(TCoder* const c, int pos, Count_t incr) {
+  Node* node;
+  int p;
+  assert(c != NULL);                          // check before any dereference
+  assert(pos >= 1 && pos <= c->num_nodes_);   // nodes_[] is 1-based
+  node = &c->nodes_[pos];
+  assert(node->symbol_ != INVALID_SYMBOL);
+  // When frozen, only a symbol that was never counted before is recorded.
+  if (c->frozen_ && node->countS_ != 0) return;
+  // Update the counters up the tree, possibly exchanging some nodes.
+  node->countS_ += incr;
+  for (p = pos; p > 1; p >>= 1) {
+    Node* const parent = &c->nodes_[p >> 1];
+    parent->count_ += incr;
+    // A child counted more often than its parent bubbles up one level.
+    if (parent->countS_ < node->countS_) ExchangeSymbol(c, p);
+    node = parent;
+  }
+  c->total_coded_ += incr;
+  UpdateProbas(c, pos);  // refresh the probas along the modified path
+}
+
+// -----------------------------------------------------------------------------
+// Fixed-length symbol coding
+// Note: the symbol will be coded exactly once at most, so using a fixed length
+// code is better than Golomb-code (e.g.) on average.
+
+// We use the exact bit-distribution probability considering the upper-bound
+// supplied:
+//  Written in binary, a symbol 's' has a probability of having its k-th bit
+// set to 1 which is given by:
+//  If the k-th bit of max_value is 0:
+//    P0(k) = [(max_value >> (k + 1)) << k] / max_value
+//  If the k-th bit of max_value is 1:
+//    P1(k) = P0(k) + [max_value & ((1 << k) - 1)] / max_value
+
+static WEBP_INLINE void CodeSymbol(VP8BitWriter* const bw, int s,
+                                   int max_value) {
+  int bit, mask;
+  assert(bw);
+  for (bit = 0, mask = 1; mask < max_value; ++bit, mask <<= 1) {
+    int num = (max_value >> 1) & ~(mask - 1);   // numerator of P0(bit)
+    if (max_value & mask) num |= max_value & (mask - 1);   // ... or P1(bit)
+    VP8PutBit(bw, (s >> bit) & 1, MAX_PROBA - MAX_PROBA * num / max_value);
+  }
+}
+
+static WEBP_INLINE int DecodeSymbol(VP8BitReader* const br, int max_value) {
+  int bit, mask, value = 0;
+  assert(br);
+  // Bits are read lowest first, each with a proba derived from 'max_value'.
+  for (bit = 0, mask = 1; mask < max_value; ++bit, mask <<= 1) {
+    int num = (max_value >> 1) & ~(mask - 1);
+    if (max_value & mask) num |= max_value & (mask - 1);
+    value |= VP8GetBit(br, MAX_PROBA - MAX_PROBA * num / max_value) << bit;
+  }
+  return value;
+}
+
+// -----------------------------------------------------------------------------
+// Encoding
+
+void TCoderEncode(TCoder* const c, int s, VP8BitWriter* const bw) {
+  int pos;
+  int is_new_symbol;
+  // Validate the arguments before 's' is used to index symbols_[].
+  assert(c != NULL);
+  assert(s >= 0 && s < c->num_nodes_);
+  is_new_symbol = (c->symbols_[s] == INVALID_POS);
+  // The 'new symbol' flag is only sent when both cases are possible.
+  if (!c->fixed_symbols_ && c->num_symbols_ < c->num_nodes_) {
+    if (c->num_symbols_ > 0) {
+      if (bw != NULL) VP8PutBit(bw, is_new_symbol, c->probaN_);
+    } else {
+      assert(is_new_symbol);   // the very first symbol is always new
+    }
+  } else {
+    assert(!is_new_symbol);    // table full or fixed: 's' must be known
+  }
+  if (is_new_symbol) {
+    // Send the raw value, then give the symbol a node of its own.
+    if (bw != NULL) CodeSymbol(bw, s, c->num_nodes_);
+    pos = NewNode(c, s);
+  } else {
+    pos = SymbolToNode(c, s);
+    if (bw != NULL) {
+      // Send the path from the root down to 'pos': at each node, a 'stop'
+      // bit, followed by a left/right bit unless the node has one child.
+      const int length = CodeLength(pos);
+      int i, parent = 1;
+      for (i = 0; !IsLeaf(c, parent); ++i) {
+        const Node* const node = &c->nodes_[parent];
+        const int is_stop = (i == length);
+        if (VP8PutBit(bw, is_stop, node->probaS_)) {
+          break;
+        } else if (!HasOnlyRightChild(c, parent)) {
+          const int is_right = (pos >> (length - 1 - i)) & 1;  // path bit #i
+          VP8PutBit(bw, is_right, node->probaL_);
+          parent = (parent << 1) | is_right;
+        } else {
+          parent <<= 1;   // single child: no branching bit is needed
+          break;
+        }
+      }
+      assert(parent == pos);
+    }
+  }
+  UpdateTree(c, pos, 1);
+}
+
+// -----------------------------------------------------------------------------
+// Decoding
+
+int TCoderDecode(TCoder* const c, VP8BitReader* const br) {
+  int s;
+  int pos;
+  int is_new_symbol = 0;
+  assert(c);
+  assert(br);
+  // Read the 'new symbol' flag, unless it can be deduced from the state.
+  if (!c->fixed_symbols_ && c->num_symbols_ < c->num_nodes_) {
+    if (c->num_symbols_ > 0) {
+      is_new_symbol = VP8GetBit(br, c->probaN_);
+    } else {
+      is_new_symbol = 1;   // the very first symbol is always new
+    }
+  }
+  // Decode either the raw value, or the path downward to its node.
+  if (is_new_symbol) {
+    s = DecodeSymbol(br, c->num_nodes_);  // NOTE(review): range not checked
+    pos = NewNode(c, s);
+  } else {
+    pos = 1;   // start from the root
+    while (!IsLeaf(c, pos)) {
+      const Node* const node = &c->nodes_[pos];
+      // Did we reach the stopping node?
+      const int symbol_proba = node->probaS_;
+      const int is_stop = VP8GetBit(br, symbol_proba);
+      if (is_stop) {
+        break;  // reached the stopping node for the coded symbol.
+      } else {
+        // Not yet done, keep traversing and branching.
+        if (!HasOnlyRightChild(c, pos)) {
+          const int left_proba = node->probaL_;
+          const int is_right = VP8GetBit(br, left_proba);
+          pos = (pos << 1) | is_right;
+        } else {
+          pos <<= 1;   // single child: no branching bit was coded
+          break;
+        }
+        assert(pos <= c->num_nodes_);
+      }
+    }
+    s = c->nodes_[pos].symbol_;
+    assert(pos == SymbolToNode(c, s));
+  }
+  assert(pos <= c->num_nodes_);
+  UpdateTree(c, pos, 1);   // same statistics update as done by TCoderEncode()
+  return s;
+}
+
+// -----------------------------------------------------------------------------
+
+// Returns the estimated cost, in bits, of coding 'symbol' next.
+double TCoderSymbolCost(const TCoder* const c, int symbol) {
+  int pos;
+  assert(c != NULL);
+  assert(symbol >= 0 && symbol < c->num_nodes_);
+  pos = c->symbols_[symbol];   // read only once 'symbol' is known to be valid
+  if (pos == INVALID_POS) {
+    return c->symbol_bit_cost_;   // not seen yet: cost of a new symbol
+  }
+  assert(c->nodes_[pos].countS_ > 0);
+  assert(c->total_coded_ > 0);
+  // Note: we use 1 + total_coded_ as denominator because we most probably
+  // intend to code an extra symbol afterward.
+  // TODO(skal): is log2() too slow ?
+  return -log2(c->nodes_[pos].countS_ / (1. + c->total_coded_));
+}
+
+// -----------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/src/utils/tcoder.h b/src/utils/tcoder.h
new file mode 100644
index 00000000..158cb2eb
--- /dev/null
+++ b/src/utils/tcoder.h
@@ -0,0 +1,84 @@
+// Copyright 2011 Google Inc.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Tree-coder using VP8's boolean coder
+//
+// Symbols are stored as nodes of a tree that records their frequencies and
+// is dynamically updated.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//
+// Encoding example:
+/*
+static int Compress(const uint8_t* src, int src_length,
+                    uint8_t** output, size_t* output_size) {
+  int i;
+  TCoder* coder = TCoderNew(255);
+  VP8BitWriter bw;
+
+  VP8BitWriterInit(&bw, 0);
+  for (i = 0; i < src_length; ++i)
+    TCoderEncode(coder, src[i], &bw);
+  TCoderDelete(coder);
+  VP8BitWriterFinish(&bw);
+
+  *output = VP8BitWriterBuf(&bw);
+  *output_size = VP8BitWriterSize(&bw);
+  return !bw.error_;
+}
+*/
+//
+// Decoding example:
+/*
+static int Decompress(const uint8_t* src, size_t src_size,
+                      uint8_t* dst, int dst_length) {
+  int i;
+  TCoder* coder = TCoderNew(255);
+  VP8BitReader br;
+
+  VP8InitBitReader(&br, src, src + src_size);
+  for (i = 0; i < dst_length; ++i)
+    dst[i] = TCoderDecode(coder, &br);
+  TCoderDelete(coder);
+  return !br.eof_;
+}
+*/
+
+#ifndef WEBP_UTILS_TCODER_H_
+#define WEBP_UTILS_TCODER_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct VP8BitReader;
+struct VP8BitWriter;
+typedef struct TCoder TCoder;
+
+// Creates a tree-coder capable of coding symbols in
+// the [0, max_symbol] range. Returns NULL in case of memory error.
+TCoder* TCoderNew(int max_symbol);
+// Re-initializes an existing object, making it ready for a new encoding or
+// decoding cycle.
+void TCoderInit(TCoder* const c);
+// Destroys the tree-coder object and frees its memory.
+void TCoderDelete(TCoder* const c);
+
+// Codes next symbol 's'. If the bit-writer 'bw' is NULL, the function will
+// just record the symbol, and update the internal frequency counters.
+void TCoderEncode(TCoder* const c, int s, struct VP8BitWriter* const bw);
+// Decodes and returns the next symbol.
+int TCoderDecode(TCoder* const c, struct VP8BitReader* const br);
+
+// Theoretical number of bits needed to code 'symbol' in the current state.
+double TCoderSymbolCost(const TCoder* const c, int symbol);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // WEBP_UTILS_TCODER_H_
diff --git a/src/utils/tcoderi.h b/src/utils/tcoderi.h
new file mode 100644
index 00000000..40f25855
--- /dev/null
+++ b/src/utils/tcoderi.h
@@ -0,0 +1,71 @@
+// Copyright 2011 Google Inc.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Internal header for tree-coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//
+
+#ifndef WEBP_UTILS_TCODERI_H_
+#define WEBP_UTILS_TCODERI_H_
+
+#include "./tcoder.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../utils/bit_reader.h"
+#include "../utils/bit_writer.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef int Symbol_t;
+typedef uint32_t Count_t;  // TODO(skal): check overflow during coding.
+
+#define INVALID_SYMBOL ((Symbol_t)(-1))   // marks a node holding no symbol
+#define INVALID_POS    0                  // nodes are 1-based: 0 is never used
+
+#define MAX_PROBA 255     // largest value a cached probability can take
+#define HALF_PROBA 128    // initial value of the cached probabilities
+
+typedef struct {        // ternary node.
+  Symbol_t symbol_;   // symbol held by this node, or INVALID_SYMBOL
+  // Note: theoretically, one of these three fields is redundant and could be
+  // omitted, but it'd make the code quite complicated (having to look-up the
+  // parent's total count in order to deduce the missing field). Better not.
+  Count_t countS_;    // count for symbol
+  Count_t count_;     // count for non-symbol (derived from sub-tree)
+  int probaL_;        // cached left proba = TotalCount(left) / count_
+  int probaS_;        // cached proba, stored as 1 - countS_ / TotalCount
+} Node;
+
+struct TCoder {
+  // dynamic fields:
+  int num_symbols_;       // number of symbols actually used
+  Count_t total_coded_;   // total number of coded symbols
+  int frozen_;            // if true, only never-seen symbols update counts
+  int fixed_symbols_;     // if true, symbols are not updated
+  int probaN_;            // cached new-symbol probability
+
+  // constants:
+  int num_nodes_;            // max number of symbols or nodes. Constant, > 0.
+  double symbol_bit_cost_;   // latest evaluation of the bit-cost per new symbol
+
+  Node* nodes_;              // nodes (1-based indexed)
+  int* symbols_;             // for each symbol, its node (or INVALID_POS)
+};
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // WEBP_UTILS_TCODERI_H_