Further reduce memory to decode lossy+alpha images

Previously, decoding such images used roughly 9 * width * height bytes.
Now it takes roughly 6 * width * height bytes.

Change-Id: Ie4a681ca5074d96d64f30b2597fafdca648dd8f7
This commit is contained in:
Urvang Joshi 2013-05-13 16:24:49 -07:00
parent 5199eab516
commit 64c844863a
4 changed files with 253 additions and 166 deletions

View File

@ -625,10 +625,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
} }
} }
// Inverse-transform path for alpha planes decoded as one byte per pixel.
// Applies the decoder's single color-indexing transform to 'num_rows' rows
// taken from 'rows', covering image rows
// [dec->last_row_, dec->last_row_ + num_rows). The result is written through
// dec->io_->opaque at a byte offset of dec->io_->width * dec->last_row_.
// NOTE(review): io_->opaque is presumably the caller's 8-bit alpha output
// plane -- confirm against the code that sets up 'io'.
static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
                                        const uint8_t* const rows) {
  const int first_row = dec->last_row_;
  const int last_row = first_row + num_rows;
  VP8LTransform* const palette = &dec->transforms_[0];
  uint8_t* const out =
      (uint8_t*)dec->io_->opaque + dec->io_->width * first_row;
  // This path is only valid when color indexing is the one and only transform.
  assert(dec->next_transform_ == 1);
  assert(palette->type_ == COLOR_INDEXING_TRANSFORM);
  VP8LColorIndexInverseTransformAlpha(palette, first_row, last_row, rows, out);
}
// Processes (transforms, scales & color-converts) the rows decoded after the // Processes (transforms, scales & color-converts) the rows decoded after the
// last call. // last call.
static void ProcessRows(VP8LDecoder* const dec, int row) { static void ProcessRows(VP8LDecoder* const dec, int row) {
const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_; const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
const int num_rows = row - dec->last_row_; const int num_rows = row - dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done. if (num_rows <= 0) return; // Nothing to be done.
@ -667,121 +681,135 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
assert(dec->last_row_ <= dec->height_); assert(dec->last_row_ <= dec->height_);
} }
static int DecodeImageData(VP8LDecoder* const dec, #define DECODE_DATA_FUNC(FUNC_NAME, TYPE, STORE_PIXEL) \
uint32_t* const data, int width, int height, static int FUNC_NAME(VP8LDecoder* const dec, TYPE* const data, int width, \
ProcessRowsFunc process_func) { int height, ProcessRowsFunc process_func) { \
int ok = 1; int ok = 1; \
int col = 0, row = 0; int col = 0, row = 0; \
VP8LBitReader* const br = &dec->br_; VP8LBitReader* const br = &dec->br_; \
VP8LMetadata* const hdr = &dec->hdr_; VP8LMetadata* const hdr = &dec->hdr_; \
HTreeGroup* htree_group = hdr->htree_groups_; HTreeGroup* htree_group = hdr->htree_groups_; \
uint32_t* src = data; TYPE* src = data; \
uint32_t* last_cached = data; TYPE* last_cached = data; \
uint32_t* const src_end = data + width * height; TYPE* const src_end = data + width * height; \
const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; \
const int color_cache_limit = len_code_limit + hdr->color_cache_size_; const int color_cache_limit = len_code_limit + hdr->color_cache_size_; \
VP8LColorCache* const color_cache = VP8LColorCache* const color_cache = \
(hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; \
const int mask = hdr->huffman_mask_; const int mask = hdr->huffman_mask_; \
assert(htree_group != NULL); \
assert(htree_group != NULL); while (!br->eos_ && src < src_end) { \
int code; \
while (!br->eos_ && src < src_end) { /* Only update when changing tile. Note we could use this test: */ \
int code; /* if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed */ \
// Only update when changing tile. Note we could use the following test: /* but that's actually slower and needs storing the previous col/row. */ \
// if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed if ((col & mask) == 0) { \
// but that's actually slower and requires storing the previous col/row htree_group = GetHtreeGroupForPos(hdr, col, row); \
if ((col & mask) == 0) { } \
htree_group = GetHtreeGroupForPos(hdr, col, row); VP8LFillBitWindow(br); \
} code = ReadSymbol(&htree_group->htrees_[GREEN], br); \
VP8LFillBitWindow(br); if (code < NUM_LITERAL_CODES) { /* Literal*/ \
code = ReadSymbol(&htree_group->htrees_[GREEN], br); int red, green, blue, alpha; \
if (code < NUM_LITERAL_CODES) { // Literal. red = ReadSymbol(&htree_group->htrees_[RED], br); \
int red, green, blue, alpha; green = code; \
red = ReadSymbol(&htree_group->htrees_[RED], br); VP8LFillBitWindow(br); \
green = code; blue = ReadSymbol(&htree_group->htrees_[BLUE], br); \
VP8LFillBitWindow(br); alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br); \
blue = ReadSymbol(&htree_group->htrees_[BLUE], br); *src = STORE_PIXEL(alpha, red, green, blue); \
alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br); AdvanceByOne: \
*src = (alpha << 24) + (red << 16) + (green << 8) + blue; ++src; \
AdvanceByOne: ++col; \
++src; if (col >= width) { \
++col; col = 0; \
if (col >= width) { ++row; \
col = 0; if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { \
++row; process_func(dec, row); \
if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { } \
process_func(dec, row); if (color_cache != NULL) { \
} while (last_cached < src) { \
if (color_cache != NULL) { VP8LColorCacheInsert(color_cache, *last_cached++); \
while (last_cached < src) { } \
VP8LColorCacheInsert(color_cache, *last_cached++); } \
} } \
} } else if (code < len_code_limit) { /* Backward reference */ \
} int dist_code, dist; \
} else if (code < len_code_limit) { // Backward reference const int length_sym = code - NUM_LITERAL_CODES; \
int dist_code, dist; const int length = GetCopyLength(length_sym, br); \
const int length_sym = code - NUM_LITERAL_CODES; const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br); \
const int length = GetCopyLength(length_sym, br); VP8LFillBitWindow(br); \
const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br); dist_code = GetCopyDistance(dist_symbol, br); \
VP8LFillBitWindow(br); dist = PlaneCodeToDistance(width, dist_code); \
dist_code = GetCopyDistance(dist_symbol, br); if (src - data < dist || src_end - src < length) { \
dist = PlaneCodeToDistance(width, dist_code); ok = 0; \
if (src - data < dist || src_end - src < length) { goto End; \
ok = 0; } \
goto End; { \
} int i; \
{ for (i = 0; i < length; ++i) src[i] = src[i - dist]; \
int i; src += length; \
for (i = 0; i < length; ++i) src[i] = src[i - dist]; } \
src += length; col += length; \
} while (col >= width) { \
col += length; col -= width; \
while (col >= width) { ++row; \
col -= width; if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { \
++row; process_func(dec, row); \
if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { } \
process_func(dec, row); } \
} if (src < src_end) { \
} htree_group = GetHtreeGroupForPos(hdr, col, row); \
if (src < src_end) { if (color_cache != NULL) { \
htree_group = GetHtreeGroupForPos(hdr, col, row); while (last_cached < src) { \
if (color_cache != NULL) { VP8LColorCacheInsert(color_cache, *last_cached++); \
while (last_cached < src) { } \
VP8LColorCacheInsert(color_cache, *last_cached++); } \
} } \
} } else if (code < color_cache_limit) { /* Color cache */ \
} const int key = code - len_code_limit; \
} else if (code < color_cache_limit) { // Color cache. assert(color_cache != NULL); \
const int key = code - len_code_limit; while (last_cached < src) { \
assert(color_cache != NULL); VP8LColorCacheInsert(color_cache, *last_cached++); \
while (last_cached < src) { } \
VP8LColorCacheInsert(color_cache, *last_cached++); *src = VP8LColorCacheLookup(color_cache, key); \
} goto AdvanceByOne; \
*src = VP8LColorCacheLookup(color_cache, key); } else { /* Not reached */ \
goto AdvanceByOne; ok = 0; \
} else { // Not reached. goto End; \
ok = 0; } \
goto End; ok = !br->error_; \
} if (!ok) goto End; \
ok = !br->error_; } \
if (!ok) goto End; /* Process the remaining rows corresponding to last row-block. */ \
} if (process_func != NULL) process_func(dec, row); \
// Process the remaining rows corresponding to last row-block. End: \
if (process_func != NULL) process_func(dec, row); if (br->error_ || !ok || (br->eos_ && src < src_end)) { \
ok = 0; \
End: dec->status_ = \
if (br->error_ || !ok || (br->eos_ && src < src_end)) { (!br->eos_) ? VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED; \
ok = 0; } else if (src == src_end) { \
dec->status_ = (!br->eos_) ? dec->state_ = READ_DATA; \
VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED; } \
} else if (src == src_end) { return ok; \
dec->state_ = READ_DATA;
}
return ok;
} }
// Packs four channel values into a single 32-bit pixel: alpha in bits 24..31,
// red in bits 16..23, green in bits 8..15 and blue in bits 0..7.
// Each channel is widened to uint32_t before shifting. Shifting an 'int' that
// holds a value >= 128 left by 24 would move a bit into the sign position,
// which is undefined behavior in C; the unsigned shift is always well defined.
// NOTE(review): callers are assumed to pass values in [0, 255] -- confirm
// against ReadSymbol()'s range for the RED/GREEN/BLUE/ALPHA trees.
static WEBP_INLINE uint32_t GetARGBPixel(int alpha, int red, int green,
                                         int blue) {
  return ((uint32_t)alpha << 24) | ((uint32_t)red << 16) |
         ((uint32_t)green << 8) | (uint32_t)blue;
}
// Pixel-store helper for the 8-bit alpha decoder (DecodeAlphaData): only the
// green symbol carries the alpha value, so it is the one value kept. The
// signature mirrors GetARGBPixel() so both can be plugged into
// DECODE_DATA_FUNC; the other three arguments are intentionally ignored.
static WEBP_INLINE uint8_t GetAlphaPixel(int alpha, int red, int green,
                                         int blue) {
  // Silence unused-parameter warnings.
  (void)blue;
  (void)red;
  (void)alpha;
  return (uint8_t)green;
}
// Instantiate the shared decoding loop twice from the DECODE_DATA_FUNC
// template. DecodeImageData() works on a uint32_t buffer and stores each
// literal through GetARGBPixel().
DECODE_DATA_FUNC(DecodeImageData, uint32_t, GetARGBPixel)
// DecodeAlphaData() works on a uint8_t buffer (one byte per pixel) and stores
// each literal through GetAlphaPixel().
DECODE_DATA_FUNC(DecodeAlphaData, uint8_t, GetAlphaPixel)
// The template macro is not needed past the two instantiations above.
#undef DECODE_DATA_FUNC
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// VP8LTransform // VP8LTransform
@ -903,8 +931,8 @@ void VP8LClear(VP8LDecoder* const dec) {
if (dec == NULL) return; if (dec == NULL) return;
ClearMetadata(&dec->hdr_); ClearMetadata(&dec->hdr_);
free(dec->argb_); free(dec->pixels_);
dec->argb_ = NULL; dec->pixels_ = NULL;
for (i = 0; i < dec->next_transform_; ++i) { for (i = 0; i < dec->next_transform_; ++i) {
ClearTransform(&dec->transforms_[i]); ClearTransform(&dec->transforms_[i]);
} }
@ -1028,35 +1056,38 @@ static int DecodeImageStream(int xsize, int ysize,
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_ // Allocate internal buffers dec->pixels_ and dec->argb_cache_.
static int AllocateInternalBuffers(VP8LDecoder* const dec, int final_width,
static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) { size_t bytes_per_pixel) {
const int argb_cache_needed = (bytes_per_pixel == sizeof(uint32_t));
const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_; const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
// Scratch buffer corresponding to top-prediction row for transforming the // Scratch buffer corresponding to top-prediction row for transforming the
// first row in the row-blocks. // first row in the row-blocks. Not needed for paletted alpha.
const uint64_t cache_top_pixels = final_width; const uint64_t cache_top_pixels = argb_cache_needed ? final_width : 0ULL;
// Scratch buffer for temporary BGRA storage. // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS; const uint64_t cache_pixels =
argb_cache_needed ? (uint64_t)final_width * NUM_ARGB_CACHE_ROWS : 0ULL;
const uint64_t total_num_pixels = const uint64_t total_num_pixels =
num_pixels + cache_top_pixels + cache_pixels; num_pixels + cache_top_pixels + cache_pixels;
assert(dec->width_ <= final_width); assert(dec->width_ <= final_width);
dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_)); dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, bytes_per_pixel);
if (dec->argb_ == NULL) { if (dec->pixels_ == NULL) {
dec->argb_cache_ = NULL; // for sanity check dec->argb_cache_ = NULL; // for sanity check
dec->status_ = VP8_STATUS_OUT_OF_MEMORY; dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
return 0; return 0;
} }
dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels; dec->argb_cache_ =
argb_cache_needed ? dec->pixels_ + num_pixels + cache_top_pixels : NULL;
return 1; return 1;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Special row-processing that only stores the alpha data.
// Special row-processing that only stores the alpha data.
static void ExtractAlphaRows(VP8LDecoder* const dec, int row) { static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
const int num_rows = row - dec->last_row_; const int num_rows = row - dec->last_row_;
const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_; const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done. if (num_rows <= 0) return; // Nothing to be done.
ApplyInverseTransforms(dec, num_rows, in); ApplyInverseTransforms(dec, num_rows, in);
@ -1070,7 +1101,17 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
int i; int i;
for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff; for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
} }
dec->last_row_ = dec->last_out_row_ = row;
}
// Row-processing for the special case when alpha data contains only one
// transform: color indexing.
static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
const int num_rows = row - dec->last_row_;
const uint8_t* const in =
(uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done.
ApplyInverseTransformsAlpha(dec, num_rows, in);
dec->last_row_ = dec->last_out_row_ = row; dec->last_row_ = dec->last_out_row_ = row;
} }
@ -1079,6 +1120,7 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
VP8Io io; VP8Io io;
int ok = 0; int ok = 0;
VP8LDecoder* const dec = VP8LNew(); VP8LDecoder* const dec = VP8LNew();
size_t bytes_per_pixel = sizeof(uint32_t); // Default: BGRA mode.
if (dec == NULL) return 0; if (dec == NULL) return 0;
dec->width_ = width; dec->width_ = width;
@ -1097,13 +1139,24 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
dec->action_ = READ_HDR; dec->action_ = READ_HDR;
if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err; if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
// Allocate output (note that dec->width_ may have changed here). // Special case: if alpha data contains only the color indexing transform
if (!AllocateARGBBuffers(dec, width)) goto Err; // (a frequent case), we will use DecodeAlphaData() method that only needs
// allocation of 1 byte per pixel (alpha channel).
if (dec->next_transform_ == 1 &&
dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM) {
bytes_per_pixel = sizeof(uint8_t);
}
// Allocate internal buffers (note that dec->width_ may have changed here).
if (!AllocateInternalBuffers(dec, width, bytes_per_pixel)) goto Err;
// Decode (with special row processing). // Decode (with special row processing).
dec->action_ = READ_DATA; dec->action_ = READ_DATA;
ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_, ok = (bytes_per_pixel == sizeof(uint8_t)) ?
ExtractAlphaRows); DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
ExtractPalettedAlphaRows) :
DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
ExtractAlphaRows);
Err: Err:
VP8LDelete(dec); VP8LDelete(dec);
@ -1143,6 +1196,7 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
} }
int VP8LDecodeImage(VP8LDecoder* const dec) { int VP8LDecodeImage(VP8LDecoder* const dec) {
const size_t bytes_per_pixel = sizeof(uint32_t);
VP8Io* io = NULL; VP8Io* io = NULL;
WebPDecParams* params = NULL; WebPDecParams* params = NULL;
@ -1162,13 +1216,13 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
goto Err; goto Err;
} }
if (!AllocateARGBBuffers(dec, io->width)) goto Err; if (!AllocateInternalBuffers(dec, io->width, bytes_per_pixel)) goto Err;
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
// Decode. // Decode.
dec->action_ = READ_DATA; dec->action_ = READ_DATA;
if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_, if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
ProcessRows)) { ProcessRows)) {
goto Err; goto Err;
} }

View File

@ -63,7 +63,8 @@ typedef struct {
const WebPDecBuffer *output_; // shortcut to io->opaque->output const WebPDecBuffer *output_; // shortcut to io->opaque->output
uint32_t *argb_; // Internal data: always in BGRA color mode. uint32_t *pixels_; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage. uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br_; VP8LBitReader br_;

View File

@ -1093,39 +1093,64 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
} }
// Separate out pixels packed together using pixel-bundling. // Separate out pixels packed together using pixel-bundling.
static void ColorIndexInverseTransform( // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
const VP8LTransform* const transform, #define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
int y_start, int y_end, const uint32_t* src, uint32_t* dst) { void FUNC_NAME(const VP8LTransform* const transform, \
int y; int y_start, int y_end, const TYPE* src, TYPE* dst) { \
const int bits_per_pixel = 8 >> transform->bits_; int y; \
const int width = transform->xsize_; const int bits_per_pixel = 8 >> transform->bits_; \
const uint32_t* const color_map = transform->data_; const int width = transform->xsize_; \
if (bits_per_pixel < 8) { const uint32_t* const color_map = transform->data_; \
const int pixels_per_byte = 1 << transform->bits_; if (bits_per_pixel < 8) { \
const int count_mask = pixels_per_byte - 1; const int pixels_per_byte = 1 << transform->bits_; \
const uint32_t bit_mask = (1 << bits_per_pixel) - 1; const int count_mask = pixels_per_byte - 1; \
for (y = y_start; y < y_end; ++y) { const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
uint32_t packed_pixels = 0; for (y = y_start; y < y_end; ++y) { \
int x; uint32_t packed_pixels = 0; \
for (x = 0; x < width; ++x) { int x; \
// We need to load fresh 'packed_pixels' once every 'pixels_per_byte' for (x = 0; x < width; ++x) { \
// increments of x. Fortunately, pixels_per_byte is a power of 2, so /* We need to load fresh 'packed_pixels' once every */ \
// can just use a mask for that, instead of decrementing a counter. /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff; /* is a power of 2, so can just use a mask for that, instead of */ \
*dst++ = color_map[packed_pixels & bit_mask]; /* decrementing a counter. */ \
packed_pixels >>= bits_per_pixel; if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
} *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
} packed_pixels >>= bits_per_pixel; \
} else { } \
for (y = y_start; y < y_end; ++y) { } \
int x; } else { \
for (x = 0; x < width; ++x) { for (y = y_start; y < y_end; ++y) { \
*dst++ = color_map[((*src++) >> 8) & 0xff]; int x; \
} for (x = 0; x < width; ++x) { \
} *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
} } \
} \
} \
} }
// Index extractor for the ARGB instantiation of COLOR_INDEX_INVERSE: returns
// bits 8..15 of the source pixel (the green channel), which hold the (possibly
// packed) palette index.
static WEBP_INLINE uint32_t GetARGBIndex(uint32_t index) {
  return (index & 0xff00) >> 8;
}
// Index extractor for the 8-bit alpha instantiation of COLOR_INDEX_INVERSE:
// the source byte is used unchanged as the (possibly packed) palette index.
static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t index) {
return index;
}
// Value mapper for the ARGB instantiation of COLOR_INDEX_INVERSE: the color
// map entry is written to the output unchanged.
static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
return val;
}
// Value mapper for the 8-bit alpha instantiation of COLOR_INDEX_INVERSE:
// keeps only bits 8..15 (the green channel) of the color map entry.
static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
  return (uint8_t)(val >> 8);
}
// ARGB variant. The macro expands to a function definition starting with
// 'void', so the leading 'static' keeps this instance local to the file.
static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
GetARGBValue)
// 8-bit alpha variant. No 'static' here: this instance has external linkage.
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
GetAlphaValue)
// The template macro is not needed past the two instantiations above.
#undef COLOR_INDEX_INVERSE
void VP8LInverseTransform(const VP8LTransform* const transform, void VP8LInverseTransform(const VP8LTransform* const transform,
int row_start, int row_end, int row_start, int row_end,
const uint32_t* const in, uint32_t* const out) { const uint32_t* const in, uint32_t* const out) {

View File

@ -33,6 +33,13 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
int row_start, int row_end, int row_start, int row_end,
const uint32_t* const in, uint32_t* const out); const uint32_t* const in, uint32_t* const out);
// Color-indexing inverse transform for alpha-only data. Same algorithm as the
// static ColorIndexInverseTransform() in lossless.c, but 'src' and 'dst' are
// uint8_t (one byte per element) instead of uint32_t.
// Processes rows [y_start, y_end): each source byte is used as a (possibly
// packed) palette index, and bits 8..15 of the matching color map entry are
// written to 'dst'.
void VP8LColorIndexInverseTransformAlpha(
const struct VP8LTransform* const transform, int y_start, int y_end,
const uint8_t* src, uint8_t* dst);
// Subtracts green from blue and red channels. // Subtracts green from blue and red channels.
void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs); void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);