mirror of
https://github.com/webmproject/libwebp.git
synced 2025-02-13 07:22:52 +01:00
enc->Iterator memory cleanup
* move yuv_in_/out_* scratch buffers to iterator
* add y_top_/uv_top_ shortcuts in iterator

That's ~3k of stack size instead of heap, but it allows having several iterators work in parallel.

Change-Id: I6a437c0f2ef1e5d398c1d6a2fd4974fa0869f0c1
This commit is contained in:
parent
e81fac86dd
commit
733a7faae4
@ -47,6 +47,8 @@ void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
|
|||||||
it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;
|
it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;
|
||||||
it->nz_ = enc->nz_;
|
it->nz_ = enc->nz_;
|
||||||
it->mb_ = enc->mb_info_ + y * enc->mb_w_;
|
it->mb_ = enc->mb_info_ + y * enc->mb_w_;
|
||||||
|
it->y_top_ = enc->y_top_;
|
||||||
|
it->uv_top_ = enc->uv_top_;
|
||||||
InitLeft(it);
|
InitLeft(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,11 +66,10 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
|
|||||||
it->enc_ = enc;
|
it->enc_ = enc;
|
||||||
it->y_stride_ = enc->pic_->y_stride;
|
it->y_stride_ = enc->pic_->y_stride;
|
||||||
it->uv_stride_ = enc->pic_->uv_stride;
|
it->uv_stride_ = enc->pic_->uv_stride;
|
||||||
// TODO(later): for multithreading, these should be owned by 'it'.
|
it->yuv_in_ = (uint8_t*)DO_ALIGN(it->yuv_mem_);
|
||||||
it->yuv_in_ = enc->yuv_in_;
|
it->yuv_out_ = it->yuv_in_ + YUV_SIZE;
|
||||||
it->yuv_out_ = enc->yuv_out_;
|
it->yuv_out2_ = it->yuv_out_ + YUV_SIZE;
|
||||||
it->yuv_out2_ = enc->yuv_out2_;
|
it->yuv_p_ = it->yuv_out2_ + YUV_SIZE;
|
||||||
it->yuv_p_ = enc->yuv_p_;
|
|
||||||
it->lf_stats_ = enc->lf_stats_;
|
it->lf_stats_ = enc->lf_stats_;
|
||||||
it->percent0_ = enc->percent_;
|
it->percent0_ = enc->percent_;
|
||||||
it->y_left_ = (uint8_t*)DO_ALIGN(it->yuv_left_mem_ + 1);
|
it->y_left_ = (uint8_t*)DO_ALIGN(it->yuv_left_mem_ + 1);
|
||||||
@ -267,19 +268,21 @@ int VP8IteratorNext(VP8EncIterator* const it,
|
|||||||
it->v_left_[i] = usrc[15 + i * BPS];
|
it->v_left_[i] = usrc[15 + i * BPS];
|
||||||
}
|
}
|
||||||
// top-left (before 'top'!)
|
// top-left (before 'top'!)
|
||||||
it->y_left_[-1] = enc->y_top_[x * 16 + 15];
|
it->y_left_[-1] = it->y_top_[15];
|
||||||
it->u_left_[-1] = enc->uv_top_[x * 16 + 0 + 7];
|
it->u_left_[-1] = it->uv_top_[0 + 7];
|
||||||
it->v_left_[-1] = enc->uv_top_[x * 16 + 8 + 7];
|
it->v_left_[-1] = it->uv_top_[8 + 7];
|
||||||
}
|
}
|
||||||
if (y < enc->mb_h_ - 1) { // top
|
if (y < enc->mb_h_ - 1) { // top
|
||||||
memcpy(enc->y_top_ + x * 16, ysrc + 15 * BPS, 16);
|
memcpy(it->y_top_, ysrc + 15 * BPS, 16);
|
||||||
memcpy(enc->uv_top_ + x * 16, usrc + 7 * BPS, 8 + 8);
|
memcpy(it->uv_top_, usrc + 7 * BPS, 8 + 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
it->preds_ += 4;
|
it->preds_ += 4;
|
||||||
it->mb_ += 1;
|
it->mb_ += 1;
|
||||||
it->nz_ += 1;
|
it->nz_ += 1;
|
||||||
|
it->y_top_ += 16;
|
||||||
|
it->uv_top_ += 16;
|
||||||
it->x_ += 1;
|
it->x_ += 1;
|
||||||
if (it->x_ == enc->mb_w_) {
|
if (it->x_ == enc->mb_w_) {
|
||||||
VP8IteratorSetRow(it, ++it->y_);
|
VP8IteratorSetRow(it, ++it->y_);
|
||||||
@ -374,12 +377,12 @@ void VP8IteratorStartI4(VP8EncIterator* const it) {
|
|||||||
it->i4_boundary_[i] = it->y_left_[15 - i];
|
it->i4_boundary_[i] = it->y_left_[15 - i];
|
||||||
}
|
}
|
||||||
for (i = 0; i < 16; ++i) { // top
|
for (i = 0; i < 16; ++i) { // top
|
||||||
it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i];
|
it->i4_boundary_[17 + i] = it->y_top_[i];
|
||||||
}
|
}
|
||||||
// top-right samples have a special case on the far right of the picture
|
// top-right samples have a special case on the far right of the picture
|
||||||
if (it->x_ < enc->mb_w_ - 1) {
|
if (it->x_ < enc->mb_w_ - 1) {
|
||||||
for (i = 16; i < 16 + 4; ++i) {
|
for (i = 16; i < 16 + 4; ++i) {
|
||||||
it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i];
|
it->i4_boundary_[17 + i] = it->y_top_[i];
|
||||||
}
|
}
|
||||||
} else { // else, replicate the last valid pixel four times
|
} else { // else, replicate the last valid pixel four times
|
||||||
for (i = 16; i < 16 + 4; ++i) {
|
for (i = 16; i < 16 + 4; ++i) {
|
||||||
|
@ -74,7 +74,7 @@ typedef enum { // Rate-distortion optimization levels
|
|||||||
// The predicted blocks can be accessed using offsets to yuv_p_ and
|
// The predicted blocks can be accessed using offsets to yuv_p_ and
|
||||||
// the arrays VP8*ModeOffsets[];
|
// the arrays VP8*ModeOffsets[];
|
||||||
// +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
|
// +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
|
||||||
// Y_OFF |YYYY| <- original samples (enc->yuv_in_)
|
// Y_OFF |YYYY| <- original samples ('yuv_in_')
|
||||||
// |YYYY|
|
// |YYYY|
|
||||||
// |YYYY|
|
// |YYYY|
|
||||||
// |YYYY|
|
// |YYYY|
|
||||||
@ -272,10 +272,10 @@ typedef struct {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
int x_, y_; // current macroblock
|
int x_, y_; // current macroblock
|
||||||
int y_stride_, uv_stride_; // respective strides
|
int y_stride_, uv_stride_; // respective strides
|
||||||
uint8_t* yuv_in_; // borrowed from enc_ (for now)
|
uint8_t* yuv_in_; // input samples
|
||||||
uint8_t* yuv_out_; // ''
|
uint8_t* yuv_out_; // output samples
|
||||||
uint8_t* yuv_out2_; // ''
|
uint8_t* yuv_out2_; // secondary buffer swapped with yuv_out_.
|
||||||
uint8_t* yuv_p_; // ''
|
uint8_t* yuv_p_; // scratch buffer for prediction
|
||||||
VP8Encoder* enc_; // back-pointer
|
VP8Encoder* enc_; // back-pointer
|
||||||
VP8MBInfo* mb_; // current macroblock
|
VP8MBInfo* mb_; // current macroblock
|
||||||
VP8BitWriter* bw_; // current bit-writer
|
VP8BitWriter* bw_; // current bit-writer
|
||||||
@ -297,7 +297,13 @@ typedef struct {
|
|||||||
uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
|
uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
|
||||||
uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
|
uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
|
||||||
uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
|
uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
|
||||||
uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + ALIGN_CST]; // memory for *_left_
|
|
||||||
|
uint8_t* y_top_; // top luma samples at position 'x_'
|
||||||
|
uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes
|
||||||
|
|
||||||
|
// memory for storing y/u/v_left_ and yuv_in_/out_*
|
||||||
|
uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + ALIGN_CST]; // memory for *_left_
|
||||||
|
uint8_t yuv_mem_[3 * YUV_SIZE + PRED_SIZE + ALIGN_CST]; // memory for yuv_*
|
||||||
} VP8EncIterator;
|
} VP8EncIterator;
|
||||||
|
|
||||||
// in iterator.c
|
// in iterator.c
|
||||||
@ -441,10 +447,6 @@ struct VP8Encoder {
|
|||||||
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
|
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
|
||||||
uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
|
uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
|
||||||
uint32_t* nz_; // non-zero bit context: mb_w+1
|
uint32_t* nz_; // non-zero bit context: mb_w+1
|
||||||
uint8_t* yuv_in_; // input samples
|
|
||||||
uint8_t* yuv_out_; // output samples
|
|
||||||
uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_.
|
|
||||||
uint8_t* yuv_p_; // scratch buffer for prediction
|
|
||||||
uint8_t *y_top_; // top luma samples.
|
uint8_t *y_top_; // top luma samples.
|
||||||
uint8_t *uv_top_; // top u/v samples.
|
uint8_t *uv_top_; // top u/v samples.
|
||||||
// U and V are packed into 16 bytes (8 U + 8 V)
|
// U and V are packed into 16 bytes (8 U + 8 V)
|
||||||
|
@ -176,7 +176,6 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
|
|||||||
const size_t preds_size = preds_w * preds_h * sizeof(uint8_t);
|
const size_t preds_size = preds_w * preds_h * sizeof(uint8_t);
|
||||||
const int top_stride = mb_w * 16;
|
const int top_stride = mb_w * 16;
|
||||||
const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + ALIGN_CST;
|
const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + ALIGN_CST;
|
||||||
const size_t cache_size = (3 * YUV_SIZE + PRED_SIZE) * sizeof(uint8_t);
|
|
||||||
const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo);
|
const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo);
|
||||||
const size_t samples_size = 2 * top_stride * sizeof(uint8_t) // top-luma/u/v
|
const size_t samples_size = 2 * top_stride * sizeof(uint8_t) // top-luma/u/v
|
||||||
+ ALIGN_CST; // align all
|
+ ALIGN_CST; // align all
|
||||||
@ -186,7 +185,6 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
|
|||||||
uint8_t* mem;
|
uint8_t* mem;
|
||||||
const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct
|
const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct
|
||||||
+ ALIGN_CST // cache alignment
|
+ ALIGN_CST // cache alignment
|
||||||
+ cache_size // working caches
|
|
||||||
+ info_size // modes info
|
+ info_size // modes info
|
||||||
+ preds_size // prediction modes
|
+ preds_size // prediction modes
|
||||||
+ samples_size // top/left samples
|
+ samples_size // top/left samples
|
||||||
@ -197,14 +195,13 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
|
|||||||
printf("===================================\n");
|
printf("===================================\n");
|
||||||
printf("Memory used:\n"
|
printf("Memory used:\n"
|
||||||
" encoder: %ld\n"
|
" encoder: %ld\n"
|
||||||
" block cache: %ld\n"
|
|
||||||
" info: %ld\n"
|
" info: %ld\n"
|
||||||
" preds: %ld\n"
|
" preds: %ld\n"
|
||||||
" top samples: %ld\n"
|
" top samples: %ld\n"
|
||||||
" non-zero: %ld\n"
|
" non-zero: %ld\n"
|
||||||
" lf-stats: %ld\n"
|
" lf-stats: %ld\n"
|
||||||
" total: %ld\n",
|
" total: %ld\n",
|
||||||
sizeof(VP8Encoder) + ALIGN_CST, cache_size, info_size,
|
sizeof(VP8Encoder) + ALIGN_CST, info_size,
|
||||||
preds_size, samples_size, nz_size, lf_stats_size, size);
|
preds_size, samples_size, nz_size, lf_stats_size, size);
|
||||||
printf("Transient object sizes:\n"
|
printf("Transient object sizes:\n"
|
||||||
" VP8EncIterator: %ld\n"
|
" VP8EncIterator: %ld\n"
|
||||||
@ -231,14 +228,6 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
|
|||||||
enc->mb_w_ = mb_w;
|
enc->mb_w_ = mb_w;
|
||||||
enc->mb_h_ = mb_h;
|
enc->mb_h_ = mb_h;
|
||||||
enc->preds_w_ = preds_w;
|
enc->preds_w_ = preds_w;
|
||||||
enc->yuv_in_ = (uint8_t*)mem;
|
|
||||||
mem += YUV_SIZE;
|
|
||||||
enc->yuv_out_ = (uint8_t*)mem;
|
|
||||||
mem += YUV_SIZE;
|
|
||||||
enc->yuv_out2_ = (uint8_t*)mem;
|
|
||||||
mem += YUV_SIZE;
|
|
||||||
enc->yuv_p_ = (uint8_t*)mem;
|
|
||||||
mem += PRED_SIZE;
|
|
||||||
enc->mb_info_ = (VP8MBInfo*)mem;
|
enc->mb_info_ = (VP8MBInfo*)mem;
|
||||||
mem += info_size;
|
mem += info_size;
|
||||||
enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;
|
enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user