diff --git a/README b/README
index 9a56abe4..5866b446 100644
--- a/README
+++ b/README
@@ -158,6 +158,7 @@ options:
   -pass <int> ............ analysis pass number (1..10)
   -crop <x> <y> <w> <h> .. crop picture with the given rectangle
   -resize <w> <h> ........ resize picture (after any cropping)
+  -mt .................... use multi-threading if available
   -map <int> ............. print map of extra info.
   -print_psnr ............ prints averaged PSNR distortion.
   -print_ssim ............ prints averaged SSIM distortion.
diff --git a/examples/cwebp.c b/examples/cwebp.c
index 3d81e25f..9c4e9a5e 100644
--- a/examples/cwebp.c
+++ b/examples/cwebp.c
@@ -546,6 +546,7 @@ static void HelpLong(void) {
   printf("  -pass <int> ............ analysis pass number (1..10)\n");
   printf("  -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
   printf("  -resize <w> <h> ........ resize picture (after any cropping)\n");
+  printf("  -mt .................... use multi-threading if available\n");
 #ifdef WEBP_EXPERIMENTAL_FEATURES
   printf("  -444 / -422 / -gray ..... Change colorspace\n");
 #endif
@@ -724,6 +725,8 @@ int main(int argc, const char *argv[]) {
       config.autofilter = 1;
     } else if (!strcmp(argv[c], "-jpeg_like")) {
       config.emulate_jpeg_size = 1;
+    } else if (!strcmp(argv[c], "-mt")) {
+      config.thread_level = 1;  // idempotent: validation rejects levels > 1
     } else if (!strcmp(argv[c], "-strong")) {
       config.filter_type = 1;
     } else if (!strcmp(argv[c], "-nostrong")) {
diff --git a/man/cwebp.1 b/man/cwebp.1
index 5d1b3bbb..c52ca258 100644
--- a/man/cwebp.1
+++ b/man/cwebp.1
@@ -1,5 +1,5 @@
 .\" Hey, EMACS: -*- nroff -*-
-.TH CWEBP 1 "February 15, 2013"
+.TH CWEBP 1 "February 28, 2013"
 .SH NAME
 cwebp \- compress an image file to a WebP file
 .SH SYNOPSIS
@@ -82,6 +82,10 @@ of JPEG compression. This flag will generally produce an output file of
 similar size to its JPEG equivalent (for the same \fB\-q\fP setting), but with
 less visual distortion.
 .TP
+.B \-mt
+Use multi-threading for encoding, if possible. This option is only effective
+when using lossy compression on a source with a transparency channel.
+.TP
 .B \-af
 Turns auto-filter on. This algorithm will spend additional time optimizing
 the filtering strength to reach a well-balanced quality.
diff --git a/src/enc/alpha.c b/src/enc/alpha.c
index 9decbfcf..f1e32f44 100644
--- a/src/enc/alpha.c
+++ b/src/enc/alpha.c
@@ -286,42 +286,88 @@ static int EncodeAlpha(VP8Encoder* const enc,
 //------------------------------------------------------------------------------
 // Main calls
 
+// Worker job: compresses the alpha plane of the encoder passed in 'arg1' and
+// stores the result in alpha_data_ / alpha_data_size_. 'dummy' is unused.
+// Returns 1 on success, 0 on error. The (void*, void*) signature is used so
+// the hook is never called through a cast function pointer.
+// NOTE(review): assumes WebPWorkerHook is 'int (*)(void*, void*)' -- confirm.
+static int CompressAlphaJob(void* arg1, void* dummy) {
+  VP8Encoder* const enc = (VP8Encoder*)arg1;
+  const WebPConfig* config = enc->config_;
+  uint8_t* alpha_data = NULL;
+  size_t alpha_size = 0;
+  const int effort_level = config->method;  // maps to [0..6]
+  const WEBP_FILTER_TYPE filter =
+      (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+      (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+                                       WEBP_FILTER_BEST;
+  if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
+                   filter, effort_level, &alpha_data, &alpha_size)) {
+    return 0;
+  }
+  if (alpha_size != (uint32_t)alpha_size) {  // Sanity check.
+    free(alpha_data);
+    return 0;
+  }
+  enc->alpha_data_size_ = (uint32_t)alpha_size;
+  enc->alpha_data_ = alpha_data;
+  (void)dummy;
+  return 1;
+}
+
 void VP8EncInitAlpha(VP8Encoder* const enc) {
   enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
   enc->alpha_data_ = NULL;
   enc->alpha_data_size_ = 0;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    WebPWorkerInit(worker);
+    worker->data1 = enc;
+    worker->data2 = NULL;
+    worker->hook = CompressAlphaJob;
+  }
+}
+
+// Starts the alpha compression: launched on the worker if threading is
+// enabled, done synchronously otherwise. Returns 0 on error.
+int VP8EncStartAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPWorkerReset(worker)) {    // Makes sure worker is good to go.
+        return 0;
+      }
+      WebPWorkerLaunch(worker);
+      return 1;
+    } else {
+      return CompressAlphaJob(enc, NULL);   // just do the job right away
+    }
+  }
+  return 1;
 }
 
 int VP8EncFinishAlpha(VP8Encoder* const enc) {
   if (enc->has_alpha_) {
-    const WebPConfig* config = enc->config_;
-    uint8_t* tmp_data = NULL;
-    size_t tmp_size = 0;
-    const int effort_level = config->method;  // maps to [0..6]
-    const WEBP_FILTER_TYPE filter =
-        (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
-        (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
-                                         WEBP_FILTER_BEST;
-
-    if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
-                     filter, effort_level, &tmp_data, &tmp_size)) {
-      return 0;
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPWorkerSync(worker)) return 0;  // error
     }
-    if (tmp_size != (uint32_t)tmp_size) {  // Sanity check.
-      free(tmp_data);
-      return 0;
-    }
-    enc->alpha_data_size_ = (uint32_t)tmp_size;
-    enc->alpha_data_ = tmp_data;
   }
   return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
 }
 
-void VP8EncDeleteAlpha(VP8Encoder* const enc) {
+int VP8EncDeleteAlpha(VP8Encoder* const enc) {
+  int ok = 1;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    ok = WebPWorkerSync(worker);  // finish anything left in flight
+    WebPWorkerEnd(worker);  // still need to end the worker, even if !ok
+  }
   free(enc->alpha_data_);
   enc->alpha_data_ = NULL;
   enc->alpha_data_size_ = 0;
   enc->has_alpha_ = 0;
+  return ok;
 }
 
 #if defined(__cplusplus) || defined(c_plusplus)
diff --git a/src/enc/config.c b/src/enc/config.c
index 9ef9d327..8261ef98 100644
--- a/src/enc/config.c
+++ b/src/enc/config.c
@@ -47,6 +47,7 @@ int WebPConfigInitInternal(WebPConfig* config,
   config->lossless = 0;
   config->image_hint = WEBP_HINT_DEFAULT;
   config->emulate_jpeg_size = 0;
+  config->thread_level = 0;
 
   // TODO(skal): tune.
   switch (preset) {
@@ -125,6 +126,8 @@ int WebPValidateConfig(const WebPConfig* config) {
     return 0;
   if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
     return 0;
+  if (config->thread_level < 0 || config->thread_level > 1)
+    return 0;
   return 1;
 }
 
diff --git a/src/enc/frame.c b/src/enc/frame.c
index d7965bb0..a3cb7b72 100644
--- a/src/enc/frame.c
+++ b/src/enc/frame.c
@@ -625,7 +625,10 @@ static void SetBlock(uint8_t* p, int value, int size) {
 #endif
 
 static void ResetSSE(VP8Encoder* const enc) {
-  memset(enc->sse_, 0, sizeof(enc->sse_));
+  enc->sse_[0] = 0;
+  enc->sse_[1] = 0;
+  enc->sse_[2] = 0;
+  // Note: enc->sse_[3] is managed by alpha.c
   enc->sse_count_ = 0;
 }
 
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index 736602a9..9dc8acd7 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -16,6 +16,7 @@
 #include "../webp/encode.h"
 #include "../dsp/dsp.h"
 #include "../utils/bit_writer.h"
+#include "../utils/thread.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -386,6 +387,7 @@ struct VP8Encoder {
   int has_alpha_;
   uint8_t* alpha_data_;       // non-NULL if transparency is present
   uint32_t alpha_data_size_;
+  WebPWorker alpha_worker_;
 
   // enhancement layer
   int use_layer_;
@@ -416,6 +418,7 @@ struct VP8Encoder {
   int method_;               // 0=fastest, 6=best/slowest.
   VP8RDLevel rd_opt_level_;  // Deduced from method_.
   int max_i4_header_bits_;   // partition #0 safeness factor
+  int thread_level_;         // derived from config->thread_level
 
   // Memory
   VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
@@ -499,8 +502,9 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
 
   // in alpha.c
 void VP8EncInitAlpha(VP8Encoder* const enc);    // initialize alpha compression
+int VP8EncStartAlpha(VP8Encoder* const enc);    // start alpha coding process
 int VP8EncFinishAlpha(VP8Encoder* const enc);   // finalize compressed data
-void VP8EncDeleteAlpha(VP8Encoder* const enc);  // delete compressed data
+int VP8EncDeleteAlpha(VP8Encoder* const enc);   // delete compressed data
 
   // in layer.c
 void VP8EncInitLayer(VP8Encoder* const enc);   // init everything
diff --git a/src/enc/webpenc.c b/src/enc/webpenc.c
index b46678c2..72c99130 100644
--- a/src/enc/webpenc.c
+++ b/src/enc/webpenc.c
@@ -125,6 +125,8 @@ static void MapConfigToTools(VP8Encoder* const enc) {
   enc->max_i4_header_bits_ =
       256 * 16 * 16 *                 // upper bound: up to 16bit per 4x4 block
       (limit * limit) / (100 * 100);  // ... modulated with a quadratic curve.
+
+  enc->thread_level_ = enc->config_->thread_level;
 }
 
 // Memory scaling with dimensions:
@@ -266,14 +268,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
   return enc;
 }
 
-static void DeleteVP8Encoder(VP8Encoder* enc) {
+static int DeleteVP8Encoder(VP8Encoder* enc) {
+  int ok = 1;  // stays 1 when there is no encoder to delete
   if (enc != NULL) {
-    VP8EncDeleteAlpha(enc);
+    ok = VP8EncDeleteAlpha(enc);  // 0 if the alpha teardown reported an error
 #ifdef WEBP_EXPERIMENTAL_FEATURES
     VP8EncDeleteLayer(enc);
 #endif
     free(enc);
   }
+  return ok;
 }
 
 //------------------------------------------------------------------------------
@@ -336,7 +340,7 @@ int WebPReportProgress(const WebPPicture* const pic,
 //------------------------------------------------------------------------------
 
 int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
-  int ok;
+  int ok = 0;
 
   if (pic == NULL)
     return 0;
@@ -365,19 +369,21 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
     enc = InitVP8Encoder(config, pic);
     if (enc == NULL) return 0;  // pic->error is already set.
     // Note: each of the tasks below account for 20% in the progress report.
-    ok = VP8EncAnalyze(enc)
-      && VP8StatLoop(enc)
-      && VP8EncLoop(enc)
-      && VP8EncFinishAlpha(enc)
+    ok = VP8EncAnalyze(enc);
+
+    // Analysis is done, proceed to actual coding.
+    ok = ok && VP8EncStartAlpha(enc);   // may run on the alpha worker thread
+    ok = ok && VP8StatLoop(enc) && VP8EncLoop(enc);
+    ok = ok && VP8EncFinishAlpha(enc);  // syncs with the alpha worker, if any
 #ifdef WEBP_EXPERIMENTAL_FEATURES
-      && VP8EncFinishLayer(enc)
+    ok = ok && VP8EncFinishLayer(enc);
 #endif
-      && VP8EncWrite(enc);
+    ok = ok && VP8EncWrite(enc);
     StoreStats(enc);
     if (!ok) {
       VP8EncFreeBitWriters(enc);
     }
-    DeleteVP8Encoder(enc);
+    ok &= DeleteVP8Encoder(enc);  // must always be called, even if !ok
   } else {
     if (pic->argb == NULL)
       return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
diff --git a/src/webp/encode.h b/src/webp/encode.h
index 48ed19fe..e1c3bebb 100644
--- a/src/webp/encode.h
+++ b/src/webp/encode.h
@@ -125,8 +125,9 @@ struct WebPConfig {
                           // to better match the expected output size from
                           // JPEG compression. Generally, the output size will
                           // be similar but the degradation will be lower.
+  int thread_level;       // 0 or 1: if non-zero, try multi-threaded encoding.
 
-  uint32_t pad[7];        // padding for later use
+  uint32_t pad[6];        // padding for later use (one slot now thread_level)
 };
 
 // Enumerate some predefined settings for WebPConfig, depending on the type