SSE2 for lossless decoding (critical) functions.

This speeds up WebP lossless decoding by 20%. In particular, the
photographic images get 35% speedup.

Change-Id: Idb94750342a140ec05df52c07e12be4bba335adc
This commit is contained in:
Vikas Arora
2013-06-27 11:41:21 -07:00
parent 699d80ea6b
commit 8967b9f37e
4 changed files with 172 additions and 100 deletions

View File

@ -22,6 +22,26 @@
extern "C" {
#endif
//------------------------------------------------------------------------------
//
typedef uint32_t (*VP8LPredClampedAddSubFunc)(uint32_t c0, uint32_t c1,
uint32_t c2);
typedef uint32_t (*VP8LPredSelectFunc)(uint32_t c0, uint32_t c1, uint32_t c2);
typedef void (*VP8LSubtractGreenFromBlueAndRedFunc)(uint32_t* argb_data,
int num_pixs);
typedef void (*VP8LAddGreenToBlueAndRedFunc)(uint32_t* data_start,
const uint32_t* data_end);
extern VP8LPredClampedAddSubFunc VP8LClampedAddSubtractFull;
extern VP8LPredClampedAddSubFunc VP8LClampedAddSubtractHalf;
extern VP8LPredSelectFunc VP8LSelect;
extern VP8LSubtractGreenFromBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
extern VP8LAddGreenToBlueAndRedFunc VP8LAddGreenToBlueAndRed;
// Must be called before calling any of the above methods.
void VP8LDspInit(void);
//------------------------------------------------------------------------------
// Image transforms.
@ -42,9 +62,6 @@ void VP8LColorIndexInverseTransformAlpha(
const struct VP8LTransform* const transform, int y_start, int y_end,
const uint8_t* src, uint8_t* dst);
// Subtracts green from blue and red channels.
void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);
void VP8LResidualImage(int width, int height, int bits,
uint32_t* const argb, uint32_t* const argb_scratch,
uint32_t* const image);