mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 22:28:22 +01:00
Merge changes I55f8da52,Id73a1e96
* changes:
  - cosmetics: add some missing != NULL comparisons
  - factorize BPS definition in dsp.h and add VP8Copy16x8
This commit is contained in:
commit
441f273f19
@ -69,7 +69,7 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
|||||||
NUM_PROBAS = 11,
|
NUM_PROBAS = 11,
|
||||||
NUM_MV_PROBAS = 19 };
|
NUM_MV_PROBAS = 19 };
|
||||||
|
|
||||||
// YUV-cache parameters.
|
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||||
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
// Constraints are: We need to store one 16x16 block of luma samples (y),
|
||||||
// and two 8x8 chroma blocks (u/v). These should be 16-byte aligned,
|
// and two 8x8 chroma blocks (u/v). These should be 16-byte aligned,
|
||||||
// in order to be SIMD-friendly. We also need to store the top, left and
|
// in order to be SIMD-friendly. We also need to store the top, left and
|
||||||
@ -91,8 +91,6 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
|||||||
// 'y' = y-samples 'u' = u-samples 'v' = v-samples
|
// 'y' = y-samples 'u' = u-samples 'v' = v-samples
|
||||||
// '|' = left sample, '-' = top sample, '+' = top-left sample
|
// '|' = left sample, '-' = top sample, '+' = top-left sample
|
||||||
// 't' = extra top-right sample for 4x4 modes
|
// 't' = extra top-right sample for 4x4 modes
|
||||||
// With this layout, BPS (=Bytes Per Scan-line) is one cacheline size.
|
|
||||||
#define BPS 32 // this is the common stride used by yuv[]
|
|
||||||
#define YUV_SIZE (BPS * 17 + BPS * 9)
|
#define YUV_SIZE (BPS * 17 + BPS * 9)
|
||||||
#define Y_SIZE (BPS * 17)
|
#define Y_SIZE (BPS * 17)
|
||||||
#define Y_OFF (BPS * 1 + 8)
|
#define Y_OFF (BPS * 1 + 8)
|
||||||
|
@ -24,6 +24,8 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define BPS 32 // this is the common stride for enc/dec
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// CPU detection
|
// CPU detection
|
||||||
|
|
||||||
@ -132,6 +134,7 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
|
|||||||
|
|
||||||
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
|
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
|
||||||
extern VP8BlockCopy VP8Copy4x4;
|
extern VP8BlockCopy VP8Copy4x4;
|
||||||
|
extern VP8BlockCopy VP8Copy16x8;
|
||||||
// Quantization
|
// Quantization
|
||||||
struct VP8Matrix; // forward declaration
|
struct VP8Matrix; // forward declaration
|
||||||
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
|
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
|
||||||
|
@ -207,7 +207,7 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
|
|||||||
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||||
const uint8_t* top, int size) {
|
const uint8_t* top, int size) {
|
||||||
int j;
|
int j;
|
||||||
if (top) {
|
if (top != NULL) {
|
||||||
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
|
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
|
||||||
} else {
|
} else {
|
||||||
Fill(dst, 127, size);
|
Fill(dst, 127, size);
|
||||||
@ -216,7 +216,7 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
|||||||
|
|
||||||
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||||
const uint8_t* left, int size) {
|
const uint8_t* left, int size) {
|
||||||
if (left) {
|
if (left != NULL) {
|
||||||
int j;
|
int j;
|
||||||
for (j = 0; j < size; ++j) {
|
for (j = 0; j < size; ++j) {
|
||||||
memset(dst + j * BPS, left[j], size);
|
memset(dst + j * BPS, left[j], size);
|
||||||
@ -229,8 +229,8 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
|||||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||||
const uint8_t* top, int size) {
|
const uint8_t* top, int size) {
|
||||||
int y;
|
int y;
|
||||||
if (left) {
|
if (left != NULL) {
|
||||||
if (top) {
|
if (top != NULL) {
|
||||||
const uint8_t* const clip = clip1 + 255 - left[-1];
|
const uint8_t* const clip = clip1 + 255 - left[-1];
|
||||||
for (y = 0; y < size; ++y) {
|
for (y = 0; y < size; ++y) {
|
||||||
const uint8_t* const clip_table = clip + left[y];
|
const uint8_t* const clip_table = clip + left[y];
|
||||||
@ -248,7 +248,7 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
|||||||
// is equivalent to VE prediction where you just copy the top samples.
|
// is equivalent to VE prediction where you just copy the top samples.
|
||||||
// Note that if top samples are not available, the default value is
|
// Note that if top samples are not available, the default value is
|
||||||
// then 129, and not 127 as in the VerticalPred case.
|
// then 129, and not 127 as in the VerticalPred case.
|
||||||
if (top) {
|
if (top != NULL) {
|
||||||
VerticalPred(dst, top, size);
|
VerticalPred(dst, top, size);
|
||||||
} else {
|
} else {
|
||||||
Fill(dst, 129, size);
|
Fill(dst, 129, size);
|
||||||
@ -261,15 +261,15 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
|||||||
int size, int round, int shift) {
|
int size, int round, int shift) {
|
||||||
int DC = 0;
|
int DC = 0;
|
||||||
int j;
|
int j;
|
||||||
if (top) {
|
if (top != NULL) {
|
||||||
for (j = 0; j < size; ++j) DC += top[j];
|
for (j = 0; j < size; ++j) DC += top[j];
|
||||||
if (left) { // top and left present
|
if (left != NULL) { // top and left present
|
||||||
for (j = 0; j < size; ++j) DC += left[j];
|
for (j = 0; j < size; ++j) DC += left[j];
|
||||||
} else { // top, but no left
|
} else { // top, but no left
|
||||||
DC += DC;
|
DC += DC;
|
||||||
}
|
}
|
||||||
DC = (DC + round) >> shift;
|
DC = (DC + round) >> shift;
|
||||||
} else if (left) { // left but no top
|
} else if (left != NULL) { // left but no top
|
||||||
for (j = 0; j < size; ++j) DC += left[j];
|
for (j = 0; j < size; ++j) DC += left[j];
|
||||||
DC += DC;
|
DC += DC;
|
||||||
DC = (DC + round) >> shift;
|
DC = (DC + round) >> shift;
|
||||||
@ -291,8 +291,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
|
|||||||
TrueMotion(C8TM8 + dst, left, top, 8);
|
TrueMotion(C8TM8 + dst, left, top, 8);
|
||||||
// V block
|
// V block
|
||||||
dst += 8;
|
dst += 8;
|
||||||
if (top) top += 8;
|
if (top != NULL) top += 8;
|
||||||
if (left) left += 16;
|
if (left != NULL) left += 16;
|
||||||
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
||||||
VerticalPred(C8VE8 + dst, top, 8);
|
VerticalPred(C8VE8 + dst, top, 8);
|
||||||
HorizontalPred(C8HE8 + dst, left, 8);
|
HorizontalPred(C8HE8 + dst, left, 8);
|
||||||
@ -662,16 +662,22 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Block copy
|
// Block copy
|
||||||
|
|
||||||
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {
|
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||||
int y;
|
int y;
|
||||||
for (y = 0; y < size; ++y) {
|
for (y = 0; y < h; ++y) {
|
||||||
memcpy(dst, src, size);
|
memcpy(dst, src, w);
|
||||||
src += BPS;
|
src += BPS;
|
||||||
dst += BPS;
|
dst += BPS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
|
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
|
||||||
|
Copy(src, dst, 4, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Copy16x8(const uint8_t* src, uint8_t* dst) {
|
||||||
|
Copy(src, dst, 16, 8);
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Initialization
|
// Initialization
|
||||||
@ -695,6 +701,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock;
|
|||||||
VP8Quantize2Blocks VP8EncQuantize2Blocks;
|
VP8Quantize2Blocks VP8EncQuantize2Blocks;
|
||||||
VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
|
VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
|
||||||
VP8BlockCopy VP8Copy4x4;
|
VP8BlockCopy VP8Copy4x4;
|
||||||
|
VP8BlockCopy VP8Copy16x8;
|
||||||
|
|
||||||
extern void VP8EncDspInitSSE2(void);
|
extern void VP8EncDspInitSSE2(void);
|
||||||
extern void VP8EncDspInitAVX2(void);
|
extern void VP8EncDspInitAVX2(void);
|
||||||
@ -724,6 +731,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
|
|||||||
VP8EncQuantize2Blocks = Quantize2Blocks;
|
VP8EncQuantize2Blocks = Quantize2Blocks;
|
||||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
||||||
VP8Copy4x4 = Copy4x4;
|
VP8Copy4x4 = Copy4x4;
|
||||||
|
VP8Copy16x8 = Copy16x8;
|
||||||
|
|
||||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
if (VP8GetCPUInfo != NULL) {
|
if (VP8GetCPUInfo != NULL) {
|
||||||
|
@ -1067,10 +1067,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
|||||||
VP8SetIntraUVMode(it, rd->mode_uv);
|
VP8SetIntraUVMode(it, rd->mode_uv);
|
||||||
AddScore(rd, &rd_best);
|
AddScore(rd, &rd_best);
|
||||||
if (dst != dst0) { // copy 16x8 block if needed
|
if (dst != dst0) { // copy 16x8 block if needed
|
||||||
int i;
|
VP8Copy16x8(dst0, dst);
|
||||||
for (i = 0; i < 8; ++i) {
|
|
||||||
memcpy(dst0 + i * BPS, dst + i * BPS, 2 * 8 * sizeof(*dst0));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,7 +69,7 @@ typedef enum { // Rate-distortion optimization levels
|
|||||||
RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
|
RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
|
||||||
} VP8RDLevel;
|
} VP8RDLevel;
|
||||||
|
|
||||||
// YUV-cache parameters. Cache is 32-pixels wide.
|
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||||
// The original or reconstructed samples can be accessed using VP8Scan[].
|
// The original or reconstructed samples can be accessed using VP8Scan[].
|
||||||
// The predicted blocks can be accessed using offsets to yuv_p_ and
|
// The predicted blocks can be accessed using offsets to yuv_p_ and
|
||||||
// the arrays VP8*ModeOffsets[].
|
// the arrays VP8*ModeOffsets[].
|
||||||
@ -91,8 +91,7 @@ typedef enum { // Rate-distortion optimization levels
|
|||||||
// Intra 4x4 predictions (4x4 block each)
|
// Intra 4x4 predictions (4x4 block each)
|
||||||
// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
|
// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
|
||||||
// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
|
// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
|
||||||
#define BPS 32 // this is the common stride
|
#define YUV_SIZE (BPS * 16)
|
||||||
#define YUV_SIZE (BPS * 16) // 25% lost
|
|
||||||
#define PRED_SIZE (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds
|
#define PRED_SIZE (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds
|
||||||
#define Y_OFF (0)
|
#define Y_OFF (0)
|
||||||
#define U_OFF (16)
|
#define U_OFF (16)
|
||||||
|
Loading…
Reference in New Issue
Block a user