mirror of
https://github.com/webmproject/libwebp.git
synced 2025-02-13 07:22:52 +01:00
Merge two ITransforms together when applicable and change TTransform
to return the sum directly. Output is bitwise the same; speed-up is 1-2%. This is preparatory to a more efficient SSE2 implementation. Change-Id: I0bcdf05808c93420fbe9dcb75e5e7e55a4ae5b89
This commit is contained in:
parent
ca554137d2
commit
e7ff3f9af6
@ -49,7 +49,8 @@ static const int kC1 = 20091 + (1 << 16);
|
|||||||
static const int kC2 = 35468;
|
static const int kC2 = 35468;
|
||||||
#define MUL(a, b) (((a) * (b)) >> 16)
|
#define MUL(a, b) (((a) * (b)) >> 16)
|
||||||
|
|
||||||
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst) {
|
static inline void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||||
|
uint8_t* dst) {
|
||||||
int C[4 * 4], *tmp;
|
int C[4 * 4], *tmp;
|
||||||
int i;
|
int i;
|
||||||
tmp = C;
|
tmp = C;
|
||||||
@ -80,6 +81,13 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst) {
|
|||||||
tmp++;
|
tmp++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||||
|
int do_two) {
|
||||||
|
ITransformOne(ref, in, dst);
|
||||||
|
if (do_two) {
|
||||||
|
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||||
int i;
|
int i;
|
||||||
@ -526,9 +534,12 @@ VP8Metric VP8SSE4x4 = SSE4x4;
|
|||||||
// reconstructed samples.
|
// reconstructed samples.
|
||||||
|
|
||||||
// Hadamard transform
|
// Hadamard transform
|
||||||
static void TTransform(const uint8_t* in, int16_t* out) {
|
// Returns the weighted sum of the absolute values of the transformed coefficients.
|
||||||
|
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||||
|
int sum = 0;
|
||||||
int tmp[16];
|
int tmp[16];
|
||||||
int i;
|
int i;
|
||||||
|
// horizontal pass
|
||||||
for (i = 0; i < 4; ++i, in += BPS) {
|
for (i = 0; i < 4; ++i, in += BPS) {
|
||||||
const int a0 = (in[0] + in[2]) << 2;
|
const int a0 = (in[0] + in[2]) << 2;
|
||||||
const int a1 = (in[1] + in[3]) << 2;
|
const int a1 = (in[1] + in[3]) << 2;
|
||||||
@ -539,7 +550,8 @@ static void TTransform(const uint8_t* in, int16_t* out) {
|
|||||||
tmp[2 + i * 4] = a3 - a2;
|
tmp[2 + i * 4] = a3 - a2;
|
||||||
tmp[3 + i * 4] = a0 - a1;
|
tmp[3 + i * 4] = a0 - a1;
|
||||||
}
|
}
|
||||||
for (i = 0; i < 4; ++i) {
|
// vertical pass
|
||||||
|
for (i = 0; i < 4; ++i, ++w) {
|
||||||
const int a0 = (tmp[0 + i] + tmp[8 + i]);
|
const int a0 = (tmp[0 + i] + tmp[8 + i]);
|
||||||
const int a1 = (tmp[4 + i] + tmp[12+ i]);
|
const int a1 = (tmp[4 + i] + tmp[12+ i]);
|
||||||
const int a2 = (tmp[4 + i] - tmp[12+ i]);
|
const int a2 = (tmp[4 + i] - tmp[12+ i]);
|
||||||
@ -548,24 +560,20 @@ static void TTransform(const uint8_t* in, int16_t* out) {
|
|||||||
const int b1 = a3 + a2;
|
const int b1 = a3 + a2;
|
||||||
const int b2 = a3 - a2;
|
const int b2 = a3 - a2;
|
||||||
const int b3 = a0 - a1;
|
const int b3 = a0 - a1;
|
||||||
out[ 0 + i] = (b0 + (b0 < 0) + 3) >> 3;
|
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
|
||||||
out[ 4 + i] = (b1 + (b1 < 0) + 3) >> 3;
|
sum += w[ 0] * ((abs(b0) + 3) >> 3);
|
||||||
out[ 8 + i] = (b2 + (b2 < 0) + 3) >> 3;
|
sum += w[ 4] * ((abs(b1) + 3) >> 3);
|
||||||
out[12 + i] = (b3 + (b3 < 0) + 3) >> 3;
|
sum += w[ 8] * ((abs(b2) + 3) >> 3);
|
||||||
|
sum += w[12] * ((abs(b3) + 3) >> 3);
|
||||||
}
|
}
|
||||||
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||||
const uint16_t* const w) {
|
const uint16_t* const w) {
|
||||||
int16_t tmp1[16], tmp2[16];
|
const int sum1 = TTransform(a, w);
|
||||||
int k;
|
const int sum2 = TTransform(b, w);
|
||||||
int D;
|
return (abs(sum2 - sum1) + 8) >> 4;
|
||||||
TTransform(a, tmp1);
|
|
||||||
TTransform(b, tmp2);
|
|
||||||
D = 0;
|
|
||||||
for (k = 0; k < 16; ++k)
|
|
||||||
D += w[k] * (abs(tmp2[k]) - abs(tmp1[k]));
|
|
||||||
return (abs(D) + 8) >> 4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||||
|
@ -615,8 +615,8 @@ static int ReconstructIntra16(VP8EncIterator* const it,
|
|||||||
|
|
||||||
// Transform back
|
// Transform back
|
||||||
VP8ITransformWHT(dc_tmp, tmp[0]);
|
VP8ITransformWHT(dc_tmp, tmp[0]);
|
||||||
for (n = 0; n < 16; ++n) {
|
for (n = 0; n < 16; n += 2) {
|
||||||
VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n]);
|
VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nz;
|
return nz;
|
||||||
@ -642,7 +642,7 @@ static int ReconstructIntra4(VP8EncIterator* const it,
|
|||||||
} else {
|
} else {
|
||||||
nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_);
|
nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_);
|
||||||
}
|
}
|
||||||
VP8ITransform(ref, tmp, yuv_out);
|
VP8ITransform(ref, tmp, yuv_out, 0);
|
||||||
return nz;
|
return nz;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -666,8 +666,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
|||||||
for (x = 0; x < 2; ++x, ++n) {
|
for (x = 0; x < 2; ++x, ++n) {
|
||||||
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
|
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
|
||||||
const int non_zero =
|
const int non_zero =
|
||||||
TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2, &dqm->uv_,
|
TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2,
|
||||||
dqm->lambda_trellis_uv_);
|
&dqm->uv_, dqm->lambda_trellis_uv_);
|
||||||
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
|
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
|
||||||
nz |= non_zero << n;
|
nz |= non_zero << n;
|
||||||
}
|
}
|
||||||
@ -679,8 +679,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (n = 0; n < 8; ++n) {
|
for (n = 0; n < 8; n += 2) {
|
||||||
VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n]);
|
VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
|
||||||
}
|
}
|
||||||
return (nz << 16);
|
return (nz << 16);
|
||||||
}
|
}
|
||||||
|
@ -416,7 +416,10 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
|
|||||||
|
|
||||||
// in dsp.c
|
// in dsp.c
|
||||||
// Transforms
|
// Transforms
|
||||||
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst);
|
// VP8Idct: Does one or two inverse transforms. If do_two is set, the transforms
|
||||||
|
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
|
||||||
|
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||||
|
int do_two);
|
||||||
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
|
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
|
||||||
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
|
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
|
||||||
extern VP8Idct VP8ITransform;
|
extern VP8Idct VP8ITransform;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user