mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
MIPS: dspr2: added optimization for function GetResidualCost
The set/get residual C functions were moved to a new file in src/dsp; the mips32 version of GetResidualCost was also moved to a new file. Change-Id: I7cebb7933a89820ff28c187249a9181f281081d2
This commit is contained in:
parent
be6635e91d
commit
a987faedfa
@ -38,6 +38,9 @@ LOCAL_SRC_FILES := \
|
|||||||
src/dsp/argb.c \
|
src/dsp/argb.c \
|
||||||
src/dsp/argb_mips_dsp_r2.c \
|
src/dsp/argb_mips_dsp_r2.c \
|
||||||
src/dsp/argb_sse2.c \
|
src/dsp/argb_sse2.c \
|
||||||
|
src/dsp/cost.c \
|
||||||
|
src/dsp/cost_mips32.c \
|
||||||
|
src/dsp/cost_mips_dsp_r2.c \
|
||||||
src/dsp/cpu.c \
|
src/dsp/cpu.c \
|
||||||
src/dsp/dec.c \
|
src/dsp/dec.c \
|
||||||
src/dsp/dec_clip_tables.c \
|
src/dsp/dec_clip_tables.c \
|
||||||
|
@ -219,6 +219,9 @@ DSP_ENC_OBJS = \
|
|||||||
$(DIROBJ)\dsp\argb.obj \
|
$(DIROBJ)\dsp\argb.obj \
|
||||||
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
|
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
|
||||||
$(DIROBJ)\dsp\argb_sse2.obj \
|
$(DIROBJ)\dsp\argb_sse2.obj \
|
||||||
|
$(DIROBJ)\dsp\cost.obj \
|
||||||
|
$(DIROBJ)\dsp\cost_mips32.obj \
|
||||||
|
$(DIROBJ)\dsp\cost_mips_dsp_r2.obj \
|
||||||
$(DIROBJ)\dsp\enc.obj \
|
$(DIROBJ)\dsp\enc.obj \
|
||||||
$(DIROBJ)\dsp\enc_avx2.obj \
|
$(DIROBJ)\dsp\enc_avx2.obj \
|
||||||
$(DIROBJ)\dsp\enc_mips32.obj \
|
$(DIROBJ)\dsp\enc_mips32.obj \
|
||||||
|
@ -143,6 +143,9 @@ DSP_ENC_OBJS = \
|
|||||||
src/dsp/argb.o \
|
src/dsp/argb.o \
|
||||||
src/dsp/argb_mips_dsp_r2.o \
|
src/dsp/argb_mips_dsp_r2.o \
|
||||||
src/dsp/argb_sse2.o \
|
src/dsp/argb_sse2.o \
|
||||||
|
src/dsp/cost.o \
|
||||||
|
src/dsp/cost_mips32.o \
|
||||||
|
src/dsp/cost_mips_dsp_r2.o \
|
||||||
src/dsp/enc.o \
|
src/dsp/enc.o \
|
||||||
src/dsp/enc_avx2.o \
|
src/dsp/enc_avx2.o \
|
||||||
src/dsp/enc_mips32.o \
|
src/dsp/enc_mips32.o \
|
||||||
|
@ -41,6 +41,9 @@ COMMON_SOURCES += yuv_mips_dsp_r2.c
|
|||||||
ENC_SOURCES =
|
ENC_SOURCES =
|
||||||
ENC_SOURCES += argb.c
|
ENC_SOURCES += argb.c
|
||||||
ENC_SOURCES += argb_mips_dsp_r2.c
|
ENC_SOURCES += argb_mips_dsp_r2.c
|
||||||
|
ENC_SOURCES += cost.c
|
||||||
|
ENC_SOURCES += cost_mips32.c
|
||||||
|
ENC_SOURCES += cost_mips_dsp_r2.c
|
||||||
ENC_SOURCES += enc.c
|
ENC_SOURCES += enc.c
|
||||||
ENC_SOURCES += enc_mips32.c
|
ENC_SOURCES += enc_mips32.c
|
||||||
ENC_SOURCES += enc_mips_dsp_r2.c
|
ENC_SOURCES += enc_mips_dsp_r2.c
|
||||||
|
105
src/dsp/cost.c
Normal file
105
src/dsp/cost.c
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style license
|
||||||
|
// that can be found in the COPYING file in the root of the source
|
||||||
|
// tree. An additional intellectual property rights grant can be found
|
||||||
|
// in the file PATENTS. All contributing project authors may
|
||||||
|
// be found in the AUTHORS file in the root of the source tree.
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Author: Skal (pascal.massimino@gmail.com)
|
||||||
|
|
||||||
|
#include "./dsp.h"
|
||||||
|
#include "../enc/cost.h"
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Mode costs
|
||||||
|
|
||||||
|
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||||
|
int n = res->first;
|
||||||
|
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||||
|
const int p0 = res->prob[n][ctx0][0];
|
||||||
|
const uint16_t* t = res->cost[n][ctx0];
|
||||||
|
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||||
|
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||||
|
// be missing during the loop.
|
||||||
|
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
|
||||||
|
|
||||||
|
if (res->last < 0) {
|
||||||
|
return VP8BitCost(0, p0);
|
||||||
|
}
|
||||||
|
for (; n < res->last; ++n) {
|
||||||
|
const int v = abs(res->coeffs[n]);
|
||||||
|
const int b = VP8EncBands[n + 1];
|
||||||
|
const int ctx = (v >= 2) ? 2 : v;
|
||||||
|
cost += VP8LevelCost(t, v);
|
||||||
|
t = res->cost[b][ctx];
|
||||||
|
}
|
||||||
|
// Last coefficient is always non-zero
|
||||||
|
{
|
||||||
|
const int v = abs(res->coeffs[n]);
|
||||||
|
assert(v != 0);
|
||||||
|
cost += VP8LevelCost(t, v);
|
||||||
|
if (n < 15) {
|
||||||
|
const int b = VP8EncBands[n + 1];
|
||||||
|
const int ctx = (v == 1) ? 1 : 2;
|
||||||
|
const int last_p0 = res->prob[b][ctx][0];
|
||||||
|
cost += VP8BitCost(0, last_p0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetResidualCoeffs(const int16_t* const coeffs,
|
||||||
|
VP8Residual* const res) {
|
||||||
|
int n;
|
||||||
|
res->last = -1;
|
||||||
|
assert(res->first == 0 || coeffs[0] == 0);
|
||||||
|
for (n = 15; n >= 0; --n) {
|
||||||
|
if (coeffs[n]) {
|
||||||
|
res->last = n;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res->coeffs = coeffs;
|
||||||
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// init function
|
||||||
|
|
||||||
|
VP8GetResidualCostFunc VP8GetResidualCost;
|
||||||
|
VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
||||||
|
|
||||||
|
extern void VP8EncDspCostInitMIPS32(void);
|
||||||
|
extern void VP8EncDspCostInitMIPSdspR2(void);
|
||||||
|
|
||||||
|
#if defined(WEBP_USE_SSE2)
|
||||||
|
extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
||||||
|
VP8Residual* const res);
|
||||||
|
#endif // WEBP_USE_SSE2
|
||||||
|
|
||||||
|
void VP8EncDspCostInit(void) {
|
||||||
|
VP8GetResidualCost = GetResidualCost;
|
||||||
|
VP8SetResidualCoeffs = SetResidualCoeffs;
|
||||||
|
|
||||||
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
|
if (VP8GetCPUInfo != NULL) {
|
||||||
|
#if defined(WEBP_USE_MIPS32)
|
||||||
|
if (VP8GetCPUInfo(kMIPS32)) {
|
||||||
|
VP8EncDspCostInitMIPS32();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||||
|
if (VP8GetCPUInfo(kMIPSdspR2)) {
|
||||||
|
VP8EncDspCostInitMIPSdspR2();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(WEBP_USE_SSE2)
|
||||||
|
if (VP8GetCPUInfo(kSSE2)) {
|
||||||
|
VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
141
src/dsp/cost_mips32.c
Normal file
141
src/dsp/cost_mips32.c
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style license
|
||||||
|
// that can be found in the COPYING file in the root of the source
|
||||||
|
// tree. An additional intellectual property rights grant can be found
|
||||||
|
// in the file PATENTS. All contributing project authors may
|
||||||
|
// be found in the AUTHORS file in the root of the source tree.
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||||
|
|
||||||
|
#include "./dsp.h"
|
||||||
|
|
||||||
|
#if defined(WEBP_USE_MIPS32)
|
||||||
|
|
||||||
|
#include "../enc/cost.h"
|
||||||
|
|
||||||
|
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||||
|
int n = res->first;
|
||||||
|
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||||
|
int p0 = res->prob[n][ctx0][0];
|
||||||
|
const uint16_t* t = res->cost[n][ctx0];
|
||||||
|
int cost;
|
||||||
|
const int const_2 = 2;
|
||||||
|
const int const_255 = 255;
|
||||||
|
const int const_max_level = MAX_VARIABLE_LEVEL;
|
||||||
|
int res_cost;
|
||||||
|
int res_prob;
|
||||||
|
int res_coeffs;
|
||||||
|
int res_last;
|
||||||
|
int v_reg;
|
||||||
|
int b_reg;
|
||||||
|
int ctx_reg;
|
||||||
|
int cost_add, temp_1, temp_2, temp_3;
|
||||||
|
|
||||||
|
if (res->last < 0) {
|
||||||
|
return VP8BitCost(0, p0);
|
||||||
|
}
|
||||||
|
|
||||||
|
cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
|
||||||
|
|
||||||
|
res_cost = (int)res->cost;
|
||||||
|
res_prob = (int)res->prob;
|
||||||
|
res_coeffs = (int)res->coeffs;
|
||||||
|
res_last = (int)res->last;
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
".set push \n\t"
|
||||||
|
".set noreorder \n\t"
|
||||||
|
|
||||||
|
"sll %[temp_1], %[n], 1 \n\t"
|
||||||
|
"addu %[res_coeffs], %[res_coeffs], %[temp_1] \n\t"
|
||||||
|
"slt %[temp_2], %[n], %[res_last] \n\t"
|
||||||
|
"bnez %[temp_2], 1f \n\t"
|
||||||
|
" li %[cost_add], 0 \n\t"
|
||||||
|
"b 2f \n\t"
|
||||||
|
" nop \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"lh %[v_reg], 0(%[res_coeffs]) \n\t"
|
||||||
|
"addu %[b_reg], %[n], %[VP8EncBands] \n\t"
|
||||||
|
"move %[temp_1], %[const_max_level] \n\t"
|
||||||
|
"addu %[cost], %[cost], %[cost_add] \n\t"
|
||||||
|
"negu %[temp_2], %[v_reg] \n\t"
|
||||||
|
"slti %[temp_3], %[v_reg], 0 \n\t"
|
||||||
|
"movn %[v_reg], %[temp_2], %[temp_3] \n\t"
|
||||||
|
"lbu %[b_reg], 1(%[b_reg]) \n\t"
|
||||||
|
"li %[cost_add], 0 \n\t"
|
||||||
|
|
||||||
|
"sltiu %[temp_3], %[v_reg], 2 \n\t"
|
||||||
|
"move %[ctx_reg], %[v_reg] \n\t"
|
||||||
|
"movz %[ctx_reg], %[const_2], %[temp_3] \n\t"
|
||||||
|
// cost += VP8LevelCost(t, v);
|
||||||
|
"slt %[temp_3], %[v_reg], %[const_max_level] \n\t"
|
||||||
|
"movn %[temp_1], %[v_reg], %[temp_3] \n\t"
|
||||||
|
"sll %[temp_2], %[v_reg], 1 \n\t"
|
||||||
|
"addu %[temp_2], %[temp_2], %[VP8LevelFixedCosts] \n\t"
|
||||||
|
"lhu %[temp_2], 0(%[temp_2]) \n\t"
|
||||||
|
"sll %[temp_1], %[temp_1], 1 \n\t"
|
||||||
|
"addu %[temp_1], %[temp_1], %[t] \n\t"
|
||||||
|
"lhu %[temp_3], 0(%[temp_1]) \n\t"
|
||||||
|
"addu %[cost], %[cost], %[temp_2] \n\t"
|
||||||
|
|
||||||
|
// t = res->cost[b][ctx];
|
||||||
|
"sll %[temp_1], %[ctx_reg], 7 \n\t"
|
||||||
|
"sll %[temp_2], %[ctx_reg], 3 \n\t"
|
||||||
|
"addu %[cost], %[cost], %[temp_3] \n\t"
|
||||||
|
"addu %[temp_1], %[temp_1], %[temp_2] \n\t"
|
||||||
|
"sll %[temp_2], %[b_reg], 3 \n\t"
|
||||||
|
"sll %[temp_3], %[b_reg], 5 \n\t"
|
||||||
|
"sub %[temp_2], %[temp_3], %[temp_2] \n\t"
|
||||||
|
"sll %[temp_3], %[temp_2], 4 \n\t"
|
||||||
|
"addu %[temp_1], %[temp_1], %[temp_3] \n\t"
|
||||||
|
"addu %[temp_2], %[temp_2], %[res_cost] \n\t"
|
||||||
|
"addiu %[n], %[n], 1 \n\t"
|
||||||
|
"addu %[t], %[temp_1], %[temp_2] \n\t"
|
||||||
|
"slt %[temp_1], %[n], %[res_last] \n\t"
|
||||||
|
"bnez %[temp_1], 1b \n\t"
|
||||||
|
" addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
|
||||||
|
"2: \n\t"
|
||||||
|
|
||||||
|
".set pop \n\t"
|
||||||
|
: [cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg),
|
||||||
|
[ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add),
|
||||||
|
[temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3)
|
||||||
|
: [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last),
|
||||||
|
[VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands),
|
||||||
|
[const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob),
|
||||||
|
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs),
|
||||||
|
[res_cost]"r"(res_cost)
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Last coefficient is always non-zero
|
||||||
|
{
|
||||||
|
const int v = abs(res->coeffs[n]);
|
||||||
|
assert(v != 0);
|
||||||
|
cost += VP8LevelCost(t, v);
|
||||||
|
if (n < 15) {
|
||||||
|
const int b = VP8EncBands[n + 1];
|
||||||
|
const int ctx = (v == 1) ? 1 : 2;
|
||||||
|
const int last_p0 = res->prob[b][ctx][0];
|
||||||
|
cost += VP8BitCost(0, last_p0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // WEBP_USE_MIPS32
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Entry point
|
||||||
|
|
||||||
|
// Prototype for the entry point defined just below.
extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void);

// Points VP8GetResidualCost at the MIPS32 implementation from this file.
// When WEBP_USE_MIPS32 is not defined the function is still emitted, but it
// does nothing.
void VP8EncDspCostInitMIPS32(void) {
#if defined(WEBP_USE_MIPS32)
  VP8GetResidualCost = GetResidualCost;
#endif  // WEBP_USE_MIPS32
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
112
src/dsp/cost_mips_dsp_r2.c
Normal file
112
src/dsp/cost_mips_dsp_r2.c
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style license
|
||||||
|
// that can be found in the COPYING file in the root of the source
|
||||||
|
// tree. An additional intellectual property rights grant can be found
|
||||||
|
// in the file PATENTS. All contributing project authors may
|
||||||
|
// be found in the AUTHORS file in the root of the source tree.
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
|
||||||
|
|
||||||
|
#include "./dsp.h"
|
||||||
|
|
||||||
|
#if defined(WEBP_USE_MIPS_DSP_R2)
|
||||||
|
|
||||||
|
#include "../enc/cost.h"
|
||||||
|
|
||||||
|
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||||
|
int temp0, temp1, temp2;
|
||||||
|
int v_reg, b_reg, ctx_reg;
|
||||||
|
int n = res->first;
|
||||||
|
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||||
|
int p0 = res->prob[n][ctx0][0];
|
||||||
|
const uint16_t* t = res->cost[n][ctx0];
|
||||||
|
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||||
|
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||||
|
// be missing during the loop.
|
||||||
|
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
|
||||||
|
int res_cost = (int)res->cost;
|
||||||
|
int res_coeffs = (int)res->coeffs;
|
||||||
|
int res_last = (int)res->last;
|
||||||
|
const int const_max_level = MAX_VARIABLE_LEVEL;
|
||||||
|
const int const_2 = 2;
|
||||||
|
const int const_408 = 408;
|
||||||
|
int mult_136_408 = 136;
|
||||||
|
|
||||||
|
if (res->last < 0) {
|
||||||
|
return VP8BitCost(0, p0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
".set push \n\t"
|
||||||
|
".set noreorder \n\t"
|
||||||
|
"subu %[temp1], %[res_last], %[n] \n\t"
|
||||||
|
"blez %[temp1], 2f \n\t"
|
||||||
|
" ins %[mult_136_408], %[const_408], 16, 16 \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"sll %[temp0], %[n], 1 \n\t"
|
||||||
|
"lhx %[v_reg], %[temp0](%[res_coeffs]) \n\t"
|
||||||
|
"addiu %[n], %[n], 1 \n\t"
|
||||||
|
"absq_s.w %[v_reg], %[v_reg] \n\t"
|
||||||
|
"lbux %[b_reg], %[n](%[VP8EncBands]) \n\t"
|
||||||
|
"sltiu %[temp2], %[v_reg], 2 \n\t"
|
||||||
|
"move %[ctx_reg], %[v_reg] \n\t"
|
||||||
|
"movz %[ctx_reg], %[const_2], %[temp2] \n\t"
|
||||||
|
"sll %[temp1], %[v_reg], 1 \n\t"
|
||||||
|
"lhx %[temp1], %[temp1](%[VP8LevelFixedCosts]) \n\t"
|
||||||
|
"slt %[temp2], %[v_reg], %[const_max_level] \n\t"
|
||||||
|
"ins %[ctx_reg], %[b_reg], 16, 16 \n\t"
|
||||||
|
"movz %[v_reg], %[const_max_level], %[temp2] \n\t"
|
||||||
|
"mul.ph %[temp0], %[ctx_reg], %[mult_136_408] \n\t"
|
||||||
|
"addu %[cost], %[cost], %[temp1] \n\t"
|
||||||
|
"sll %[v_reg], %[v_reg], 1 \n\t"
|
||||||
|
"lhx %[temp2], %[v_reg](%[t]) \n\t"
|
||||||
|
"ext %[temp1], %[temp0], 0, 16 \n\t"
|
||||||
|
"ext %[temp0], %[temp0], 16, 16 \n\t"
|
||||||
|
"addu %[cost], %[cost], %[temp2] \n\t"
|
||||||
|
"addu %[temp1], %[temp1], %[res_cost] \n\t"
|
||||||
|
"bne %[n], %[res_last], 1b \n\t"
|
||||||
|
" addu %[t], %[temp0], %[temp1] \n\t"
|
||||||
|
"2: \n\t"
|
||||||
|
".set pop \n\t"
|
||||||
|
: [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
|
||||||
|
[ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [temp0]"=&r"(temp0),
|
||||||
|
[temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
|
||||||
|
[mult_136_408]"+&r"(mult_136_408)
|
||||||
|
: [const_2]"r"(const_2), [res_last]"r"(res_last),
|
||||||
|
[VP8EncBands]"r"(VP8EncBands), [const_max_level]"r"(const_max_level),
|
||||||
|
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_cost]"r"(res_cost),
|
||||||
|
[const_408]"r"(const_408), [res_coeffs]"r"(res_coeffs)
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Last coefficient is always non-zero
|
||||||
|
{
|
||||||
|
const int v = abs(res->coeffs[n]);
|
||||||
|
assert(v != 0);
|
||||||
|
cost += VP8LevelCost(t, v);
|
||||||
|
if (n < 15) {
|
||||||
|
const int b = VP8EncBands[n + 1];
|
||||||
|
const int ctx = (v == 1) ? 1 : 2;
|
||||||
|
const int last_p0 = res->prob[b][ctx][0];
|
||||||
|
cost += VP8BitCost(0, last_p0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // WEBP_USE_MIPS_DSP_R2
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Entry point
|
||||||
|
|
||||||
|
// Prototype for the entry point defined just below.
extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void);

// Points VP8GetResidualCost at the MIPS DSP R2 implementation from this file.
// When WEBP_USE_MIPS_DSP_R2 is not defined the function is still emitted, but
// it does nothing.
void VP8EncDspCostInitMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
  VP8GetResidualCost = GetResidualCost;
#endif  // WEBP_USE_MIPS_DSP_R2
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
@ -176,6 +176,21 @@ void VP8LSetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
|||||||
// must be called before using any of the above
|
// must be called before using any of the above
|
||||||
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void);
|
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void);
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// cost functions (encoding)
|
||||||
|
|
||||||
|
// Forward declaration: only pointers to the struct are needed here.
struct VP8Residual;

// Routine that attaches a coefficient array to a residual.
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
                                         struct VP8Residual* const res);
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;

// Cost calculation function.
typedef int (*VP8GetResidualCostFunc)(int ctx0,
                                      const struct VP8Residual* const res);
extern VP8GetResidualCostFunc VP8GetResidualCost;

// Must be called first, before either function pointer above is used.
void VP8EncDspCostInit(void);
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Decoding
|
// Decoding
|
||||||
|
|
||||||
|
@ -513,119 +513,6 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
|||||||
#undef VERTICAL_PASS
|
#undef VERTICAL_PASS
|
||||||
#undef HORIZONTAL_PASS
|
#undef HORIZONTAL_PASS
|
||||||
|
|
||||||
// Forward declaration.
|
|
||||||
extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res);
|
|
||||||
|
|
||||||
int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) {
|
|
||||||
int n = res->first;
|
|
||||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
|
||||||
int p0 = res->prob[n][ctx0][0];
|
|
||||||
const uint16_t* t = res->cost[n][ctx0];
|
|
||||||
int cost;
|
|
||||||
const int const_2 = 2;
|
|
||||||
const int const_255 = 255;
|
|
||||||
const int const_max_level = MAX_VARIABLE_LEVEL;
|
|
||||||
int res_cost;
|
|
||||||
int res_prob;
|
|
||||||
int res_coeffs;
|
|
||||||
int res_last;
|
|
||||||
int v_reg;
|
|
||||||
int b_reg;
|
|
||||||
int ctx_reg;
|
|
||||||
int cost_add, temp_1, temp_2, temp_3;
|
|
||||||
|
|
||||||
if (res->last < 0) {
|
|
||||||
return VP8BitCost(0, p0);
|
|
||||||
}
|
|
||||||
|
|
||||||
cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
|
|
||||||
|
|
||||||
res_cost = (int)res->cost;
|
|
||||||
res_prob = (int)res->prob;
|
|
||||||
res_coeffs = (int)res->coeffs;
|
|
||||||
res_last = (int)res->last;
|
|
||||||
|
|
||||||
__asm__ volatile(
|
|
||||||
".set push \n\t"
|
|
||||||
".set noreorder \n\t"
|
|
||||||
|
|
||||||
"sll %[temp_1], %[n], 1 \n\t"
|
|
||||||
"addu %[res_coeffs], %[res_coeffs], %[temp_1] \n\t"
|
|
||||||
"slt %[temp_2], %[n], %[res_last] \n\t"
|
|
||||||
"bnez %[temp_2], 1f \n\t"
|
|
||||||
" li %[cost_add], 0 \n\t"
|
|
||||||
"b 2f \n\t"
|
|
||||||
" nop \n\t"
|
|
||||||
"1: \n\t"
|
|
||||||
"lh %[v_reg], 0(%[res_coeffs]) \n\t"
|
|
||||||
"addu %[b_reg], %[n], %[VP8EncBands] \n\t"
|
|
||||||
"move %[temp_1], %[const_max_level] \n\t"
|
|
||||||
"addu %[cost], %[cost], %[cost_add] \n\t"
|
|
||||||
"negu %[temp_2], %[v_reg] \n\t"
|
|
||||||
"slti %[temp_3], %[v_reg], 0 \n\t"
|
|
||||||
"movn %[v_reg], %[temp_2], %[temp_3] \n\t"
|
|
||||||
"lbu %[b_reg], 1(%[b_reg]) \n\t"
|
|
||||||
"li %[cost_add], 0 \n\t"
|
|
||||||
|
|
||||||
"sltiu %[temp_3], %[v_reg], 2 \n\t"
|
|
||||||
"move %[ctx_reg], %[v_reg] \n\t"
|
|
||||||
"movz %[ctx_reg], %[const_2], %[temp_3] \n\t"
|
|
||||||
// cost += VP8LevelCost(t, v);
|
|
||||||
"slt %[temp_3], %[v_reg], %[const_max_level] \n\t"
|
|
||||||
"movn %[temp_1], %[v_reg], %[temp_3] \n\t"
|
|
||||||
"sll %[temp_2], %[v_reg], 1 \n\t"
|
|
||||||
"addu %[temp_2], %[temp_2], %[VP8LevelFixedCosts] \n\t"
|
|
||||||
"lhu %[temp_2], 0(%[temp_2]) \n\t"
|
|
||||||
"sll %[temp_1], %[temp_1], 1 \n\t"
|
|
||||||
"addu %[temp_1], %[temp_1], %[t] \n\t"
|
|
||||||
"lhu %[temp_3], 0(%[temp_1]) \n\t"
|
|
||||||
"addu %[cost], %[cost], %[temp_2] \n\t"
|
|
||||||
|
|
||||||
// t = res->cost[b][ctx];
|
|
||||||
"sll %[temp_1], %[ctx_reg], 7 \n\t"
|
|
||||||
"sll %[temp_2], %[ctx_reg], 3 \n\t"
|
|
||||||
"addu %[cost], %[cost], %[temp_3] \n\t"
|
|
||||||
"addu %[temp_1], %[temp_1], %[temp_2] \n\t"
|
|
||||||
"sll %[temp_2], %[b_reg], 3 \n\t"
|
|
||||||
"sll %[temp_3], %[b_reg], 5 \n\t"
|
|
||||||
"sub %[temp_2], %[temp_3], %[temp_2] \n\t"
|
|
||||||
"sll %[temp_3], %[temp_2], 4 \n\t"
|
|
||||||
"addu %[temp_1], %[temp_1], %[temp_3] \n\t"
|
|
||||||
"addu %[temp_2], %[temp_2], %[res_cost] \n\t"
|
|
||||||
"addiu %[n], %[n], 1 \n\t"
|
|
||||||
"addu %[t], %[temp_1], %[temp_2] \n\t"
|
|
||||||
"slt %[temp_1], %[n], %[res_last] \n\t"
|
|
||||||
"bnez %[temp_1], 1b \n\t"
|
|
||||||
" addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
|
|
||||||
"2: \n\t"
|
|
||||||
|
|
||||||
".set pop \n\t"
|
|
||||||
: [cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg),
|
|
||||||
[ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add),
|
|
||||||
[temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3)
|
|
||||||
: [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last),
|
|
||||||
[VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands),
|
|
||||||
[const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob),
|
|
||||||
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs),
|
|
||||||
[res_cost]"r"(res_cost)
|
|
||||||
: "memory"
|
|
||||||
);
|
|
||||||
|
|
||||||
// Last coefficient is always non-zero
|
|
||||||
{
|
|
||||||
const int v = abs(res->coeffs[n]);
|
|
||||||
assert(v != 0);
|
|
||||||
cost += VP8LevelCost(t, v);
|
|
||||||
if (n < 15) {
|
|
||||||
const int b = VP8EncBands[n + 1];
|
|
||||||
const int ctx = (v == 1) ? 1 : 2;
|
|
||||||
const int last_p0 = res->prob[b][ctx][0];
|
|
||||||
cost += VP8BitCost(0, last_p0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return cost;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if !defined(WORK_AROUND_GCC)
|
#if !defined(WORK_AROUND_GCC)
|
||||||
|
|
||||||
#define GET_SSE_INNER(A, B, C, D) \
|
#define GET_SSE_INNER(A, B, C, D) \
|
||||||
|
@ -486,66 +486,6 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
|
|||||||
{ 305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501 } }
|
{ 305, 1167, 1358, 899, 1587, 1587, 987, 1988, 1332, 501 } }
|
||||||
};
|
};
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Mode costs
|
|
||||||
|
|
||||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
|
||||||
int n = res->first;
|
|
||||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
|
||||||
const int p0 = res->prob[n][ctx0][0];
|
|
||||||
const uint16_t* t = res->cost[n][ctx0];
|
|
||||||
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
|
||||||
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
|
||||||
// be missing during the loop.
|
|
||||||
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
|
|
||||||
|
|
||||||
if (res->last < 0) {
|
|
||||||
return VP8BitCost(0, p0);
|
|
||||||
}
|
|
||||||
for (; n < res->last; ++n) {
|
|
||||||
const int v = abs(res->coeffs[n]);
|
|
||||||
const int b = VP8EncBands[n + 1];
|
|
||||||
const int ctx = (v >= 2) ? 2 : v;
|
|
||||||
cost += VP8LevelCost(t, v);
|
|
||||||
t = res->cost[b][ctx];
|
|
||||||
}
|
|
||||||
// Last coefficient is always non-zero
|
|
||||||
{
|
|
||||||
const int v = abs(res->coeffs[n]);
|
|
||||||
assert(v != 0);
|
|
||||||
cost += VP8LevelCost(t, v);
|
|
||||||
if (n < 15) {
|
|
||||||
const int b = VP8EncBands[n + 1];
|
|
||||||
const int ctx = (v == 1) ? 1 : 2;
|
|
||||||
const int last_p0 = res->prob[b][ctx][0];
|
|
||||||
cost += VP8BitCost(0, last_p0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return cost;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// init function
|
|
||||||
|
|
||||||
#if defined(WEBP_USE_MIPS32)
|
|
||||||
extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res);
|
|
||||||
#endif // WEBP_USE_MIPS32
|
|
||||||
|
|
||||||
// TODO(skal): this, and GetResidualCost(), should probably go somewhere
|
|
||||||
// under src/dsp/ at some point.
|
|
||||||
VP8GetResidualCostFunc VP8GetResidualCost;
|
|
||||||
|
|
||||||
void VP8GetResidualCostInit(void) {
|
|
||||||
VP8GetResidualCost = GetResidualCost;
|
|
||||||
if (VP8GetCPUInfo != NULL) {
|
|
||||||
#if defined(WEBP_USE_MIPS32)
|
|
||||||
if (VP8GetCPUInfo(kMIPS32)) {
|
|
||||||
VP8GetResidualCost = VP8GetResidualCostMIPS32;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// helper functions for residuals struct VP8Residual.
|
// helper functions for residuals struct VP8Residual.
|
||||||
|
|
||||||
@ -558,41 +498,6 @@ void VP8InitResidual(int first, int coeff_type,
|
|||||||
res->first = first;
|
res->first = first;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetResidualCoeffs(const int16_t* const coeffs,
|
|
||||||
VP8Residual* const res) {
|
|
||||||
int n;
|
|
||||||
res->last = -1;
|
|
||||||
assert(res->first == 0 || coeffs[0] == 0);
|
|
||||||
for (n = 15; n >= 0; --n) {
|
|
||||||
if (coeffs[n]) {
|
|
||||||
res->last = n;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
res->coeffs = coeffs;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// init function
|
|
||||||
|
|
||||||
#if defined(WEBP_USE_SSE2)
|
|
||||||
extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
|
||||||
VP8Residual* const res);
|
|
||||||
#endif // WEBP_USE_SSE2
|
|
||||||
|
|
||||||
VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
|
||||||
|
|
||||||
void VP8SetResidualCoeffsInit(void) {
|
|
||||||
VP8SetResidualCoeffs = SetResidualCoeffs;
|
|
||||||
if (VP8GetCPUInfo != NULL) {
|
|
||||||
#if defined(WEBP_USE_SSE2)
|
|
||||||
if (VP8GetCPUInfo(kSSE2)) {
|
|
||||||
VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Mode costs
|
// Mode costs
|
||||||
|
|
||||||
|
@ -24,7 +24,8 @@ extern "C" {
|
|||||||
|
|
||||||
// On-the-fly info about the current set of residuals. Handy to avoid
|
// On-the-fly info about the current set of residuals. Handy to avoid
|
||||||
// passing zillions of params.
|
// passing zillions of params.
|
||||||
typedef struct {
|
typedef struct VP8Residual VP8Residual;
|
||||||
|
struct VP8Residual {
|
||||||
int first;
|
int first;
|
||||||
int last;
|
int last;
|
||||||
const int16_t* coeffs;
|
const int16_t* coeffs;
|
||||||
@ -33,17 +34,11 @@ typedef struct {
|
|||||||
ProbaArray* prob;
|
ProbaArray* prob;
|
||||||
StatsArray* stats;
|
StatsArray* stats;
|
||||||
CostArray* cost;
|
CostArray* cost;
|
||||||
} VP8Residual;
|
};
|
||||||
|
|
||||||
void VP8InitResidual(int first, int coeff_type,
|
void VP8InitResidual(int first, int coeff_type,
|
||||||
VP8Encoder* const enc, VP8Residual* const res);
|
VP8Encoder* const enc, VP8Residual* const res);
|
||||||
|
|
||||||
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
|
|
||||||
VP8Residual* const res);
|
|
||||||
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
|
||||||
|
|
||||||
void VP8SetResidualCoeffsInit(void); // must be called first
|
|
||||||
|
|
||||||
int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
|
int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
|
||||||
|
|
||||||
// approximate cost per level:
|
// approximate cost per level:
|
||||||
@ -55,12 +50,6 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
|
|||||||
return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];
|
return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cost calculation function.
|
|
||||||
typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res);
|
|
||||||
extern VP8GetResidualCostFunc VP8GetResidualCost;
|
|
||||||
|
|
||||||
void VP8GetResidualCostInit(void); // must be called first
|
|
||||||
|
|
||||||
// Level cost calculations
|
// Level cost calculations
|
||||||
extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
|
extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
|
||||||
void VP8CalculateLevelCosts(VP8Proba* const proba);
|
void VP8CalculateLevelCosts(VP8Proba* const proba);
|
||||||
|
@ -14,8 +14,9 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "./vp8enci.h"
|
|
||||||
#include "./cost.h"
|
#include "./cost.h"
|
||||||
|
#include "./vp8enci.h"
|
||||||
|
#include "../dsp/dsp.h"
|
||||||
#include "../webp/format_constants.h" // RIFF constants
|
#include "../webp/format_constants.h" // RIFF constants
|
||||||
|
|
||||||
#define SEGMENT_VISU 0
|
#define SEGMENT_VISU 0
|
||||||
|
@ -16,9 +16,9 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "./cost.h"
|
||||||
#include "./vp8enci.h"
|
#include "./vp8enci.h"
|
||||||
#include "./vp8li.h"
|
#include "./vp8li.h"
|
||||||
#include "./cost.h"
|
|
||||||
#include "../utils/utils.h"
|
#include "../utils/utils.h"
|
||||||
|
|
||||||
// #define PRINT_MEMORY_INFO
|
// #define PRINT_MEMORY_INFO
|
||||||
@ -225,8 +225,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
|
|||||||
ResetSegmentHeader(enc);
|
ResetSegmentHeader(enc);
|
||||||
ResetFilterHeader(enc);
|
ResetFilterHeader(enc);
|
||||||
ResetBoundaryPredictions(enc);
|
ResetBoundaryPredictions(enc);
|
||||||
VP8GetResidualCostInit();
|
VP8EncDspCostInit();
|
||||||
VP8SetResidualCoeffsInit();
|
|
||||||
VP8EncInitAlpha(enc);
|
VP8EncInitAlpha(enc);
|
||||||
|
|
||||||
// lower quality means smaller output -> we modulate a little the page
|
// lower quality means smaller output -> we modulate a little the page
|
||||||
|
Loading…
Reference in New Issue
Block a user