From 8a218b4a96ed627fad01fe18e2496cc0e9876914 Mon Sep 17 00:00:00 2001 From: Djordje Pesut Date: Fri, 20 Feb 2015 12:34:09 +0100 Subject: [PATCH] MIPS: [mips32|dspr2]: GetResidualCost rebased Change-Id: Ie15524c773f7a8c79e002097881a508187ca7cc6 --- src/dsp/cost_mips32.c | 133 +++++++++++++++---------------------- src/dsp/cost_mips_dsp_r2.c | 55 +++++++-------- 2 files changed, 77 insertions(+), 111 deletions(-) diff --git a/src/dsp/cost_mips32.c b/src/dsp/cost_mips32.c index 11e29b8d..8e71115e 100644 --- a/src/dsp/cost_mips32.c +++ b/src/dsp/cost_mips32.c @@ -16,97 +16,68 @@ #include "../enc/cost.h" static int GetResidualCost(int ctx0, const VP8Residual* const res) { + int temp0, temp1; + int v_reg, ctx_reg; int n = res->first; // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 int p0 = res->prob[n][ctx0][0]; - const uint16_t* t = res->cost[n][ctx0]; - int cost; - const int const_2 = 2; - const int const_255 = 255; + CostArrayPtr const costs = res->costs; + const uint16_t* t = costs[n][ctx0]; + // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 + // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll + // be missing during the loop. + int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; + const int16_t* res_coeffs = res->coeffs; + const int res_last = res->last; const int const_max_level = MAX_VARIABLE_LEVEL; - int res_cost; - int res_prob; - int res_coeffs; - int res_last; - int v_reg; - int b_reg; - int ctx_reg; - int cost_add, temp_1, temp_2, temp_3; + const int const_2 = 2; + const uint16_t** p_costs = &costs[n][0]; + const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs); if (res->last < 0) { return VP8BitCost(0, p0); } - cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; - - res_cost = (int)res->cost; - res_prob = (int)res->prob; - res_coeffs = (int)res->coeffs; - res_last = (int)res->last; - - __asm__ volatile( - ".set push \n\t" - ".set noreorder \n\t" - - "sll %[temp_1], %[n], 1 \n\t" - "addu %[res_coeffs], %[res_coeffs], %[temp_1] \n\t" - "slt %[temp_2], %[n], %[res_last] \n\t" - "bnez %[temp_2], 1f \n\t" - " li %[cost_add], 0 \n\t" - "b 2f \n\t" - " nop \n\t" + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "subu %[temp1], %[res_last], %[n] \n\t" + "sll %[temp0], %[n], 1 \n\t" + "blez %[temp1], 2f \n\t" + " addu %[res_coeffs], %[res_coeffs], %[temp0] \n\t" "1: \n\t" - "lh %[v_reg], 0(%[res_coeffs]) \n\t" - "addu %[b_reg], %[n], %[VP8EncBands] \n\t" - "move %[temp_1], %[const_max_level] \n\t" - "addu %[cost], %[cost], %[cost_add] \n\t" - "negu %[temp_2], %[v_reg] \n\t" - "slti %[temp_3], %[v_reg], 0 \n\t" - "movn %[v_reg], %[temp_2], %[temp_3] \n\t" - "lbu %[b_reg], 1(%[b_reg]) \n\t" - "li %[cost_add], 0 \n\t" - - "sltiu %[temp_3], %[v_reg], 2 \n\t" - "move %[ctx_reg], %[v_reg] \n\t" - "movz %[ctx_reg], %[const_2], %[temp_3] \n\t" - // cost += VP8LevelCost(t, v); - "slt %[temp_3], %[v_reg], %[const_max_level] \n\t" - "movn %[temp_1], %[v_reg], %[temp_3] \n\t" - "sll %[temp_2], %[v_reg], 1 \n\t" - "addu %[temp_2], %[temp_2], %[VP8LevelFixedCosts] \n\t" - "lhu %[temp_2], 0(%[temp_2]) \n\t" - "sll %[temp_1], %[temp_1], 1 \n\t" - "addu %[temp_1], %[temp_1], %[t] \n\t" - "lhu %[temp_3], 0(%[temp_1]) \n\t" - "addu %[cost], %[cost], %[temp_2] \n\t" - - // t = res->cost[b][ctx]; - "sll %[temp_1], %[ctx_reg], 7 \n\t" - "sll %[temp_2], %[ctx_reg], 3 \n\t" - "addu %[cost], %[cost], %[temp_3] \n\t" - "addu %[temp_1], %[temp_1], %[temp_2] \n\t" - "sll %[temp_2], %[b_reg], 3 \n\t" - "sll %[temp_3], %[b_reg], 5 \n\t" - "sub %[temp_2], %[temp_3], %[temp_2] \n\t" - "sll %[temp_3], %[temp_2], 4 \n\t" - "addu %[temp_1], %[temp_1], %[temp_3] \n\t" - "addu %[temp_2], %[temp_2], %[res_cost] \n\t" - "addiu %[n], %[n], 1 \n\t" - "addu %[t], %[temp_1], %[temp_2] \n\t" - "slt %[temp_1], %[n], %[res_last] \n\t" - "bnez %[temp_1], 1b \n\t" - " addiu %[res_coeffs], %[res_coeffs], 2 \n\t" - "2: \n\t" - - ".set pop \n\t" - : [cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg), - [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add), - [temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3) - : [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last), - [VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands), - [const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob), - [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs), - [res_cost]"r"(res_cost) + "lh %[v_reg], 0(%[res_coeffs]) \n\t" + "addiu %[n], %[n], 1 \n\t" + "negu %[temp0], %[v_reg] \n\t" + "slti %[temp1], %[v_reg], 0 \n\t" + "movn %[v_reg], %[temp0], %[temp1] \n\t" + "sltiu %[temp0], %[v_reg], 2 \n\t" + "move %[ctx_reg], %[v_reg] \n\t" + "movz %[ctx_reg], %[const_2], %[temp0] \n\t" + "sll %[temp1], %[v_reg], 1 \n\t" + "addu %[temp1], %[temp1], %[VP8LevelFixedCosts] \n\t" + "lhu %[temp1], 0(%[temp1]) \n\t" + "slt %[temp0], %[v_reg], %[const_max_level] \n\t" + "movz %[v_reg], %[const_max_level], %[temp0] \n\t" + "addu %[cost], %[cost], %[temp1] \n\t" + "sll %[v_reg], %[v_reg], 1 \n\t" + "sll %[ctx_reg], %[ctx_reg], 2 \n\t" + "addu %[v_reg], %[v_reg], %[t] \n\t" + "lhu %[temp0], 0(%[v_reg]) \n\t" + "addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t" + "addu %[t], %[p_costs], %[ctx_reg] \n\t" + "addu %[cost], %[cost], %[temp0] \n\t" + "addiu %[res_coeffs], %[res_coeffs], 2 \n\t" + "bne %[n], %[res_last], 1b \n\t" + " lw %[t], 0(%[t]) \n\t" + "2: \n\t" + ".set pop \n\t" + : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg), + [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0), + [temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs) + : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level), + [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last), + [inc_p_costs]"r"(inc_p_costs) : "memory" ); diff --git a/src/dsp/cost_mips_dsp_r2.c b/src/dsp/cost_mips_dsp_r2.c index 89cd4cc4..f66ee50b 100644 --- a/src/dsp/cost_mips_dsp_r2.c +++ b/src/dsp/cost_mips_dsp_r2.c @@ -16,68 +16,63 @@ #include "../enc/cost.h" static int GetResidualCost(int ctx0, const VP8Residual* const res) { - int temp0, temp1, temp2; - int v_reg, b_reg, ctx_reg; + int temp0, temp1; + int v_reg, ctx_reg; int n = res->first; // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 int p0 = res->prob[n][ctx0][0]; - const uint16_t* t = res->cost[n][ctx0]; + CostArrayPtr const costs = res->costs; + const uint16_t* t = costs[n][ctx0]; // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll // be missing during the loop. int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; - int res_cost = (int)res->cost; - int res_coeffs = (int)res->coeffs; - int res_last = (int)res->last; + const int16_t* res_coeffs = res->coeffs; + const int res_last = res->last; const int const_max_level = MAX_VARIABLE_LEVEL; const int const_2 = 2; - const int const_408 = 408; - int mult_136_408 = 136; + const uint16_t** p_costs = &costs[n][0]; + const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs); if (res->last < 0) { return VP8BitCost(0, p0); } - __asm__ volatile( + __asm__ volatile ( ".set push \n\t" ".set noreorder \n\t" "subu %[temp1], %[res_last], %[n] \n\t" "blez %[temp1], 2f \n\t" - " ins %[mult_136_408], %[const_408], 16, 16 \n\t" + " nop \n\t" "1: \n\t" "sll %[temp0], %[n], 1 \n\t" "lhx %[v_reg], %[temp0](%[res_coeffs]) \n\t" "addiu %[n], %[n], 1 \n\t" "absq_s.w %[v_reg], %[v_reg] \n\t" - "lbux %[b_reg], %[n](%[VP8EncBands]) \n\t" - "sltiu %[temp2], %[v_reg], 2 \n\t" + "sltiu %[temp0], %[v_reg], 2 \n\t" "move %[ctx_reg], %[v_reg] \n\t" - "movz %[ctx_reg], %[const_2], %[temp2] \n\t" + "movz %[ctx_reg], %[const_2], %[temp0] \n\t" "sll %[temp1], %[v_reg], 1 \n\t" "lhx %[temp1], %[temp1](%[VP8LevelFixedCosts]) \n\t" - "slt %[temp2], %[v_reg], %[const_max_level] \n\t" - "ins %[ctx_reg], %[b_reg], 16, 16 \n\t" - "movz %[v_reg], %[const_max_level], %[temp2] \n\t" - "mul.ph %[temp0], %[ctx_reg], %[mult_136_408] \n\t" + "slt %[temp0], %[v_reg], %[const_max_level] \n\t" + "movz %[v_reg], %[const_max_level], %[temp0] \n\t" "addu %[cost], %[cost], %[temp1] \n\t" "sll %[v_reg], %[v_reg], 1 \n\t" - "lhx %[temp2], %[v_reg](%[t]) \n\t" - "ext %[temp1], %[temp0], 0, 16 \n\t" - "ext %[temp0], %[temp0], 16, 16 \n\t" - "addu %[cost], %[cost], %[temp2] \n\t" - "addu %[temp1], %[temp1], %[res_cost] \n\t" + "sll %[ctx_reg], %[ctx_reg], 2 \n\t" + "lhx %[temp0], %[v_reg](%[t]) \n\t" + "addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t" + "addu %[t], %[p_costs], %[ctx_reg] \n\t" + "addu %[cost], %[cost], %[temp0] \n\t" "bne %[n], %[res_last], 1b \n\t" - " addu %[t], %[temp0], %[temp1] \n\t" + " lw %[t], 0(%[t]) \n\t" "2: \n\t" ".set pop \n\t" : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg), - [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [temp0]"=&r"(temp0), - [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), - [mult_136_408]"+&r"(mult_136_408) - : [const_2]"r"(const_2), [res_last]"r"(res_last), - [VP8EncBands]"r"(VP8EncBands), [const_max_level]"r"(const_max_level), - [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_cost]"r"(res_cost), - [const_408]"r"(const_408), [res_coeffs]"r"(res_coeffs) + [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0), + [temp1]"=&r"(temp1) + : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level), + [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last), + [res_coeffs]"r"(res_coeffs), [inc_p_costs]"r"(inc_p_costs) : "memory" );