From 42542be8558278a0dcbb19f7318a942765f10808 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Thu, 5 Sep 2013 10:36:39 -0700 Subject: [PATCH] up to 6% faster encoding with clang compiler mostly by revamping the main loop of GetResidualCost() and avoiding some branches Change-Id: Ib05763e18a6bf46c82dc3d5d1d8eb65e99474207 --- src/enc/frame.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/enc/frame.c b/src/enc/frame.c index 4624d913..2963ef69 100644 --- a/src/enc/frame.c +++ b/src/enc/frame.c @@ -292,31 +292,20 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) { if (res->last < 0) { return VP8BitCost(0, p0); } - cost = 0; - while (n < res->last) { - int v = res->coeffs[n]; + cost = VP8BitCost(1, p0); + for (; n < res->last; ++n) { + const int v = abs(res->coeffs[n]); const int b = VP8EncBands[n + 1]; - ++n; - if (v == 0) { - // short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0): - cost += t[0]; - t = res->cost[b][0]; - continue; - } - v = abs(v); - cost += VP8BitCost(1, p0); + const int ctx = (v >= 2) ? 2 : v; cost += VP8LevelCost(t, v); - { - const int ctx = (v == 1) ? 1 : 2; - p0 = res->prob[b][ctx][0]; - t = res->cost[b][ctx]; - } + t = res->cost[b][ctx]; + // the masking trick is faster than "if (v) cost += ..." with clang + cost += (v ? ~0U : 0) & VP8BitCost(1, res->prob[b][ctx][0]); } // Last coefficient is always non-zero { const int v = abs(res->coeffs[n]); assert(v != 0); - cost += VP8BitCost(1, p0); cost += VP8LevelCost(t, v); if (n < 15) { const int b = VP8EncBands[n + 1];