multi-threaded segment analysis

When -mt is used, the analysis pass is split in two and each half is performed in parallel. This gives a 5%-9% speed-up. This was a good occasion to revamp the iterator and analysis-loop code. As a result, the default (non-mt) behaviour is a tad (~1%) faster. Change-Id: Id0828c2ebe2e968db8ca227da80af591d6a4055f
2025-07-13 14:34:33 +02:00 · 2013-09-05 09:13:36 +02:00
parent 7e2d65950f
commit 93402f02db
6 changed files with 200 additions and 76 deletions
--- a/src/enc/quant.c
+++ b/src/enc/quant.c
@@ -367,16 +367,14 @@ const int VP8I4ModeOffsets[NUM_BMODES] = {
 };

 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
-  const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
+  const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
  VP8EncPredLuma16(it->yuv_p_, left, top);
 }

 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
-  const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
+  const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
  VP8EncPredChroma8(it->yuv_p_, left, top);
 }