dsp: avoid defining _C functions w/NEON builds

When targeting NEON, C functions that have NEON equivalents won't be used but will still contribute to binary size. The same goes for SSE2, etc., but this change is primarily concerned with binary sizes for Android ARM targets. Note that '-noasm', or otherwise modifying VP8GetCPUInfo, will have no effect on the use of NEON functions. This decision can be overridden by defining WEBP_DSP_OMIT_C_CODE to 0. Change-Id: I47bd453c84a3d341ca39bc986a39eb9c785aface
2025-07-13 06:24:27 +02:00 · 2017-10-26 20:31:27 -07:00
parent 6ba98764e8
commit b7971d0e22
10 changed files with 384 additions and 71 deletions
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@ -21,9 +21,11 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
 }

+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int clip_max(int v, int max) {
  return (v > max) ? max : v;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms:
@ -56,6 +58,7 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
  histo->last_non_zero = last_non_zero;
 }

+#if !WEBP_NEON_OMIT_C_CODE
 static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
                               int start_block, int end_block,
                               VP8Histogram* const histo) {
@ -76,6 +79,7 @@ static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
  }
  VP8SetHistogramData(distribution, histo);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // run-time tables (~4k)
@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)

+#if !WEBP_NEON_OMIT_C_CODE
+
 #define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

@ -176,6 +182,7 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
  }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE

 static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
                          int16_t* out) {
@ -183,6 +190,7 @@ static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
  VP8FTransform(src + 4, ref + 4, out + 16);
 }

+#if !WEBP_NEON_OMIT_C_CODE
 static void FTransformWHT_C(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t tmp[16];
@ -212,6 +220,7 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) {
    out[12 + i] = b3 >> 1;
  }
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE

 #undef MUL
 #undef STORE
@ -524,6 +533,7 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
 //------------------------------------------------------------------------------
 // Metric

+#if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
                              int w, int h) {
  int count = 0;
@ -551,6 +561,7 @@ static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
 static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 4, 4);
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE

 static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
  int k, x, y;
@ -572,6 +583,7 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
 // We try to match the spectral content (weighted) between source and
 // reconstructed samples.

+#if !WEBP_NEON_OMIT_C_CODE
 // Hadamard transform
 // Returns the weighted sum of the absolute value of transformed coefficients.
 // w[] contains a row-major 4 by 4 symmetric matrix.
@ -627,6 +639,7 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
  }
  return D;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE

 //------------------------------------------------------------------------------
 // Quantization
@ -663,6 +676,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
  return (last >= 0);
 }

+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
 static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
                             const VP8Matrix* const mtx) {
  int nz;
@ -670,6 +684,7 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
 }
+#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC

 //------------------------------------------------------------------------------
 // Block copy
@ -735,23 +750,29 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
  InitTables();

  // default C implementations
-  VP8CollectHistogram = CollectHistogram_C;
+#if !WEBP_NEON_OMIT_C_CODE
  VP8ITransform = ITransform_C;
  VP8FTransform = FTransform_C;
-  VP8FTransform2 = FTransform2_C;
  VP8FTransformWHT = FTransformWHT_C;
+  VP8TDisto4x4 = Disto4x4_C;
+  VP8TDisto16x16 = Disto16x16_C;
+  VP8CollectHistogram = CollectHistogram_C;
+  VP8SSE16x16 = SSE16x16_C;
+  VP8SSE16x8 = SSE16x8_C;
+  VP8SSE8x8 = SSE8x8_C;
+  VP8SSE4x4 = SSE4x4_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+  VP8EncQuantizeBlock = QuantizeBlock_C;
+  VP8EncQuantize2Blocks = Quantize2Blocks_C;
+#endif
+
+  VP8FTransform2 = FTransform2_C;
  VP8EncPredLuma4 = Intra4Preds_C;
  VP8EncPredLuma16 = Intra16Preds_C;
  VP8EncPredChroma8 = IntraChromaPreds_C;
-  VP8SSE16x16 = SSE16x16_C;
-  VP8SSE8x8 = SSE8x8_C;
-  VP8SSE16x8 = SSE16x8_C;
-  VP8SSE4x4 = SSE4x4_C;
-  VP8TDisto4x4 = Disto4x4_C;
-  VP8TDisto16x16 = Disto16x16_C;
  VP8Mean16x4 = Mean16x4_C;
-  VP8EncQuantizeBlock = QuantizeBlock_C;
-  VP8EncQuantize2Blocks = Quantize2Blocks_C;
  VP8EncQuantizeBlockWHT = QuantizeBlock_C;
  VP8Copy4x4 = Copy4x4_C;
  VP8Copy16x8 = Copy16x8_C;
@ -773,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
      VP8EncDspInitAVX2();
    }
 #endif
-#if defined(WEBP_USE_NEON)
-    if (VP8GetCPUInfo(kNEON)) {
-      VP8EncDspInitNEON();
-    }
-#endif
 #if defined(WEBP_USE_MIPS32)
    if (VP8GetCPUInfo(kMIPS32)) {
      VP8EncDspInitMIPS32();
@ -794,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
    }
 #endif
  }
+
+#if defined(WEBP_USE_NEON)
+  if (WEBP_NEON_OMIT_C_CODE ||
+      (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+    VP8EncDspInitNEON();
+  }
+#endif
+
+  assert(VP8ITransform != NULL);
+  assert(VP8FTransform != NULL);
+  assert(VP8FTransformWHT != NULL);
+  assert(VP8TDisto4x4 != NULL);
+  assert(VP8TDisto16x16 != NULL);
+  assert(VP8CollectHistogram != NULL);
+  assert(VP8SSE16x16 != NULL);
+  assert(VP8SSE16x8 != NULL);
+  assert(VP8SSE8x8 != NULL);
+  assert(VP8SSE4x4 != NULL);
+  assert(VP8EncQuantizeBlock != NULL);
+  assert(VP8EncQuantize2Blocks != NULL);
+  assert(VP8FTransform2 != NULL);
+  assert(VP8EncPredLuma4 != NULL);
+  assert(VP8EncPredLuma16 != NULL);
+  assert(VP8EncPredChroma8 != NULL);
+  assert(VP8Mean16x4 != NULL);
+  assert(VP8EncQuantizeBlockWHT != NULL);
+  assert(VP8Copy4x4 != NULL);
+  assert(VP8Copy16x8 != NULL);
+
  enc_last_cpuinfo_used = VP8GetCPUInfo;
 }