dsp: avoid defining _C functions w/NEON builds

when targeting NEON C functions with NEON equivalents won't be used, but
will contribute to binary size. the same goes for sse2, etc., but this
change is primarily concerned with binary sizes for android arm targets.

note '-noasm' or otherwise modifying VP8GetCPUInfo will have no effect
on the use of NEON functions.

this decision can be overridden by defining WEBP_DSP_OMIT_C_CODE to 0.

Change-Id: I47bd453c84a3d341ca39bc986a39eb9c785aface
This commit is contained in:
James Zern
2017-10-26 20:31:27 -07:00
parent 6ba98764e8
commit b7971d0e22
10 changed files with 384 additions and 71 deletions

View File

@ -21,9 +21,11 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int clip_max(int v, int max) {
return (v > max) ? max : v;
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
@ -56,6 +58,7 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
histo->last_non_zero = last_non_zero;
}
#if !WEBP_NEON_OMIT_C_CODE
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
@ -76,6 +79,7 @@ static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
}
VP8SetHistogramData(distribution, histo);
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// run-time tables (~4k)
@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
#if !WEBP_NEON_OMIT_C_CODE
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
@ -176,6 +182,7 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
@ -183,6 +190,7 @@ static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
VP8FTransform(src + 4, ref + 4, out + 16);
}
#if !WEBP_NEON_OMIT_C_CODE
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
// input is 12b signed
int32_t tmp[16];
@ -212,6 +220,7 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) {
out[12 + i] = b3 >> 1;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#undef MUL
#undef STORE
@ -524,6 +533,7 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
//------------------------------------------------------------------------------
// Metric
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
int w, int h) {
int count = 0;
@ -551,6 +561,7 @@ static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 4, 4);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
int k, x, y;
@ -572,6 +583,7 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
#if !WEBP_NEON_OMIT_C_CODE
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
@ -627,6 +639,7 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
}
return D;
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Quantization
@ -663,6 +676,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
return (last >= 0);
}
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
@ -670,6 +684,7 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Block copy
@ -735,23 +750,29 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
InitTables();
// default C implementations
VP8CollectHistogram = CollectHistogram_C;
#if !WEBP_NEON_OMIT_C_CODE
VP8ITransform = ITransform_C;
VP8FTransform = FTransform_C;
VP8FTransform2 = FTransform2_C;
VP8FTransformWHT = FTransformWHT_C;
VP8TDisto4x4 = Disto4x4_C;
VP8TDisto16x16 = Disto16x16_C;
VP8CollectHistogram = CollectHistogram_C;
VP8SSE16x16 = SSE16x16_C;
VP8SSE16x8 = SSE16x8_C;
VP8SSE8x8 = SSE8x8_C;
VP8SSE4x4 = SSE4x4_C;
#endif
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
VP8EncQuantizeBlock = QuantizeBlock_C;
VP8EncQuantize2Blocks = Quantize2Blocks_C;
#endif
VP8FTransform2 = FTransform2_C;
VP8EncPredLuma4 = Intra4Preds_C;
VP8EncPredLuma16 = Intra16Preds_C;
VP8EncPredChroma8 = IntraChromaPreds_C;
VP8SSE16x16 = SSE16x16_C;
VP8SSE8x8 = SSE8x8_C;
VP8SSE16x8 = SSE16x8_C;
VP8SSE4x4 = SSE4x4_C;
VP8TDisto4x4 = Disto4x4_C;
VP8TDisto16x16 = Disto16x16_C;
VP8Mean16x4 = Mean16x4_C;
VP8EncQuantizeBlock = QuantizeBlock_C;
VP8EncQuantize2Blocks = Quantize2Blocks_C;
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
VP8Copy4x4 = Copy4x4_C;
VP8Copy16x8 = Copy16x8_C;
@ -773,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
VP8EncDspInitAVX2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8EncDspInitMIPS32();
@ -794,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8EncDspInitNEON();
}
#endif
assert(VP8ITransform != NULL);
assert(VP8FTransform != NULL);
assert(VP8FTransformWHT != NULL);
assert(VP8TDisto4x4 != NULL);
assert(VP8TDisto16x16 != NULL);
assert(VP8CollectHistogram != NULL);
assert(VP8SSE16x16 != NULL);
assert(VP8SSE16x8 != NULL);
assert(VP8SSE8x8 != NULL);
assert(VP8SSE4x4 != NULL);
assert(VP8EncQuantizeBlock != NULL);
assert(VP8EncQuantize2Blocks != NULL);
assert(VP8FTransform2 != NULL);
assert(VP8EncPredLuma4 != NULL);
assert(VP8EncPredLuma16 != NULL);
assert(VP8EncPredChroma8 != NULL);
assert(VP8Mean16x4 != NULL);
assert(VP8EncQuantizeBlockWHT != NULL);
assert(VP8Copy4x4 != NULL);
assert(VP8Copy16x8 != NULL);
enc_last_cpuinfo_used = VP8GetCPUInfo;
}