Merge "sharpyuv: increase precision of gamma<->linear conversion" into main

2025-08-29 15:22:12 +02:00 · 2022-06-02 08:30:18 +00:00
parent f81dd7d63e 2d607ee646
commit 5ecee06f7a
8 changed files with 173 additions and 120 deletions
--- a/Android.mk
+++ b/Android.mk
@@ -37,6 +37,7 @@ sharpyuv_srcs := \
    sharpyuv/sharpyuv.c \
    sharpyuv/sharpyuv_csp.c \
    sharpyuv/sharpyuv_dsp.c \
    sharpyuv/sharpyuv_gamma.c \
    sharpyuv/sharpyuv_neon.$(NEON) \
    sharpyuv/sharpyuv_sse2.c \
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -178,6 +178,7 @@ SHARPYUV_OBJS = \
    $(DIROBJ)\sharpyuv\sharpyuv.obj \
    $(DIROBJ)\sharpyuv\sharpyuv_csp.obj \
    $(DIROBJ)\sharpyuv\sharpyuv_dsp.obj \
    $(DIROBJ)\sharpyuv\sharpyuv_gamma.obj \
    $(DIROBJ)\sharpyuv\sharpyuv_neon.obj \
    $(DIROBJ)\sharpyuv\sharpyuv_sse2.obj \
--- a/build.gradle
+++ b/build.gradle
@@ -109,6 +109,7 @@ model {
            include "sharpyuv.c"
            include "sharpyuv_csp.c"
            include "sharpyuv_dsp.c"
            include "sharpyuv_gamma.c"
            include "sharpyuv_neon.c"
            include "sharpyuv_sse2.c"
            srcDir "src/dec"
--- a/makefile.unix
+++ b/makefile.unix
@@ -129,6 +129,7 @@ SHARPYUV_OBJS = \
    sharpyuv/sharpyuv.o \
    sharpyuv/sharpyuv_csp.o \
    sharpyuv/sharpyuv_dsp.o \
    sharpyuv/sharpyuv_gamma.o \
    sharpyuv/sharpyuv_neon.o \
    sharpyuv/sharpyuv_sse2.o \
--- a/sharpyuv/Makefile.am
+++ b/sharpyuv/Makefile.am
@@ -22,6 +22,7 @@ libsharpyuv_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_FLAGS)
 libsharpyuv_la_SOURCES =
 libsharpyuv_la_SOURCES += sharpyuv_csp.c sharpyuv_csp.h
 libsharpyuv_la_SOURCES += sharpyuv_dsp.c sharpyuv_dsp.h
 libsharpyuv_la_SOURCES += sharpyuv_gamma.c sharpyuv_gamma.h
 libsharpyuv_la_SOURCES += sharpyuv.c sharpyuv.h
 libsharpyuv_la_CPPFLAGS = $(AM_CPPFLAGS)
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@@ -21,6 +21,7 @@
 #include "src/webp/types.h"
 #include "src/dsp/cpu.h"
 #include "sharpyuv/sharpyuv_dsp.h"
 #include "sharpyuv/sharpyuv_gamma.h"
 //------------------------------------------------------------------------------
 // Sharp RGB->YUV conversion
@@ -45,100 +46,6 @@ static int GetPrecisionShift(int rgb_bit_depth) {
 typedef int16_t fixed_t;      // signed type with extra precision for UV
 typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
 //------------------------------------------------------------------------------
 // Code for gamma correction
 // Gamma correction compensates loss of resolution during chroma subsampling.
 // Size of pre-computed table for converting from gamma to linear.
 #define GAMMA_TO_LINEAR_TAB_BITS 10
 #define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
 static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
 // Size of pre-computed table for converting from linear to gamma.
 #define LINEAR_TO_GAMMA_TAB_BITS 8
 #define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
 static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
 static const double kGammaF = 1. / 0.45;
 #define GAMMA_TO_LINEAR_BITS 14
 static volatile int kGammaTablesSOk = 0;
 // Fills the two lookup tables (gamma->linear and linear->gamma) the first time
 // it is called; once kGammaTablesSOk is set, later calls leave them untouched.
 // Both curves use a linear segment of slope 4.5 near zero and a power law of
 // exponent kGammaF (or its inverse) above the threshold.
 static void InitGammaTablesS(void) {
  const double offset = 0.09929682680944;
  const double linear_limit = 0.018053968510807;
  // Step between consecutive gamma-domain samples, in [0..1] units.
  const double gamma_step = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
  const double inv_gain = 1. / (1. + offset);
  // Linear values are stored with GAMMA_TO_LINEAR_BITS of fixed-point precision.
  const double linear_one = 1 << GAMMA_TO_LINEAR_BITS;
  // Step between consecutive linear-domain samples, in [0..1] units.
  const double linear_step = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
  int i;
  assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values
  if (kGammaTablesSOk) return;

  // Gamma -> linear table.
  for (i = 0; i <= GAMMA_TO_LINEAR_TAB_SIZE; ++i) {
    const double g = gamma_step * i;
    const double linear = (g <= linear_limit * 4.5)
                              ? g / 4.5
                              : pow(inv_gain * (g + offset), kGammaF);
    kGammaToLinearTabS[i] = (uint32_t)(linear * linear_one + .5);
  }
  // Duplicate the last entry so that interpolating at the very end of the
  // table never reads past the array.
  kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];

  // Linear -> gamma table, stored in GAMMA_TO_LINEAR_TAB_SIZE units.
  for (i = 0; i <= LINEAR_TO_GAMMA_TAB_SIZE; ++i) {
    const double l = linear_step * i;
    const double gamma = (l <= linear_limit)
                             ? 4.5 * l
                             : (1. + offset) * pow(l, 1. / kGammaF) - offset;
    kLinearToGammaTabS[i] =
        (uint32_t)(GAMMA_TO_LINEAR_TAB_SIZE * gamma + 0.5);
  }
  // Same guard entry as above.
  kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];

  kGammaTablesSOk = 1;
 }
 // Linear interpolation between two neighbouring entries of 'tab'.
 // The bits of 'v' above 'tab_pos_shift' pick the table entry; the low
 // 'tab_pos_shift' bits are the fractional position towards the next entry.
 // Table values are left-shifted by 'tab_value_shift' before interpolating.
 static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
                                                    int tab_pos_shift,
                                                    int tab_value_shift) {
  const uint32_t idx = v >> tab_pos_shift;
  // Remainder of 'v' once the integer table position is removed.
  const uint32_t frac = v - (idx << tab_pos_shift);
  const uint32_t lo = tab[idx] << tab_value_shift;
  const uint32_t hi = tab[idx + 1] << tab_value_shift;
  // Half of the divisor, for round-to-nearest in the final shift.
  int rounding = 0;
  if (tab_pos_shift > 0) rounding = 1 << (tab_pos_shift - 1);
  // Relies on hi >= lo, as the original note states.
  return lo + (((hi - lo) * frac + rounding) >> tab_pos_shift);
 }
 // Maps a gamma-domain sample 'v' on 'bit_depth' bits to its linear value using
 // kGammaToLinearTabS. Samples with fewer bits than the table index are scaled
 // up and looked up directly; otherwise the table is interpolated.
 static WEBP_INLINE uint32_t GammaToLinear(int v, int bit_depth) {
  const int up_shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
  if (up_shift <= 0) {
    return FixedPointInterpolation(v, kGammaToLinearTabS, -up_shift, 0);
  }
  return kGammaToLinearTabS[v << up_shift];
 }
 // Maps a linear value back to the gamma domain on 'bit_depth' bits by
 // interpolating kLinearToGammaTabS.
 static WEBP_INLINE uint32_t LinearToGamma(uint32_t value, int bit_depth) {
  const int out_shift = bit_depth - GAMMA_TO_LINEAR_TAB_BITS;
  // Pre-scale so that the table position sits above GAMMA_TO_LINEAR_BITS.
  const uint32_t scaled = value << LINEAR_TO_GAMMA_TAB_BITS;
  return FixedPointInterpolation(scaled, kLinearToGammaTabS,
                                 GAMMA_TO_LINEAR_BITS, out_shift);
 }
 //------------------------------------------------------------------------------
 static uint8_t clip_8b(fixed_t v) {
@@ -161,13 +68,14 @@ static int RGBToGray(int64_t r, int64_t g, int64_t b) {
  return (int)(luma >> YUV_FIX);
 }
-static uint32_t ScaleDown(int a, int b, int c, int d, int rgb_bit_depth) {
+static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
                          int rgb_bit_depth) {
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-  const uint32_t A = GammaToLinear(a, bit_depth);
+  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
-  const uint32_t B = GammaToLinear(b, bit_depth);
+  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
-  const uint32_t C = GammaToLinear(c, bit_depth);
+  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
-  const uint32_t D = GammaToLinear(d, bit_depth);
+  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
-  return LinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
+  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
 }
 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
@@ -175,11 +83,11 @@ static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
  int i;
  for (i = 0; i < w; ++i) {
-    const uint32_t R = GammaToLinear(src[0 * w + i], bit_depth);
+    const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
-    const uint32_t G = GammaToLinear(src[1 * w + i], bit_depth);
+    const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
-    const uint32_t B = GammaToLinear(src[2 * w + i], bit_depth);
+    const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
    const uint32_t Y = RGBToGray(R, G, B);
-    dst[i] = (fixed_y_t)LinearToGamma(Y, bit_depth);
+    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
  }
 }
@@ -227,15 +135,6 @@ static WEBP_INLINE int Shift(int v, int shift) {
  return (shift >= 0) ? (v << shift) : (v >> -shift);
 }
 // Rescales 'a' by 2^shift: a positive 'shift' multiplies, a negative one
 // divides with rounding to nearest, and zero returns 'a' unchanged.
 static WEBP_INLINE fixed_y_t ChangePrecision(uint16_t a, int shift) {
  if (shift > 0) {
    return ((fixed_y_t)a << shift);
  } else if (shift < 0) {
    const int down = -shift;
    const int rounding = 1 << (down - 1);
    return (a + rounding) >> down;
  }
  return a;
 }
 static void ImportOneRow(const uint8_t* const r_ptr,
                         const uint8_t* const g_ptr,
                         const uint8_t* const b_ptr,
@@ -252,13 +151,13 @@ static void ImportOneRow(const uint8_t* const r_ptr,
    const int off = i * step;
    const int shift = GetPrecisionShift(rgb_bit_depth);
    if (rgb_bit_depth == 8) {
-      dst[i + 0 * w] = ChangePrecision(r_ptr[off], shift);
+      dst[i + 0 * w] = Shift(r_ptr[off], shift);
-      dst[i + 1 * w] = ChangePrecision(g_ptr[off], shift);
+      dst[i + 1 * w] = Shift(g_ptr[off], shift);
-      dst[i + 2 * w] = ChangePrecision(b_ptr[off], shift);
+      dst[i + 2 * w] = Shift(b_ptr[off], shift);
    } else {
-      dst[i + 0 * w] = ChangePrecision(((uint16_t*)r_ptr)[off], shift);
+      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
-      dst[i + 1 * w] = ChangePrecision(((uint16_t*)g_ptr)[off], shift);
+      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
-      dst[i + 2 * w] = ChangePrecision(((uint16_t*)b_ptr)[off], shift);
+      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
    }
  }
  if (pic_width & 1) {  // replicate rightmost pixel
@@ -527,7 +426,7 @@ void SharpYuvInit(VP8CPUInfo cpu_info_func) {
  SharpYuvInitDsp(cpu_info_func);
  if (!initialized) {
-    InitGammaTablesS();
+    SharpYuvInitGammaTables();
  }
  sharpyuv_last_cpuinfo_used = cpu_info_func;
--- a/sharpyuv/sharpyuv_gamma.c
+++ b/sharpyuv/sharpyuv_gamma.c
@@ -0,0 +1,114 @@
 // Copyright 2022 Google Inc. All Rights Reserved.
 //
 // Use of this source code is governed by a BSD-style license
 // that can be found in the COPYING file in the root of the source
 // tree. An additional intellectual property rights grant can be found
 // in the file PATENTS. All contributing project authors may
 // be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Gamma correction utilities.
 #include "sharpyuv/sharpyuv_gamma.h"
 #include <assert.h>
 #include <math.h>
 #include <stdint.h>
 #include "src/webp/types.h"
 // Gamma correction compensates loss of resolution during chroma subsampling.
 // Size of pre-computed table for converting from gamma to linear.
 #define GAMMA_TO_LINEAR_TAB_BITS 10
 #define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
 static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
 #define LINEAR_TO_GAMMA_TAB_BITS 9
 #define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
 static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
 static const double kGammaF = 1. / 0.45;
 #define GAMMA_TO_LINEAR_BITS 16
 static volatile int kGammaTablesSOk = 0;
 // Fills the two lookup tables (gamma->linear and linear->gamma) the first time
 // it is called; once kGammaTablesSOk is set, later calls leave them untouched.
 // Both tables store values with GAMMA_TO_LINEAR_BITS of fixed-point precision.
 // The curves use a linear segment of slope 4.5 near zero and a power law of
 // exponent kGammaF (or its inverse) above the threshold.
 void SharpYuvInitGammaTables(void) {
  const double offset = 0.09929682680944;
  const double linear_limit = 0.018053968510807;
  // Value representing 1.0 in both tables.
  const double fixed_one = 1 << GAMMA_TO_LINEAR_BITS;
  // Step between consecutive gamma-domain samples, in [0..1] units.
  const double gamma_step = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
  const double inv_gain = 1. / (1. + offset);
  // Step between consecutive linear-domain samples, in [0..1] units.
  const double linear_step = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
  int i;
  assert(GAMMA_TO_LINEAR_BITS <= 16);
  if (kGammaTablesSOk) return;

  // Gamma -> linear table.
  for (i = 0; i <= GAMMA_TO_LINEAR_TAB_SIZE; ++i) {
    const double g = gamma_step * i;
    const double linear = (g <= linear_limit * 4.5)
                              ? g / 4.5
                              : pow(inv_gain * (g + offset), kGammaF);
    kGammaToLinearTabS[i] = (uint32_t)(linear * fixed_one + .5);
  }
  // Duplicate the last entry so that interpolating at the very end of the
  // table never reads past the array.
  kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];

  // Linear -> gamma table.
  for (i = 0; i <= LINEAR_TO_GAMMA_TAB_SIZE; ++i) {
    const double l = linear_step * i;
    const double gamma = (l <= linear_limit)
                             ? 4.5 * l
                             : (1. + offset) * pow(l, 1. / kGammaF) - offset;
    kLinearToGammaTabS[i] = (uint32_t)(fixed_one * gamma + 0.5);
  }
  // Same guard entry as above.
  kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];

  kGammaTablesSOk = 1;
 }
 // Shifts 'v' left by 'shift' bits when 'shift' is non-negative, and right by
 // '-shift' bits otherwise.
 static WEBP_INLINE int Shift(int v, int shift) {
  if (shift < 0) {
    return v >> -shift;
  }
  return v << shift;
 }
 // Linearly interpolates between two consecutive entries of 'tab'.
 // 'v' is a fixed-point position: its bits above 'tab_pos_shift_right' select
 // the table entry, and its low 'tab_pos_shift_right' bits give the fractional
 // position between that entry and the next one. Each table value goes through
 // Shift() by 'tab_value_shift' (left if positive, right if negative) before
 // the interpolation. 'tab' must have a valid entry at the selected position
 // plus one; it is only read, hence the const qualifier.
 // Returns the interpolated value, rounded to nearest.
 static WEBP_INLINE uint32_t FixedPointInterpolation(int v, const uint32_t* tab,
                                                    int tab_pos_shift_right,
                                                    int tab_value_shift) {
  const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);
  // fractional part, in 'tab_pos_shift_right' fixed-point precision
  const uint32_t x = v - (tab_pos << tab_pos_shift_right);
  // v0 / v1 are the two neighbouring table values, rescaled by
  // 'tab_value_shift'.
  const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
  const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
  // Final interpolation.
  const uint32_t v2 = (v1 - v0) * x;  // note: relies on v1 >= v0.
  // Half of the divisor, so that the final right shift rounds to nearest.
  const int half =
      (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
  const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
  return result;
 }
 // Maps a gamma-domain sample 'v' on 'bit_depth' bits to its linear value using
 // kGammaToLinearTabS. Samples with fewer bits than the table index are scaled
 // up and looked up directly; otherwise the table is interpolated.
 uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth) {
  const int up_shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
  if (up_shift <= 0) {
    return FixedPointInterpolation(v, kGammaToLinearTabS, -up_shift, 0);
  }
  return kGammaToLinearTabS[v << up_shift];
 }
 // Maps a linear value (GAMMA_TO_LINEAR_BITS of precision) back to the gamma
 // domain on 'bit_depth' bits by interpolating kLinearToGammaTabS.
 uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth) {
  // Number of low bits of 'value' that fall between two table entries.
  const int pos_shift = GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS;
  // Rescales table values from GAMMA_TO_LINEAR_BITS to 'bit_depth' bits.
  const int value_shift = bit_depth - GAMMA_TO_LINEAR_BITS;
  return FixedPointInterpolation(value, kLinearToGammaTabS, pos_shift,
                                 value_shift);
 }
--- a/sharpyuv/sharpyuv_gamma.h
+++ b/sharpyuv/sharpyuv_gamma.h
@@ -0,0 +1,35 @@
 // Copyright 2022 Google Inc. All Rights Reserved.
 //
 // Use of this source code is governed by a BSD-style license
 // that can be found in the COPYING file in the root of the source
 // tree. An additional intellectual property rights grant can be found
 // in the file PATENTS. All contributing project authors may
 // be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Gamma correction utilities.
 #ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
 #define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
 #include <stdint.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // Initializes precomputed tables. Must be called once before calling
 // SharpYuvGammaToLinear or SharpYuvLinearToGamma.
 void SharpYuvInitGammaTables(void);
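 // Converts a gamma color value on 'bit_depth' bits to a linear value with
 // 16 bits of fixed-point precision (full scale maps to 1 << 16, which is why
 // the return type is uint32_t).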
 uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth);
 // Converts a 16 bit linear color value to a gamma value on 'bit_depth' bits.
 uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif  // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_