From 2d607ee6469c5a7b3ba00f87ecf09c3b7a732b1d Mon Sep 17 00:00:00 2001
From: Maryla <maryla@google.com>
Date: Wed, 1 Jun 2022 15:38:44 +0200
Subject: [PATCH] sharpyuv: increase precision of gamma<->linear conversion

Change-Id: I261bae3628315bda4ec0dafb8798c7512dd03a36
---
 Android.mk                |   1 +
 Makefile.vc               |   1 +
 build.gradle              |   1 +
 makefile.unix             |   1 +
 sharpyuv/Makefile.am      |   1 +
 sharpyuv/sharpyuv.c       | 139 ++++++--------------------------------
 sharpyuv/sharpyuv_gamma.c | 114 +++++++++++++++++++++++++++++++
 sharpyuv/sharpyuv_gamma.h |  35 ++++++++++
 8 files changed, 173 insertions(+), 120 deletions(-)
 create mode 100644 sharpyuv/sharpyuv_gamma.c
 create mode 100644 sharpyuv/sharpyuv_gamma.h

diff --git a/Android.mk b/Android.mk
index 06a80f45..85ba5c6b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -37,6 +37,7 @@ sharpyuv_srcs := \
     sharpyuv/sharpyuv.c \
     sharpyuv/sharpyuv_csp.c \
     sharpyuv/sharpyuv_dsp.c \
+    sharpyuv/sharpyuv_gamma.c \
     sharpyuv/sharpyuv_neon.$(NEON) \
     sharpyuv/sharpyuv_sse2.c \
 
diff --git a/Makefile.vc b/Makefile.vc
index f13bf4e6..f2541097 100644
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -178,6 +178,7 @@ SHARPYUV_OBJS = \
     $(DIROBJ)\sharpyuv\sharpyuv.obj \
     $(DIROBJ)\sharpyuv\sharpyuv_csp.obj \
     $(DIROBJ)\sharpyuv\sharpyuv_dsp.obj \
+    $(DIROBJ)\sharpyuv\sharpyuv_gamma.obj \
     $(DIROBJ)\sharpyuv\sharpyuv_neon.obj \
     $(DIROBJ)\sharpyuv\sharpyuv_sse2.obj \
 
diff --git a/build.gradle b/build.gradle
index ed2b1b4c..13202a85 100644
--- a/build.gradle
+++ b/build.gradle
@@ -109,6 +109,7 @@ model {
             include "sharpyuv.c"
             include "sharpyuv_csp.c"
             include "sharpyuv_dsp.c"
+            include "sharpyuv_gamma.c"
             include "sharpyuv_neon.c"
             include "sharpyuv_sse2.c"
             srcDir "src/dec"
diff --git a/makefile.unix b/makefile.unix
index 2e067a22..73e1cfe8 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -129,6 +129,7 @@ SHARPYUV_OBJS = \
     sharpyuv/sharpyuv.o \
     sharpyuv/sharpyuv_csp.o \
     sharpyuv/sharpyuv_dsp.o \
+    sharpyuv/sharpyuv_gamma.o \
     sharpyuv/sharpyuv_neon.o \
     sharpyuv/sharpyuv_sse2.o \
 
diff --git a/sharpyuv/Makefile.am b/sharpyuv/Makefile.am
index 8bc6c773..a2297a76 100644
--- a/sharpyuv/Makefile.am
+++ b/sharpyuv/Makefile.am
@@ -22,6 +22,7 @@ libsharpyuv_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_FLAGS)
 libsharpyuv_la_SOURCES =
 libsharpyuv_la_SOURCES += sharpyuv_csp.c sharpyuv_csp.h
 libsharpyuv_la_SOURCES += sharpyuv_dsp.c sharpyuv_dsp.h
+libsharpyuv_la_SOURCES += sharpyuv_gamma.c sharpyuv_gamma.h
 libsharpyuv_la_SOURCES += sharpyuv.c sharpyuv.h
 
 libsharpyuv_la_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/sharpyuv/sharpyuv.c b/sharpyuv/sharpyuv.c
index 8aa18051..a1e75303 100644
--- a/sharpyuv/sharpyuv.c
+++ b/sharpyuv/sharpyuv.c
@@ -21,6 +21,7 @@
 #include "src/webp/types.h"
 #include "src/dsp/cpu.h"
 #include "sharpyuv/sharpyuv_dsp.h"
+#include "sharpyuv/sharpyuv_gamma.h"
 
 //------------------------------------------------------------------------------
 // Sharp RGB->YUV conversion
@@ -45,100 +46,6 @@ static int GetPrecisionShift(int rgb_bit_depth) {
 typedef int16_t fixed_t;      // signed type with extra precision for UV
 typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
 
-//------------------------------------------------------------------------------
-// Code for gamma correction
-
-// Gamma correction compensates loss of resolution during chroma subsampling.
-// Size of pre-computed table for converting from gamma to linear.
-#define GAMMA_TO_LINEAR_TAB_BITS 10
-#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
-static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
-// Size of pre-computed table for converting from linear to gamma.
-#define LINEAR_TO_GAMMA_TAB_BITS 8
-#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
-static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
-
-static const double kGammaF = 1. / 0.45;
-#define GAMMA_TO_LINEAR_BITS 14
-
-static volatile int kGammaTablesSOk = 0;
-static void InitGammaTablesS(void) {
-  assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values
-  if (!kGammaTablesSOk) {
-    int v;
-    const double a = 0.09929682680944;
-    const double thresh = 0.018053968510807;
-    // Precompute gamma to linear table.
-    {
-      const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
-      const double a_rec = 1. / (1. + a);
-      const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
-      for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
-        const double g = norm * v;
-        double value;
-        if (g <= thresh * 4.5) {
-          value = g / 4.5;
-        } else {
-          value = pow(a_rec * (g + a), kGammaF);
-        }
-        kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
-      }
-      // to prevent small rounding errors to cause read-overflow:
-      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
-          kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
-    }
-    // Precompute linear to gamma table.
-    {
-      const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
-      for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
-        const double g = scale * v;
-        double value;
-        if (g <= thresh) {
-          value = 4.5 * g;
-        } else {
-          value = (1. + a) * pow(g, 1. / kGammaF) - a;
-        }
-        kLinearToGammaTabS[v] =
-            (uint32_t)(GAMMA_TO_LINEAR_TAB_SIZE * value + 0.5);
-      }
-      // to prevent small rounding errors to cause read-overflow:
-      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
-          kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
-    }
-    kGammaTablesSOk = 1;
-  }
-}
-
-static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
-                                                    int tab_pos_shift,
-                                                    int tab_value_shift) {
-  const uint32_t tab_pos = v >> tab_pos_shift;
-  // fractional part, in 'tab_pos_shift' fixed-point precision
-  const uint32_t x = v - (tab_pos << tab_pos_shift);  // fractional part
-  // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
-  const uint32_t v0 = tab[tab_pos + 0] << tab_value_shift;
-  const uint32_t v1 = tab[tab_pos + 1] << tab_value_shift;
-  // Final interpolation.
-  const uint32_t v2 = (v1 - v0) * x;  // note: v1 >= v0.
-  const int half = (tab_pos_shift > 0) ? 1 << (tab_pos_shift - 1) : 0;
-  const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift);
-  return result;
-}
-
-static WEBP_INLINE uint32_t GammaToLinear(int v, int bit_depth) {
-  const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
-  if (shift > 0) {
-    return kGammaToLinearTabS[v << shift];
-  }
-  return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
-}
-
-static WEBP_INLINE uint32_t LinearToGamma(uint32_t value, int bit_depth) {
-  const uint32_t v = value << LINEAR_TO_GAMMA_TAB_BITS;
-  return FixedPointInterpolation(v, kLinearToGammaTabS, GAMMA_TO_LINEAR_BITS,
-                                 bit_depth - GAMMA_TO_LINEAR_TAB_BITS);
-}
-
 //------------------------------------------------------------------------------
 
 static uint8_t clip_8b(fixed_t v) {
@@ -161,13 +68,14 @@ static int RGBToGray(int64_t r, int64_t g, int64_t b) {
   return (int)(luma >> YUV_FIX);
 }
 
-static uint32_t ScaleDown(int a, int b, int c, int d, int rgb_bit_depth) {
+static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
+                          int rgb_bit_depth) {
   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-  const uint32_t A = GammaToLinear(a, bit_depth);
-  const uint32_t B = GammaToLinear(b, bit_depth);
-  const uint32_t C = GammaToLinear(c, bit_depth);
-  const uint32_t D = GammaToLinear(d, bit_depth);
-  return LinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
+  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
+  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
+  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
+  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
+  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
 }
 
 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
@@ -175,11 +83,11 @@ static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
   int i;
   for (i = 0; i < w; ++i) {
-    const uint32_t R = GammaToLinear(src[0 * w + i], bit_depth);
-    const uint32_t G = GammaToLinear(src[1 * w + i], bit_depth);
-    const uint32_t B = GammaToLinear(src[2 * w + i], bit_depth);
+    const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
+    const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
+    const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
     const uint32_t Y = RGBToGray(R, G, B);
-    dst[i] = (fixed_y_t)LinearToGamma(Y, bit_depth);
+    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
   }
 }
 
@@ -227,15 +135,6 @@ static WEBP_INLINE int Shift(int v, int shift) {
   return (shift >= 0) ? (v << shift) : (v >> -shift);
 }
 
-static WEBP_INLINE fixed_y_t ChangePrecision(uint16_t a, int shift) {
-  if (shift == 0) return a;
-  if (shift < 0) {
-    const int rounding = 1 << (-shift - 1);
-    return (a + rounding) >> -shift;
-  }
-  return ((fixed_y_t)a << shift);
-}
-
 static void ImportOneRow(const uint8_t* const r_ptr,
                          const uint8_t* const g_ptr,
                          const uint8_t* const b_ptr,
@@ -252,13 +151,13 @@ static void ImportOneRow(const uint8_t* const r_ptr,
     const int off = i * step;
     const int shift = GetPrecisionShift(rgb_bit_depth);
     if (rgb_bit_depth == 8) {
-      dst[i + 0 * w] = ChangePrecision(r_ptr[off], shift);
-      dst[i + 1 * w] = ChangePrecision(g_ptr[off], shift);
-      dst[i + 2 * w] = ChangePrecision(b_ptr[off], shift);
+      dst[i + 0 * w] = Shift(r_ptr[off], shift);
+      dst[i + 1 * w] = Shift(g_ptr[off], shift);
+      dst[i + 2 * w] = Shift(b_ptr[off], shift);
     } else {
-      dst[i + 0 * w] = ChangePrecision(((uint16_t*)r_ptr)[off], shift);
-      dst[i + 1 * w] = ChangePrecision(((uint16_t*)g_ptr)[off], shift);
-      dst[i + 2 * w] = ChangePrecision(((uint16_t*)b_ptr)[off], shift);
+      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
+      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
+      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
     }
   }
   if (pic_width & 1) {  // replicate rightmost pixel
@@ -527,7 +426,7 @@ void SharpYuvInit(VP8CPUInfo cpu_info_func) {
 
   SharpYuvInitDsp(cpu_info_func);
   if (!initialized) {
-    InitGammaTablesS();
+    SharpYuvInitGammaTables();
   }
 
   sharpyuv_last_cpuinfo_used = cpu_info_func;
diff --git a/sharpyuv/sharpyuv_gamma.c b/sharpyuv/sharpyuv_gamma.c
new file mode 100644
index 00000000..05b5436f
--- /dev/null
+++ b/sharpyuv/sharpyuv_gamma.c
@@ -0,0 +1,114 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Gamma correction utilities.
+
+#include "sharpyuv/sharpyuv_gamma.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+
+#include "src/webp/types.h"
+
+// Gamma correction compensates for resolution lost during chroma subsampling.
+// Sizes of the pre-computed gamma->linear and linear->gamma lookup tables.
+#define GAMMA_TO_LINEAR_TAB_BITS 10
+#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
+static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
+#define LINEAR_TO_GAMMA_TAB_BITS 9
+#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
+static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
+
+static const double kGammaF = 1. / 0.45;  // exponent of the power-law segment
+#define GAMMA_TO_LINEAR_BITS 16  // fractional bits of the stored table values
+
+static volatile int kGammaTablesSOk = 0;  // set to 1 once tables are filled
+void SharpYuvInitGammaTables(void) {  // fills both tables; later calls no-op
+  assert(GAMMA_TO_LINEAR_BITS <= 16);
+  if (!kGammaTablesSOk) {
+    int v;
+    const double a = 0.09929682680944;  // offset of the power-law segment
+    const double thresh = 0.018053968510807;  // end of the 4.5x linear ramp
+    const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;  // fixed-point 1.0
+    // Precompute the gamma to linear table (index: gamma, value: linear).
+    {
+      const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
+      const double a_rec = 1. / (1. + a);
+      for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
+        const double g = norm * v;
+        double value;
+        if (g <= thresh * 4.5) {  // threshold expressed in the gamma domain
+          value = g / 4.5;
+        } else {
+          value = pow(a_rec * (g + a), kGammaF);
+        }
+        kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
+      }
+      // Duplicate the last entry so reading index + 1 stays inside the table:
+      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
+          kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
+    }
+    // Precompute the linear to gamma table (index: linear, value: gamma).
+    {
+      const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
+      for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
+        const double g = scale * v;
+        double value;
+        if (g <= thresh) {
+          value = 4.5 * g;
+        } else {
+          value = (1. + a) * pow(g, 1. / kGammaF) - a;
+        }
+        kLinearToGammaTabS[v] =
+            (uint32_t)(final_scale * value + 0.5);
+      }
+      // Duplicate the last entry so reading index + 1 stays inside the table:
+      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
+          kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
+    }
+    kGammaTablesSOk = 1;
+  }
+}
+
+static WEBP_INLINE int Shift(int v, int shift) {  // shift < 0 shifts right
+  return (shift >= 0) ? (v << shift) : (v >> -shift);
+}
+
+static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
+                                                    int tab_pos_shift_right,
+                                                    int tab_value_shift) {
+  const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);  // table index
+  // fractional part, in 'tab_pos_shift_right' fixed-point precision
+  const uint32_t x = v - (tab_pos << tab_pos_shift_right);  // low bits of v
+  // v0 / v1: the two bracketing table entries, rescaled by 'tab_value_shift'
+  const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
+  const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
+  // Final interpolation.
+  const uint32_t v2 = (v1 - v0) * x;  // note: v1 >= v0.
+  const int half =  // rounding term for the shift below (0 if no shift)
+      (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
+  const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
+  return result;
+}
+
+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth) {
+  const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
+  if (shift > 0) {  // input has fewer bits than the table index: direct lookup
+    return kGammaToLinearTabS[v << shift];
+  }
+  return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
+}
+
+uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth) {
+  return FixedPointInterpolation(
+      value, kLinearToGammaTabS,
+      (GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),  // fractional bits
+      bit_depth - GAMMA_TO_LINEAR_BITS);  // rescale table values to bit_depth
+}
diff --git a/sharpyuv/sharpyuv_gamma.h b/sharpyuv/sharpyuv_gamma.h
new file mode 100644
index 00000000..2f1a3ff4
--- /dev/null
+++ b/sharpyuv/sharpyuv_gamma.h
@@ -0,0 +1,35 @@
+// Copyright 2022 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Gamma correction utilities.
+
+#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
+#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initializes the precomputed tables. Must be called before the first use of
+// SharpYuvGammaToLinear or SharpYuvLinearToGamma; repeated calls are no-ops.
+void SharpYuvInitGammaTables(void);
+
+// Converts a gamma value on 'bit_depth' bits to linear (16 fractional bits).
+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth);
+
+// Converts a linear value (16 fractional bits) to gamma on 'bit_depth' bits.
+uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_