mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-30 18:05:36 +01:00 
			
		
		
		
	sharpyuv: add support for 10/12/16 bit rgb and 10/12 bit yuv.
10bit+ input is truncated to 10bits for now. Change-Id: I7ac00ca54c623d94c76ccd8954418e11095997d2
This commit is contained in:
		| @@ -31,83 +31,114 @@ static const int kMinDimensionIterativeConversion = 4; | |||||||
| #define YUV_FIX 16  // fixed-point precision for RGB->YUV | #define YUV_FIX 16  // fixed-point precision for RGB->YUV | ||||||
| static const int kYuvHalf = 1 << (YUV_FIX - 1); | static const int kYuvHalf = 1 << (YUV_FIX - 1); | ||||||
|  |  | ||||||
| // We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some | // Max bit depth so that intermediate calculations fit in 16 bits. | ||||||
| // banding sometimes. Better use extra precision. | // TODO(b/194336375): the C code can handle up to 14 bits, but the SIMD code | ||||||
| #define SFIX 2                // fixed-point precision of RGB and Y/W | // currently needs more room. | ||||||
| #define MAX_Y_T ((256 << SFIX) - 1) | static const int kMaxBitDepth = 10; | ||||||
| typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV |  | ||||||
| typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W |  | ||||||
|  |  | ||||||
| static const int kYuvRounder = (1 << (YUV_FIX + SFIX - 1)); | // Returns the precision shift to use based on the input rgb_bit_depth. | ||||||
|  | static int GetPrecisionShift(int rgb_bit_depth) { | ||||||
|  |   // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove | ||||||
|  |   // bits if needed. | ||||||
|  |   return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2 | ||||||
|  |                                                : (kMaxBitDepth - rgb_bit_depth); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | typedef int16_t fixed_t;      // signed type with extra precision for UV | ||||||
|  | typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| // Code for gamma correction | // Code for gamma correction | ||||||
|  |  | ||||||
| // Gamma correction compensates loss of resolution during chroma subsampling. | // Gamma correction compensates loss of resolution during chroma subsampling. | ||||||
| static const double kGammaF = 1./0.45; | // Size of pre-computed table for converting from gamma to linear. | ||||||
| #define GAMMA_TAB_FIX 8 | #define GAMMA_TO_LINEAR_TAB_BITS 10 | ||||||
| #define GAMMA_TAB_SIZE (1 << GAMMA_TAB_FIX) | #define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS) | ||||||
| static uint32_t kLinearToGammaTabS[GAMMA_TAB_SIZE + 2]; | static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2]; | ||||||
| #define GAMMA_TO_LINEAR_BITS 14 | // Size of pre-computed table for converting from linear to gamma. | ||||||
| static const int kGammaToLinearHalf = 1 << (GAMMA_TO_LINEAR_BITS - 1); | #define LINEAR_TO_GAMMA_TAB_BITS 8 | ||||||
| static uint32_t kGammaToLinearTabS[MAX_Y_T + 1];   // size scales with Y_FIX | #define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS) | ||||||
| static volatile int kGammaTablesSOk = 0; | static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2]; | ||||||
|  |  | ||||||
|  | static const double kGammaF = 1. / 0.45; | ||||||
|  | #define GAMMA_TO_LINEAR_BITS 14 | ||||||
|  |  | ||||||
|  | static volatile int kGammaTablesSOk = 0; | ||||||
| static void InitGammaTablesS(void) { | static void InitGammaTablesS(void) { | ||||||
|   assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values |   assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values | ||||||
|   if (!kGammaTablesSOk) { |   if (!kGammaTablesSOk) { | ||||||
|     int v; |     int v; | ||||||
|     const double norm = 1. / MAX_Y_T; |  | ||||||
|     const double scale = 1. / GAMMA_TAB_SIZE; |  | ||||||
|     const double a = 0.09929682680944; |     const double a = 0.09929682680944; | ||||||
|     const double thresh = 0.018053968510807; |     const double thresh = 0.018053968510807; | ||||||
|     const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; |     // Precompute gamma to linear table. | ||||||
|     for (v = 0; v <= MAX_Y_T; ++v) { |     { | ||||||
|       const double g = norm * v; |       const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE; | ||||||
|       double value; |       const double a_rec = 1. / (1. + a); | ||||||
|       if (g <= thresh * 4.5) { |       const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; | ||||||
|         value = g / 4.5; |       for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) { | ||||||
|       } else { |         const double g = norm * v; | ||||||
|         const double a_rec = 1. / (1. + a); |         double value; | ||||||
|         value = pow(a_rec * (g + a), kGammaF); |         if (g <= thresh * 4.5) { | ||||||
|  |           value = g / 4.5; | ||||||
|  |         } else { | ||||||
|  |           value = pow(a_rec * (g + a), kGammaF); | ||||||
|  |         } | ||||||
|  |         kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); | ||||||
|       } |       } | ||||||
|       kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); |       // to prevent small rounding errors to cause read-overflow: | ||||||
|  |       kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] = | ||||||
|  |           kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE]; | ||||||
|     } |     } | ||||||
|     for (v = 0; v <= GAMMA_TAB_SIZE; ++v) { |     // Precompute linear to gamma table. | ||||||
|       const double g = scale * v; |     { | ||||||
|       double value; |       const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE; | ||||||
|       if (g <= thresh) { |       for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) { | ||||||
|         value = 4.5 * g; |         const double g = scale * v; | ||||||
|       } else { |         double value; | ||||||
|         value = (1. + a) * pow(g, 1. / kGammaF) - a; |         if (g <= thresh) { | ||||||
|  |           value = 4.5 * g; | ||||||
|  |         } else { | ||||||
|  |           value = (1. + a) * pow(g, 1. / kGammaF) - a; | ||||||
|  |         } | ||||||
|  |         kLinearToGammaTabS[v] = | ||||||
|  |             (uint32_t)(GAMMA_TO_LINEAR_TAB_SIZE * value + 0.5); | ||||||
|       } |       } | ||||||
|       kLinearToGammaTabS[v] = (uint32_t)(MAX_Y_T * value + 0.5); |       // to prevent small rounding errors to cause read-overflow: | ||||||
|  |       kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] = | ||||||
|  |           kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE]; | ||||||
|     } |     } | ||||||
|     // to prevent small rounding errors to cause read-overflow: |  | ||||||
|     kLinearToGammaTabS[GAMMA_TAB_SIZE + 1] = kLinearToGammaTabS[GAMMA_TAB_SIZE]; |  | ||||||
|     kGammaTablesSOk = 1; |     kGammaTablesSOk = 1; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| // return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS | static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab, | ||||||
| static WEBP_INLINE uint32_t GammaToLinearS(int v) { |                                                     int tab_pos_shift, | ||||||
|   return kGammaToLinearTabS[v]; |                                                     int tab_value_shift) { | ||||||
|  |   const uint32_t tab_pos = v >> tab_pos_shift; | ||||||
|  |   // fractional part, in 'tab_pos_shift' fixed-point precision | ||||||
|  |   const uint32_t x = v - (tab_pos << tab_pos_shift);  // fractional part | ||||||
|  |   // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1]) | ||||||
|  |   const uint32_t v0 = tab[tab_pos + 0] << tab_value_shift; | ||||||
|  |   const uint32_t v1 = tab[tab_pos + 1] << tab_value_shift; | ||||||
|  |   // Final interpolation. | ||||||
|  |   const uint32_t v2 = (v1 - v0) * x;  // note: v1 >= v0. | ||||||
|  |   const int half = (tab_pos_shift > 0) ? 1 << (tab_pos_shift - 1) : 0; | ||||||
|  |   const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift); | ||||||
|  |   return result; | ||||||
| } | } | ||||||
|  |  | ||||||
| static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { | static WEBP_INLINE uint32_t GammaToLinear(int v, int bit_depth) { | ||||||
|   // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision |   const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth; | ||||||
|   const uint32_t v = value * GAMMA_TAB_SIZE; |   if (shift > 0) { | ||||||
|   const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; |     return kGammaToLinearTabS[v << shift]; | ||||||
|   // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision |   } | ||||||
|   const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS);  // fractional part |   return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0); | ||||||
|   // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) | } | ||||||
|   const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; |  | ||||||
|   const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; | static WEBP_INLINE uint32_t LinearToGamma(uint32_t value, int bit_depth) { | ||||||
|   // Final interpolation. |   const uint32_t v = value << LINEAR_TO_GAMMA_TAB_BITS; | ||||||
|   const uint32_t v2 = (v1 - v0) * x;    // note: v1 >= v0. |   return FixedPointInterpolation(v, kLinearToGammaTabS, GAMMA_TO_LINEAR_BITS, | ||||||
|   const uint32_t result = |                                  bit_depth - GAMMA_TO_LINEAR_TAB_BITS); | ||||||
|       v0 + ((v2 + kGammaToLinearHalf) >> GAMMA_TO_LINEAR_BITS); |  | ||||||
|   return result; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| @@ -116,46 +147,57 @@ static uint8_t clip_8b(fixed_t v) { | |||||||
|   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; |   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; | ||||||
| } | } | ||||||
|  |  | ||||||
| static fixed_y_t clip_y(int y) { | static uint16_t clip(fixed_t v, int max) { | ||||||
|   return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; |   return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static fixed_y_t clip_bit_depth(int y, int bit_depth) { | ||||||
|  |   const int max = (1 << bit_depth) - 1; | ||||||
|  |   return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max; | ||||||
| } | } | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|  |  | ||||||
| static int RGBToGray(int r, int g, int b) { | static int RGBToGray(int64_t r, int64_t g, int64_t b) { | ||||||
|   const int luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; |   const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; | ||||||
|   return (luma >> YUV_FIX); |   return (int)(luma >> YUV_FIX); | ||||||
| } | } | ||||||
|  |  | ||||||
| static uint32_t ScaleDown(int a, int b, int c, int d) { | static uint32_t ScaleDown(int a, int b, int c, int d, int rgb_bit_depth) { | ||||||
|   const uint32_t A = GammaToLinearS(a); |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | ||||||
|   const uint32_t B = GammaToLinearS(b); |   const uint32_t A = GammaToLinear(a, bit_depth); | ||||||
|   const uint32_t C = GammaToLinearS(c); |   const uint32_t B = GammaToLinear(b, bit_depth); | ||||||
|   const uint32_t D = GammaToLinearS(d); |   const uint32_t C = GammaToLinear(c, bit_depth); | ||||||
|   return LinearToGammaS((A + B + C + D + 2) >> 2); |   const uint32_t D = GammaToLinear(d, bit_depth); | ||||||
|  |   return LinearToGamma((A + B + C + D + 2) >> 2, bit_depth); | ||||||
| } | } | ||||||
|  |  | ||||||
| static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { | static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w, | ||||||
|  |                                 int rgb_bit_depth) { | ||||||
|  |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | ||||||
|   int i; |   int i; | ||||||
|   for (i = 0; i < w; ++i) { |   for (i = 0; i < w; ++i) { | ||||||
|     const uint32_t R = GammaToLinearS(src[0 * w + i]); |     const uint32_t R = GammaToLinear(src[0 * w + i], bit_depth); | ||||||
|     const uint32_t G = GammaToLinearS(src[1 * w + i]); |     const uint32_t G = GammaToLinear(src[1 * w + i], bit_depth); | ||||||
|     const uint32_t B = GammaToLinearS(src[2 * w + i]); |     const uint32_t B = GammaToLinear(src[2 * w + i], bit_depth); | ||||||
|     const uint32_t Y = RGBToGray(R, G, B); |     const uint32_t Y = RGBToGray(R, G, B); | ||||||
|     dst[i] = (fixed_y_t)LinearToGammaS(Y); |     dst[i] = (fixed_y_t)LinearToGamma(Y, bit_depth); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, | static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, | ||||||
|                          fixed_t* dst, int uv_w) { |                          fixed_t* dst, int uv_w, int rgb_bit_depth) { | ||||||
|   int i; |   int i; | ||||||
|   for (i = 0; i < uv_w; ++i) { |   for (i = 0; i < uv_w; ++i) { | ||||||
|     const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], |     const int r = | ||||||
|                             src2[0 * uv_w + 0], src2[0 * uv_w + 1]); |         ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0], | ||||||
|     const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], |                   src2[0 * uv_w + 1], rgb_bit_depth); | ||||||
|                             src2[2 * uv_w + 0], src2[2 * uv_w + 1]); |     const int g = | ||||||
|     const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], |         ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0], | ||||||
|                             src2[4 * uv_w + 0], src2[4 * uv_w + 1]); |                   src2[2 * uv_w + 1], rgb_bit_depth); | ||||||
|  |     const int b = | ||||||
|  |         ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0], | ||||||
|  |                   src2[4 * uv_w + 1], rgb_bit_depth); | ||||||
|     const int W = RGBToGray(r, g, b); |     const int W = RGBToGray(r, g, b); | ||||||
|     dst[0 * uv_w] = (fixed_t)(r - W); |     dst[0 * uv_w] = (fixed_t)(r - W); | ||||||
|     dst[1 * uv_w] = (fixed_t)(g - W); |     dst[1 * uv_w] = (fixed_t)(g - W); | ||||||
| @@ -176,30 +218,50 @@ static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { | |||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|  |  | ||||||
| static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { | static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { | ||||||
|   const int v0 = (A * 3 + B + 2) >> 2; |   const int v0 = (A * 3 + B + 2) >> 2; | ||||||
|   return clip_y(v0 + W0); |   return clip_bit_depth(v0 + W0, bit_depth); | ||||||
| } | } | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|  |  | ||||||
| static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX | static WEBP_INLINE int Shift(int v, int shift) { | ||||||
|   return ((fixed_y_t)a << SFIX); |   return (shift >= 0) ? (v << shift) : (v >> -shift); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static WEBP_INLINE fixed_y_t ChangePrecision(uint16_t a, int shift) { | ||||||
|  |   if (shift == 0) return a; | ||||||
|  |   if (shift < 0) { | ||||||
|  |     const int rounding = 1 << (-shift - 1); | ||||||
|  |     return (a + rounding) >> -shift; | ||||||
|  |   } | ||||||
|  |   return ((fixed_y_t)a << shift); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ImportOneRow(const uint8_t* const r_ptr, | static void ImportOneRow(const uint8_t* const r_ptr, | ||||||
|                          const uint8_t* const g_ptr, |                          const uint8_t* const g_ptr, | ||||||
|                          const uint8_t* const b_ptr, |                          const uint8_t* const b_ptr, | ||||||
|                          int step, |                          int rgb_step, | ||||||
|  |                          int rgb_bit_depth, | ||||||
|                          int pic_width, |                          int pic_width, | ||||||
|                          fixed_y_t* const dst) { |                          fixed_y_t* const dst) { | ||||||
|  |   // Convert the rgb_step from a number of bytes to a number of uint8_t or | ||||||
|  |   // uint16_t values depending the bit depth. | ||||||
|  |   const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step; | ||||||
|   int i; |   int i; | ||||||
|   const int w = (pic_width + 1) & ~1; |   const int w = (pic_width + 1) & ~1; | ||||||
|   for (i = 0; i < pic_width; ++i) { |   for (i = 0; i < pic_width; ++i) { | ||||||
|     const int off = i * step; |     const int off = i * step; | ||||||
|     dst[i + 0 * w] = UpLift(r_ptr[off]); |     const int shift = GetPrecisionShift(rgb_bit_depth); | ||||||
|     dst[i + 1 * w] = UpLift(g_ptr[off]); |     if (rgb_bit_depth == 8) { | ||||||
|     dst[i + 2 * w] = UpLift(b_ptr[off]); |       dst[i + 0 * w] = ChangePrecision(r_ptr[off], shift); | ||||||
|  |       dst[i + 1 * w] = ChangePrecision(g_ptr[off], shift); | ||||||
|  |       dst[i + 2 * w] = ChangePrecision(b_ptr[off], shift); | ||||||
|  |     } else { | ||||||
|  |       dst[i + 0 * w] = ChangePrecision(((uint16_t*)r_ptr)[off], shift); | ||||||
|  |       dst[i + 1 * w] = ChangePrecision(((uint16_t*)g_ptr)[off], shift); | ||||||
|  |       dst[i + 2 * w] = ChangePrecision(((uint16_t*)b_ptr)[off], shift); | ||||||
|  |     } | ||||||
|   } |   } | ||||||
|   if (pic_width & 1) {  // replicate rightmost pixel |   if (pic_width & 1) {  // replicate rightmost pixel | ||||||
|     dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; |     dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; | ||||||
| @@ -214,24 +276,28 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y, | |||||||
|                                const fixed_t* next_uv, |                                const fixed_t* next_uv, | ||||||
|                                int w, |                                int w, | ||||||
|                                fixed_y_t* out1, |                                fixed_y_t* out1, | ||||||
|                                fixed_y_t* out2) { |                                fixed_y_t* out2, | ||||||
|  |                                int rgb_bit_depth) { | ||||||
|   const int uv_w = w >> 1; |   const int uv_w = w >> 1; | ||||||
|   const int len = (w - 1) >> 1;   // length to filter |   const int len = (w - 1) >> 1;   // length to filter | ||||||
|   int k = 3; |   int k = 3; | ||||||
|  |   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); | ||||||
|   while (k-- > 0) {   // process each R/G/B segments in turn |   while (k-- > 0) {   // process each R/G/B segments in turn | ||||||
|     // special boundary case for i==0 |     // special boundary case for i==0 | ||||||
|     out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); |     out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth); | ||||||
|     out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]); |     out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth); | ||||||
|  |  | ||||||
|     SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1); |     SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1, | ||||||
|     SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); |                       bit_depth); | ||||||
|  |     SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1, | ||||||
|  |                       bit_depth); | ||||||
|  |  | ||||||
|     // special boundary case for i == w - 1 when w is even |     // special boundary case for i == w - 1 when w is even | ||||||
|     if (!(w & 1)) { |     if (!(w & 1)) { | ||||||
|       out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], |       out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], | ||||||
|                             best_y[w - 1 + 0]); |                             best_y[w - 1 + 0], bit_depth); | ||||||
|       out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], |       out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], | ||||||
|                             best_y[w - 1 + w]); |                             best_y[w - 1 + w], bit_depth); | ||||||
|     } |     } | ||||||
|     out1 += w; |     out1 += w; | ||||||
|     out2 += w; |     out2 += w; | ||||||
| @@ -241,17 +307,19 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| static WEBP_INLINE uint8_t RGBToYUVComponent(int r, int g, int b, | static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b, | ||||||
|                                              const int coeffs[4]) { |                                          const int coeffs[4], int sfix) { | ||||||
|  |   const int srounder = 1 << (YUV_FIX + sfix - 1); | ||||||
|   const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + |   const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + | ||||||
|                    (coeffs[3] << SFIX) + kYuvRounder; |                    coeffs[3] + srounder; | ||||||
|   return clip_8b((luma >> (YUV_FIX + SFIX))); |   return (luma >> (YUV_FIX + sfix)); | ||||||
| } | } | ||||||
|  |  | ||||||
| static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, | static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, | ||||||
|                             uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, |                             uint8_t* y_ptr, int y_stride, uint8_t* u_ptr, | ||||||
|                             int dst_stride_u, uint8_t* dst_v, int dst_stride_v, |                             int u_stride, uint8_t* v_ptr, int v_stride, | ||||||
|                             int width, int height, |                             int rgb_bit_depth, | ||||||
|  |                             int yuv_bit_depth, int width, int height, | ||||||
|                             const SharpYuvConversionMatrix* yuv_matrix) { |                             const SharpYuvConversionMatrix* yuv_matrix) { | ||||||
|   int i, j; |   int i, j; | ||||||
|   const fixed_t* const best_uv_base = best_uv; |   const fixed_t* const best_uv_base = best_uv; | ||||||
| @@ -259,6 +327,9 @@ static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, | |||||||
|   const int h = (height + 1) & ~1; |   const int h = (height + 1) & ~1; | ||||||
|   const int uv_w = w >> 1; |   const int uv_w = w >> 1; | ||||||
|   const int uv_h = h >> 1; |   const int uv_h = h >> 1; | ||||||
|  |   const int sfix = GetPrecisionShift(rgb_bit_depth); | ||||||
|  |   const int yuv_max = (1 << yuv_bit_depth) - 1; | ||||||
|  |  | ||||||
|   for (best_uv = best_uv_base, j = 0; j < height; ++j) { |   for (best_uv = best_uv_base, j = 0; j < height; ++j) { | ||||||
|     for (i = 0; i < width; ++i) { |     for (i = 0; i < width; ++i) { | ||||||
|       const int off = (i >> 1); |       const int off = (i >> 1); | ||||||
| @@ -266,24 +337,38 @@ static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, | |||||||
|       const int r = best_uv[off + 0 * uv_w] + W; |       const int r = best_uv[off + 0 * uv_w] + W; | ||||||
|       const int g = best_uv[off + 1 * uv_w] + W; |       const int g = best_uv[off + 1 * uv_w] + W; | ||||||
|       const int b = best_uv[off + 2 * uv_w] + W; |       const int b = best_uv[off + 2 * uv_w] + W; | ||||||
|       dst_y[i] = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y); |       const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix); | ||||||
|  |       if (yuv_bit_depth <= 8) { | ||||||
|  |         y_ptr[i] = clip_8b(y); | ||||||
|  |       } else { | ||||||
|  |         ((uint16_t*)y_ptr)[i] = clip(y, yuv_max); | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|     best_y += w; |     best_y += w; | ||||||
|     best_uv += (j & 1) * 3 * uv_w; |     best_uv += (j & 1) * 3 * uv_w; | ||||||
|     dst_y += dst_stride_y; |     y_ptr += y_stride; | ||||||
|   } |   } | ||||||
|   for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { |   for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { | ||||||
|     for (i = 0; i < uv_w; ++i) { |     for (i = 0; i < uv_w; ++i) { | ||||||
|       const int off = i; |       const int off = i; | ||||||
|  |       // Note r, g and b values here are off by W, but a constant offset on all | ||||||
|  |       // 3 components doesn't change the value of u and v with a YCbCr matrix. | ||||||
|       const int r = best_uv[off + 0 * uv_w]; |       const int r = best_uv[off + 0 * uv_w]; | ||||||
|       const int g = best_uv[off + 1 * uv_w]; |       const int g = best_uv[off + 1 * uv_w]; | ||||||
|       const int b = best_uv[off + 2 * uv_w]; |       const int b = best_uv[off + 2 * uv_w]; | ||||||
|       dst_u[i] = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u); |       const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix); | ||||||
|       dst_v[i] = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v); |       const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix); | ||||||
|  |       if (yuv_bit_depth <= 8) { | ||||||
|  |         u_ptr[i] = clip_8b(u); | ||||||
|  |         v_ptr[i] = clip_8b(v); | ||||||
|  |       } else { | ||||||
|  |         ((uint16_t*)u_ptr)[i] = clip(u, yuv_max); | ||||||
|  |         ((uint16_t*)v_ptr)[i] = clip(v, yuv_max); | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|     best_uv += 3 * uv_w; |     best_uv += 3 * uv_w; | ||||||
|     dst_u += dst_stride_u; |     u_ptr += u_stride; | ||||||
|     dst_v += dst_stride_v; |     v_ptr += v_stride; | ||||||
|   } |   } | ||||||
|   return 1; |   return 1; | ||||||
| } | } | ||||||
| @@ -300,10 +385,11 @@ static void* SafeMalloc(uint64_t nmemb, size_t size) { | |||||||
| #define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T))) | #define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T))) | ||||||
|  |  | ||||||
| static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, | static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, | ||||||
|                             const uint8_t* b_ptr, int step, int rgb_stride, |                             const uint8_t* b_ptr, int rgb_step, int rgb_stride, | ||||||
|                             uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, |                             int rgb_bit_depth, uint8_t* y_ptr, int y_stride, | ||||||
|                             int dst_stride_u, uint8_t* dst_v, int dst_stride_v, |                             uint8_t* u_ptr, int u_stride, uint8_t* v_ptr, | ||||||
|                             int width, int height, |                             int v_stride, int yuv_bit_depth, int width, | ||||||
|  |                             int height, | ||||||
|                             const SharpYuvConversionMatrix* yuv_matrix) { |                             const SharpYuvConversionMatrix* yuv_matrix) { | ||||||
|   // we expand the right/bottom border if needed |   // we expand the right/bottom border if needed | ||||||
|   const int w = (width + 1) & ~1; |   const int w = (width + 1) & ~1; | ||||||
| @@ -344,19 +430,20 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, | |||||||
|     fixed_y_t* const src2 = tmp_buffer + 3 * w; |     fixed_y_t* const src2 = tmp_buffer + 3 * w; | ||||||
|  |  | ||||||
|     // prepare two rows of input |     // prepare two rows of input | ||||||
|     ImportOneRow(r_ptr, g_ptr, b_ptr, step, width, src1); |     ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, | ||||||
|  |                  src1); | ||||||
|     if (!is_last_row) { |     if (!is_last_row) { | ||||||
|       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, |       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, | ||||||
|                    step, width, src2); |                    rgb_step, rgb_bit_depth, width, src2); | ||||||
|     } else { |     } else { | ||||||
|       memcpy(src2, src1, 3 * w * sizeof(*src2)); |       memcpy(src2, src1, 3 * w * sizeof(*src2)); | ||||||
|     } |     } | ||||||
|     StoreGray(src1, best_y + 0, w); |     StoreGray(src1, best_y + 0, w); | ||||||
|     StoreGray(src2, best_y + w, w); |     StoreGray(src2, best_y + w, w); | ||||||
|  |  | ||||||
|     UpdateW(src1, target_y, w); |     UpdateW(src1, target_y, w, rgb_bit_depth); | ||||||
|     UpdateW(src2, target_y + w, w); |     UpdateW(src2, target_y + w, w, rgb_bit_depth); | ||||||
|     UpdateChroma(src1, src2, target_uv, uv_w); |     UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth); | ||||||
|     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); |     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); | ||||||
|     best_y += 2 * w; |     best_y += 2 * w; | ||||||
|     best_uv += 3 * uv_w; |     best_uv += 3 * uv_w; | ||||||
| @@ -382,17 +469,20 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, | |||||||
|       fixed_y_t* const src2 = tmp_buffer + 3 * w; |       fixed_y_t* const src2 = tmp_buffer + 3 * w; | ||||||
|       { |       { | ||||||
|         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); |         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); | ||||||
|         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2); |         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, | ||||||
|  |                            src1, src2, rgb_bit_depth); | ||||||
|         prev_uv = cur_uv; |         prev_uv = cur_uv; | ||||||
|         cur_uv = next_uv; |         cur_uv = next_uv; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       UpdateW(src1, best_rgb_y + 0 * w, w); |       UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth); | ||||||
|       UpdateW(src2, best_rgb_y + 1 * w, w); |       UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth); | ||||||
|       UpdateChroma(src1, src2, best_rgb_uv, uv_w); |       UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth); | ||||||
|  |  | ||||||
|       // update two rows of Y and one row of RGB |       // update two rows of Y and one row of RGB | ||||||
|       diff_y_sum += SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w); |       diff_y_sum += | ||||||
|  |           SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, | ||||||
|  |                           rgb_bit_depth + GetPrecisionShift(rgb_bit_depth)); | ||||||
|       SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); |       SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); | ||||||
|  |  | ||||||
|       best_y += 2 * w; |       best_y += 2 * w; | ||||||
| @@ -407,10 +497,11 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, | |||||||
|     } |     } | ||||||
|     prev_diff_y_sum = diff_y_sum; |     prev_diff_y_sum = diff_y_sum; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // final reconstruction |   // final reconstruction | ||||||
|   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, dst_y, dst_stride_y, dst_u, |   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr, | ||||||
|                         dst_stride_u, dst_v, dst_stride_v, width, height, |                         u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth, | ||||||
|                         yuv_matrix); |                         width, height, yuv_matrix); | ||||||
|  |  | ||||||
|  End: |  End: | ||||||
|   free(best_y_base); |   free(best_y_base); | ||||||
| @@ -444,20 +535,66 @@ void SharpYuvInit(VP8CPUInfo cpu_info_func) { | |||||||
|   sharpyuv_last_cpuinfo_used = cpu_info_func; |   sharpyuv_last_cpuinfo_used = cpu_info_func; | ||||||
| } | } | ||||||
|  |  | ||||||
| int SharpYuvConvert(const uint8_t* r_ptr, const uint8_t* g_ptr, | int SharpYuvConvert(const void* r_ptr, const void* g_ptr, | ||||||
|                     const uint8_t* b_ptr, int step, int rgb_stride, |                     const void* b_ptr, int rgb_step, int rgb_stride, | ||||||
|                     uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, |                     int rgb_bit_depth, void* y_ptr, int y_stride, | ||||||
|                     int dst_stride_u, uint8_t* dst_v, int dst_stride_v, |                     void* u_ptr, int u_stride, void* v_ptr, | ||||||
|                     int width, int height, |                     int v_stride, int yuv_bit_depth, int width, | ||||||
|                     const SharpYuvConversionMatrix* yuv_matrix) { |                     int height, const SharpYuvConversionMatrix* yuv_matrix) { | ||||||
|  |   SharpYuvConversionMatrix scaled_matrix; | ||||||
|  |   const int rgb_max = (1 << rgb_bit_depth) - 1; | ||||||
|  |   const int rgb_round = 1 << (rgb_bit_depth - 1); | ||||||
|  |   const int yuv_max = (1 << yuv_bit_depth) - 1; | ||||||
|  |   const int sfix = GetPrecisionShift(rgb_bit_depth); | ||||||
|  |  | ||||||
|   if (width < kMinDimensionIterativeConversion || |   if (width < kMinDimensionIterativeConversion || | ||||||
|       height < kMinDimensionIterativeConversion) { |       height < kMinDimensionIterativeConversion || | ||||||
|  |       r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL || | ||||||
|  |       u_ptr == NULL || v_ptr == NULL) { | ||||||
|  |     return 0; | ||||||
|  |   } | ||||||
|  |   if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 && | ||||||
|  |       rgb_bit_depth != 16) { | ||||||
|  |     return 0; | ||||||
|  |   } | ||||||
|  |   if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { | ||||||
|  |     return 0; | ||||||
|  |   } | ||||||
|  |   if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) { | ||||||
|  |     // Step/stride should be even for uint16_t buffers. | ||||||
|  |     return 0; | ||||||
|  |   } | ||||||
|  |   if (yuv_bit_depth > 8 && | ||||||
|  |       (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { | ||||||
|  |     // Stride should be even for uint16_t buffers. | ||||||
|     return 0; |     return 0; | ||||||
|   } |   } | ||||||
|   SharpYuvInit(NULL); |   SharpYuvInit(NULL); | ||||||
|   return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, step, rgb_stride, dst_y, |  | ||||||
|                           dst_stride_y, dst_u, dst_stride_u, dst_v, |   // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the | ||||||
|                           dst_stride_v, width, height, yuv_matrix); |   // rgb->yuv conversion matrix. | ||||||
|  |   if (rgb_bit_depth == yuv_bit_depth) { | ||||||
|  |     memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix)); | ||||||
|  |   } else { | ||||||
|  |     int i; | ||||||
|  |     for (i = 0; i < 3; ++i) { | ||||||
|  |       scaled_matrix.rgb_to_y[i] = | ||||||
|  |           (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max; | ||||||
|  |       scaled_matrix.rgb_to_u[i] = | ||||||
|  |           (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max; | ||||||
|  |       scaled_matrix.rgb_to_v[i] = | ||||||
|  |           (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   // Also incorporate precision change scaling. | ||||||
|  |   scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix); | ||||||
|  |   scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix); | ||||||
|  |   scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix); | ||||||
|  |  | ||||||
|  |   return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, | ||||||
|  |                           rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride, | ||||||
|  |                           v_ptr, v_stride, yuv_bit_depth, width, height, | ||||||
|  |                           &scaled_matrix); | ||||||
| } | } | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|   | |||||||
| @@ -35,15 +35,33 @@ typedef struct { | |||||||
| // Assumes that the image will be upsampled using a bilinear filter. If nearest | // Assumes that the image will be upsampled using a bilinear filter. If nearest | ||||||
| // neighbor is used instead, the upsampled image might look worse than with | // neighbor is used instead, the upsampled image might look worse than with | ||||||
| // standard downsampling. | // standard downsampling. | ||||||
| // TODO(maryla): add 10 bits support. Add YUV444 to YUV420 conversion. | // r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point | ||||||
| // Maybe also add 422 support (it's rarely used in practice, especially for | //     to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise. | ||||||
| // images). | // rgb_step: distance in bytes between two horizontally adjacent pixels on the | ||||||
| int SharpYuvConvert(const uint8_t* r_ptr, const uint8_t* g_ptr, | //     r, g and b channels. If rgb_bit_depth is > 8, it should be a | ||||||
|                     const uint8_t* b_ptr, int step, int rgb_stride, | //     multiple of 2. | ||||||
|                     uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, | // rgb_stride: distance in bytes between two vertically adjacent pixels on the | ||||||
|                     int dst_stride_u, uint8_t* dst_v, int dst_stride_v, | //     r, g, and b channels. If rgb_bit_depth is > 8, it should be a | ||||||
|                     int width, int height, | //     multiple of 2. | ||||||
|                     const SharpYuvConversionMatrix* yuv_matrix); | // rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16. | ||||||
|  | //     Note: for 10+ bit, input is truncated to 10 bits. | ||||||
|  | //     TODO(b/194336375): increase precision. | ||||||
|  | // yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12. | ||||||
|  | // y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels.  Should | ||||||
|  | //     point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers | ||||||
|  | //     otherwise. | ||||||
|  | // y_stride, u_stride, v_stride: distance in bytes between two vertically | ||||||
|  | //     adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they | ||||||
|  | //     should be multiples of 2. | ||||||
|  | // width, height: width and height of the image in pixels | ||||||
|  | int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, | ||||||
|  |                     int rgb_step, int rgb_stride, int rgb_bit_depth, | ||||||
|  |                     void* y_ptr, int y_stride, void* u_ptr, int u_stride, | ||||||
|  |                     void* v_ptr, int v_stride, int yuv_bit_depth, int width, | ||||||
|  |                     int height, const SharpYuvConversionMatrix* yuv_matrix); | ||||||
|  |  | ||||||
|  | // TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422 | ||||||
|  | // support (it's rarely used in practice, especially for images). | ||||||
|  |  | ||||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||||
| }  // extern "C" | }  // extern "C" | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ | |||||||
| #include <math.h> | #include <math.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
|  |  | ||||||
| static int ToFixed16(float f) { return (int)round(f * (1 << 16)); } | static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); } | ||||||
|  |  | ||||||
| void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, | void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, | ||||||
|                                      SharpYuvConversionMatrix* matrix) { |                                      SharpYuvConversionMatrix* matrix) { | ||||||
| @@ -25,28 +25,27 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, | |||||||
|   const float cr = 0.5f / (1.0f - kb); |   const float cr = 0.5f / (1.0f - kb); | ||||||
|   const float cb = 0.5f / (1.0f - kr); |   const float cb = 0.5f / (1.0f - kr); | ||||||
|  |  | ||||||
|   const int shift = yuv_color_space->bits - 8; |   const int shift = yuv_color_space->bit_depth - 8; | ||||||
|  |  | ||||||
|   const float denom = (float)((1 << yuv_color_space->bits) - 1); |   const float denom = (float)((1 << yuv_color_space->bit_depth) - 1); | ||||||
|   float scale_y = 1.0f; |   float scale_y = 1.0f; | ||||||
|   float addY = 0.0f; |   float add_y = 0.0f; | ||||||
|   float scale_u = cr; |   float scale_u = cr; | ||||||
|   float scale_v = cb; |   float scale_v = cb; | ||||||
|   float add_uv = (float)(128 << shift); |   float add_uv = (float)(128 << shift); | ||||||
|  |   assert(yuv_color_space->bit_depth >= 8); | ||||||
|   assert(yuv_color_space->bits >= 8); |  | ||||||
|  |  | ||||||
|   if (yuv_color_space->range == kSharpYuvRangeLimited) { |   if (yuv_color_space->range == kSharpYuvRangeLimited) { | ||||||
|     scale_y *= (219 << shift) / denom; |     scale_y *= (219 << shift) / denom; | ||||||
|     scale_u *= (224 << shift) / denom; |     scale_u *= (224 << shift) / denom; | ||||||
|     scale_v *= (224 << shift) / denom; |     scale_v *= (224 << shift) / denom; | ||||||
|     addY = (float)(16 << shift); |     add_y = (float)(16 << shift); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   matrix->rgb_to_y[0] = ToFixed16(kr * scale_y); |   matrix->rgb_to_y[0] = ToFixed16(kr * scale_y); | ||||||
|   matrix->rgb_to_y[1] = ToFixed16(kg * scale_y); |   matrix->rgb_to_y[1] = ToFixed16(kg * scale_y); | ||||||
|   matrix->rgb_to_y[2] = ToFixed16(kb * scale_y); |   matrix->rgb_to_y[2] = ToFixed16(kb * scale_y); | ||||||
|   matrix->rgb_to_y[3] = ToFixed16(addY); |   matrix->rgb_to_y[3] = ToFixed16(add_y); | ||||||
|  |  | ||||||
|   matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u); |   matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u); | ||||||
|   matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u); |   matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u); | ||||||
|   | |||||||
| @@ -30,7 +30,7 @@ typedef struct { | |||||||
|   // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb. |   // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb. | ||||||
|   float kr; |   float kr; | ||||||
|   float kb; |   float kb; | ||||||
|   int bits;  // Only 8 bit is supported by SharpYuvConvert. |   int bit_depth;  // 8, 10 or 12 | ||||||
|   SharpYuvRange range; |   SharpYuvRange range; | ||||||
| } SharpYuvColorSpace; | } SharpYuvColorSpace; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -21,19 +21,19 @@ | |||||||
| //----------------------------------------------------------------------------- | //----------------------------------------------------------------------------- | ||||||
|  |  | ||||||
| #if !WEBP_NEON_OMIT_C_CODE | #if !WEBP_NEON_OMIT_C_CODE | ||||||
| #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic | static uint16_t clip(int v, int max) { | ||||||
| static uint16_t clip_y(int v) { |   return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; | ||||||
|   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src, | static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src, | ||||||
|                                   uint16_t* dst, int len) { |                                   uint16_t* dst, int len, int bit_depth) { | ||||||
|   uint64_t diff = 0; |   uint64_t diff = 0; | ||||||
|   int i; |   int i; | ||||||
|  |   const int max_y = (1 << bit_depth) - 1; | ||||||
|   for (i = 0; i < len; ++i) { |   for (i = 0; i < len; ++i) { | ||||||
|     const int diff_y = ref[i] - src[i]; |     const int diff_y = ref[i] - src[i]; | ||||||
|     const int new_y = (int)dst[i] + diff_y; |     const int new_y = (int)dst[i] + diff_y; | ||||||
|     dst[i] = clip_y(new_y); |     dst[i] = clip(new_y, max_y); | ||||||
|     diff += (uint64_t)abs(diff_y); |     diff += (uint64_t)abs(diff_y); | ||||||
|   } |   } | ||||||
|   return diff; |   return diff; | ||||||
| @@ -49,27 +49,28 @@ static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src, | |||||||
| } | } | ||||||
|  |  | ||||||
| static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len, | static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len, | ||||||
|                                 const uint16_t* best_y, uint16_t* out) { |                                 const uint16_t* best_y, uint16_t* out, | ||||||
|  |                                 int bit_depth) { | ||||||
|   int i; |   int i; | ||||||
|  |   const int max_y = (1 << bit_depth) - 1; | ||||||
|   for (i = 0; i < len; ++i, ++A, ++B) { |   for (i = 0; i < len; ++i, ++A, ++B) { | ||||||
|     const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; |     const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; | ||||||
|     const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; |     const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; | ||||||
|     out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); |     out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y); | ||||||
|     out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); |     out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| #endif  // !WEBP_NEON_OMIT_C_CODE | #endif  // !WEBP_NEON_OMIT_C_CODE | ||||||
|  |  | ||||||
| #undef MAX_Y |  | ||||||
|  |  | ||||||
| //----------------------------------------------------------------------------- | //----------------------------------------------------------------------------- | ||||||
|  |  | ||||||
| uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, | uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, | ||||||
|                             uint16_t* dst, int len); |                             uint16_t* dst, int len, int bit_depth); | ||||||
| void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst, | void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst, | ||||||
|                           int len); |                           int len); | ||||||
| void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, | void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, | ||||||
|                           const uint16_t* best_y, uint16_t* out); |                           const uint16_t* best_y, uint16_t* out, | ||||||
|  |                           int bit_depth); | ||||||
|  |  | ||||||
| extern void InitSharpYuvSSE2(void); | extern void InitSharpYuvSSE2(void); | ||||||
| extern void InitSharpYuvNEON(void); | extern void InitSharpYuvNEON(void); | ||||||
|   | |||||||
| @@ -17,11 +17,12 @@ | |||||||
| #include "src/dsp/cpu.h" | #include "src/dsp/cpu.h" | ||||||
|  |  | ||||||
| extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, | extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, | ||||||
|                                    uint16_t* dst, int len); |                                    uint16_t* dst, int len, int bit_depth); | ||||||
| extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, | extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, | ||||||
|                                  int16_t* dst, int len); |                                  int16_t* dst, int len); | ||||||
| extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, | extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, | ||||||
|                                  const uint16_t* best_y, uint16_t* out); |                                  const uint16_t* best_y, uint16_t* out, | ||||||
|  |                                  int bit_depth); | ||||||
|  |  | ||||||
| void SharpYuvInitDsp(VP8CPUInfo cpu_info_func); | void SharpYuvInitDsp(VP8CPUInfo cpu_info_func); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -23,16 +23,16 @@ extern void InitSharpYuvNEON(void); | |||||||
|  |  | ||||||
| #if defined(WEBP_USE_NEON) | #if defined(WEBP_USE_NEON) | ||||||
|  |  | ||||||
| #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic | static uint16_t clip_NEON(int v, int max) { | ||||||
| static uint16_t clip_y_NEON(int v) { |   return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; | ||||||
|   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src, | static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src, | ||||||
|                                      uint16_t* dst, int len) { |                                      uint16_t* dst, int len, int bit_depth) { | ||||||
|  |   const int max_y = (1 << bit_depth) - 1; | ||||||
|   int i; |   int i; | ||||||
|   const int16x8_t zero = vdupq_n_s16(0); |   const int16x8_t zero = vdupq_n_s16(0); | ||||||
|   const int16x8_t max = vdupq_n_s16(MAX_Y); |   const int16x8_t max = vdupq_n_s16(max_y); | ||||||
|   uint64x2_t sum = vdupq_n_u64(0); |   uint64x2_t sum = vdupq_n_u64(0); | ||||||
|   uint64_t diff; |   uint64_t diff; | ||||||
|  |  | ||||||
| @@ -52,7 +52,7 @@ static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src, | |||||||
|   for (; i < len; ++i) { |   for (; i < len; ++i) { | ||||||
|     const int diff_y = ref[i] - src[i]; |     const int diff_y = ref[i] - src[i]; | ||||||
|     const int new_y = (int)(dst[i]) + diff_y; |     const int new_y = (int)(dst[i]) + diff_y; | ||||||
|     dst[i] = clip_y_NEON(new_y); |     dst[i] = clip_NEON(new_y, max_y); | ||||||
|     diff += (uint64_t)(abs(diff_y)); |     diff += (uint64_t)(abs(diff_y)); | ||||||
|   } |   } | ||||||
|   return diff; |   return diff; | ||||||
| @@ -76,9 +76,11 @@ static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src, | |||||||
| } | } | ||||||
|  |  | ||||||
| static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len, | static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len, | ||||||
|                                    const uint16_t* best_y, uint16_t* out) { |                                    const uint16_t* best_y, uint16_t* out, | ||||||
|  |                                    int bit_depth) { | ||||||
|  |   const int max_y = (1 << bit_depth) - 1; | ||||||
|   int i; |   int i; | ||||||
|   const int16x8_t max = vdupq_n_s16(MAX_Y); |   const int16x8_t max = vdupq_n_s16(max_y); | ||||||
|   const int16x8_t zero = vdupq_n_s16(0); |   const int16x8_t zero = vdupq_n_s16(0); | ||||||
|   for (i = 0; i + 8 <= len; i += 8) { |   for (i = 0; i + 8 <= len; i += 8) { | ||||||
|     const int16x8_t a0 = vld1q_s16(A + i + 0); |     const int16x8_t a0 = vld1q_s16(A + i + 0); | ||||||
| @@ -112,11 +114,10 @@ static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len, | |||||||
|     const int a0a1b0b1 = a0b1 + a1b0 + 8; |     const int a0a1b0b1 = a0b1 + a1b0 + 8; | ||||||
|     const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; |     const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; | ||||||
|     const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; |     const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; | ||||||
|     out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0); |     out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y); | ||||||
|     out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1); |     out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| #undef MAX_Y |  | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|  |  | ||||||
|   | |||||||
| @@ -22,18 +22,18 @@ extern void InitSharpYuvSSE2(void); | |||||||
|  |  | ||||||
| #if defined(WEBP_USE_SSE2) | #if defined(WEBP_USE_SSE2) | ||||||
|  |  | ||||||
| #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic | static uint16_t clip_SSE2(int v, int max) { | ||||||
| static uint16_t clip_y(int v) { |   return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; | ||||||
|   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, | static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, | ||||||
|                                      uint16_t* dst, int len) { |                                      uint16_t* dst, int len, int bit_depth) { | ||||||
|  |   const int max_y = (1 << bit_depth) - 1; | ||||||
|   uint64_t diff = 0; |   uint64_t diff = 0; | ||||||
|   uint32_t tmp[4]; |   uint32_t tmp[4]; | ||||||
|   int i; |   int i; | ||||||
|   const __m128i zero = _mm_setzero_si128(); |   const __m128i zero = _mm_setzero_si128(); | ||||||
|   const __m128i max = _mm_set1_epi16(MAX_Y); |   const __m128i max = _mm_set1_epi16(max_y); | ||||||
|   const __m128i one = _mm_set1_epi16(1); |   const __m128i one = _mm_set1_epi16(1); | ||||||
|   __m128i sum = zero; |   __m128i sum = zero; | ||||||
|  |  | ||||||
| @@ -55,7 +55,7 @@ static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, | |||||||
|   for (; i < len; ++i) { |   for (; i < len; ++i) { | ||||||
|     const int diff_y = ref[i] - src[i]; |     const int diff_y = ref[i] - src[i]; | ||||||
|     const int new_y = (int)dst[i] + diff_y; |     const int new_y = (int)dst[i] + diff_y; | ||||||
|     dst[i] = clip_y(new_y); |     dst[i] = clip_SSE2(new_y, max_y); | ||||||
|     diff += (uint64_t)abs(diff_y); |     diff += (uint64_t)abs(diff_y); | ||||||
|   } |   } | ||||||
|   return diff; |   return diff; | ||||||
| @@ -79,10 +79,12 @@ static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, | |||||||
| } | } | ||||||
|  |  | ||||||
| static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, | static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, | ||||||
|                                    const uint16_t* best_y, uint16_t* out) { |                                    const uint16_t* best_y, uint16_t* out, | ||||||
|  |                                    int bit_depth) { | ||||||
|  |   const int max_y = (1 << bit_depth) - 1; | ||||||
|   int i; |   int i; | ||||||
|   const __m128i kCst8 = _mm_set1_epi16(8); |   const __m128i kCst8 = _mm_set1_epi16(8); | ||||||
|   const __m128i max = _mm_set1_epi16(MAX_Y); |   const __m128i max = _mm_set1_epi16(max_y); | ||||||
|   const __m128i zero = _mm_setzero_si128(); |   const __m128i zero = _mm_setzero_si128(); | ||||||
|   for (i = 0; i + 8 <= len; i += 8) { |   for (i = 0; i + 8 <= len; i += 8) { | ||||||
|     const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); |     const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); | ||||||
| @@ -121,11 +123,10 @@ static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, | |||||||
|     const int a0a1b0b1 = a0b1 + a1b0 + 8; |     const int a0a1b0b1 = a0b1 + a1b0 + 8; | ||||||
|     const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; |     const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; | ||||||
|     const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; |     const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; | ||||||
|     out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); |     out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y); | ||||||
|     out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); |     out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| #undef MAX_Y |  | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|  |  | ||||||
|   | |||||||
| @@ -191,10 +191,10 @@ static int PreprocessARGB(const uint8_t* r_ptr, | |||||||
|                           int step, int rgb_stride, |                           int step, int rgb_stride, | ||||||
|                           WebPPicture* const picture) { |                           WebPPicture* const picture) { | ||||||
|   const int ok = SharpYuvConvert( |   const int ok = SharpYuvConvert( | ||||||
|       r_ptr, g_ptr, b_ptr, step, rgb_stride, picture->y, picture->y_stride, |       r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8, | ||||||
|       picture->u, picture->uv_stride, picture->v, picture->uv_stride, |       picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v, | ||||||
|       picture->width, picture->height, |       picture->uv_stride, /*yuv_bit_depth=*/8, picture->width, | ||||||
|       SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp)); |       picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp)); | ||||||
|   if (!ok) { |   if (!ok) { | ||||||
|     return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); |     return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); | ||||||
|   } |   } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user