Have lossless use ImportYUVAFromRGB

There was a duplicated functionality with a lower quality which
could lead to decoded lossless WebP to YUV being different from
lossless WebP to PNG to YUV.

The rescaler is not using it yet.

Bug: 432241412
Change-Id: Id794880957935b69729d4b34ae453551d13364dc
This commit is contained in:
Vincent Rabaud
2025-08-25 12:52:26 +02:00
parent fd2c2cc05b
commit 0d14d84bdb
5 changed files with 611 additions and 407 deletions

View File

@@ -24,6 +24,7 @@
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/dsp/yuv.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/huffman_utils.h"
@@ -703,13 +704,71 @@ static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, uint8_t* in,
return y_pos;
}
static int EmitRowsYUVA(const VP8LDecoder* const dec, const uint8_t* in,
int in_stride, int mb_w, int num_rows) {
// Walks 'height' rows of 'width' alpha bytes, consecutive rows being 'y_step'
// bytes apart, and returns 1 as soon as WebPHasAlpha8b() flags a row as
// containing non-0xff values. Returns 0 if no row is flagged.
static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
                          int y_step) {
  const uint8_t* row_ptr = alpha;
  int rows_left = height;
  WebPInitAlphaProcessing();
  while (rows_left > 0) {
    if (WebPHasAlpha8b(row_ptr, width)) {
      return 1;
    }
    row_ptr += y_step;
    --rows_left;
  }
  return 0;
}
static int EmitRowsYUVA(const uint8_t* const in, const VP8Io* const io,
int in_stride, uint16_t* tmp_rgb,
VP8LDecoder* const dec) {
int y_pos = dec->last_out_row;
while (num_rows-- > 0) {
ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output);
in += in_stride;
++y_pos;
const int width = io->mb_w;
int num_rows = io->mb_h;
const int y_pos_final = y_pos + num_rows;
const int y_stride = dec->output->u.YUVA.y_stride;
const int uv_stride = dec->output->u.YUVA.u_stride;
const int a_stride = dec->output->u.YUVA.a_stride;
uint8_t* dst_a = dec->output->u.YUVA.a;
uint8_t* dst_y = dec->output->u.YUVA.y + y_pos * y_stride;
uint8_t* dst_u = dec->output->u.YUVA.u + (y_pos >> 1) * uv_stride;
uint8_t* dst_v = dec->output->u.YUVA.v + (y_pos >> 1) * uv_stride;
const uint8_t* r_ptr = in + CHANNEL_OFFSET(1);
const uint8_t* g_ptr = in + CHANNEL_OFFSET(2);
const uint8_t* b_ptr = in + CHANNEL_OFFSET(3);
const uint8_t* a_ptr = NULL;
int has_alpha = 0;
// Make sure the lines are processed two by two from the start.
assert(y_pos % 2 == 0);
// Make sure num_rows is even. The y_pos_final test below handles a remaining
// odd last row.
num_rows &= ~1;
if (dst_a) {
dst_a += y_pos * a_stride;
a_ptr = in + CHANNEL_OFFSET(0);
has_alpha = CheckNonOpaque(a_ptr, width, num_rows, in_stride);
}
// Process pairs of lines.
WebPImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /*step=*/4, in_stride,
has_alpha, width, num_rows, tmp_rgb, y_stride,
uv_stride, a_stride, dst_y, dst_u, dst_v, dst_a);
y_pos += num_rows;
if (y_pos_final == io->crop_bottom - io->crop_top && y_pos < y_pos_final) {
assert(y_pos + 1 == y_pos_final);
// If we output the last line of an image with odd height.
dst_y += num_rows * y_stride;
dst_u += (num_rows >> 1) * uv_stride;
dst_v += (num_rows >> 1) * uv_stride;
r_ptr += num_rows * in_stride;
g_ptr += num_rows * in_stride;
b_ptr += num_rows * in_stride;
if (dst_a) {
dst_a += num_rows * a_stride;
a_ptr += num_rows * in_stride;
has_alpha = CheckNonOpaque(a_ptr, width, /*height=*/1, in_stride);
}
WebPImportYUVAFromRGBALastLine(r_ptr, g_ptr, b_ptr, a_ptr, /*step=*/4,
has_alpha, width, tmp_rgb, dst_y, dst_u,
dst_v, dst_a);
y_pos = y_pos_final;
}
return y_pos;
}
@@ -789,8 +848,17 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int start_row,
// last call.
static void ProcessRows(VP8LDecoder* const dec, int row) {
const uint32_t* const rows = dec->pixels + dec->width * dec->last_row;
const int num_rows = row - dec->last_row;
int num_rows;
// In case of YUV conversion and if we do not need to get to the last row.
if (!WebPIsRGBMode(dec->output->colorspace) && row >= dec->io->crop_top &&
row < dec->io->crop_bottom) {
// Make sure the number of rows to process is even.
if ((row - dec->io->crop_top) % 2 == 1) {
--row;
}
}
num_rows = row - dec->last_row;
assert(row <= dec->io->crop_bottom);
// We can't process more than NUM_ARGB_CACHE_ROWS at a time (that's the size
// of argb_cache), but we currently don't need more than that.
@@ -822,7 +890,8 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
dec->last_out_row =
io->use_scaling
? EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h)
: EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
: EmitRowsYUVA(rows_data, io, in_stride,
dec->accumulated_rgb_pixels, dec);
}
assert(dec->last_out_row <= output->height);
}
@@ -1526,9 +1595,16 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
const uint64_t cache_top_pixels = (uint16_t)final_width;
// Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
const uint64_t total_num_pixels =
num_pixels + cache_top_pixels + cache_pixels;
// Scratch buffer to accumulate RGBA values (hence 4*) for YUV conversion.
uint64_t accumulated_rgb_pixels = 0;
uint64_t total_num_pixels;
if (dec->output != NULL && !WebPIsRGBMode(dec->output->colorspace)) {
const int uv_width = (dec->io->crop_right - dec->io->crop_left + 1) >> 1;
accumulated_rgb_pixels =
4 * uv_width * sizeof(*dec->accumulated_rgb_pixels) / sizeof(uint32_t);
}
total_num_pixels =
num_pixels + cache_top_pixels + cache_pixels + accumulated_rgb_pixels;
assert(dec->width <= final_width);
dec->pixels = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
if (dec->pixels == NULL) {
@@ -1536,6 +1612,12 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
}
dec->argb_cache = dec->pixels + num_pixels + cache_top_pixels;
dec->accumulated_rgb_pixels =
accumulated_rgb_pixels == 0
? NULL
: (uint16_t*)(dec->pixels + num_pixels + cache_top_pixels +
cache_pixels);
return 1;
}

View File

@@ -67,6 +67,8 @@ struct VP8LDecoder {
uint32_t* pixels; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t* argb_cache; // Scratch buffer for temporary BGRA storage.
uint16_t* accumulated_rgb_pixels; // Scratch buffer for accumulated RGB for
// YUV conversion.
VP8LBitReader br;
int incremental; // if true, incremental decoding is expected

View File

@@ -21,6 +21,16 @@
#include "src/webp/decode.h"
#include "src/webp/types.h"
// Comment out to disable gamma-compression during RGB->U/V averaging
#define USE_GAMMA_COMPRESSION
// If defined, use table to compute x / alpha.
#define USE_INVERSE_ALPHA_TABLE
#ifdef USE_GAMMA_COMPRESSION
#include <math.h>
#endif
//-----------------------------------------------------------------------------
// Plain-C version
@@ -204,6 +214,388 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
}
}
//------------------------------------------------------------------------------
// Code for gamma correction
#if defined(USE_GAMMA_COMPRESSION)
// Gamma correction compensates loss of resolution during chroma subsampling.
#define GAMMA_FIX 12 // fixed-point precision for linear values
#define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision
#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX))
static const double kGamma = 0.80;
static const int kGammaScale = ((1 << GAMMA_FIX) - 1);
static const int kGammaTabScale = (1 << GAMMA_TAB_FIX);
static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1);
static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
static uint16_t kGammaToLinearTab[256];
static volatile int kGammaTablesOk = 0;
extern VP8CPUInfo VP8GetCPUInfo;
// Builds the two gamma lookup tables on the first call:
//  - kGammaToLinearTab[]: 8-bit sample -> linear value scaled by kGammaScale.
//  - kLinearToGammaTab[]: GAMMA_TAB_SIZE + 1 sample points of the inverse
//    curve, scaled to [0..255], used by Interpolate().
// Later calls find kGammaTablesOk set and leave the tables untouched.
WEBP_DSP_INIT_FUNC(WebPInitGammaTables) {
  if (!kGammaTablesOk) {
    const double norm = 1. / 255.;
    const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale;
    int sample;
    int entry;
    // Forward table: one entry per possible 8-bit input.
    for (sample = 0; sample <= 255; ++sample) {
      kGammaToLinearTab[sample] =
          (uint16_t)(pow(norm * sample, kGamma) * kGammaScale + .5);
    }
    // Inverse table: includes the extra end point at GAMMA_TAB_SIZE.
    for (entry = 0; entry <= GAMMA_TAB_SIZE; ++entry) {
      kLinearToGammaTab[entry] =
          (int)(255. * pow(scale * entry, 1. / kGamma) + .5);
    }
    kGammaTablesOk = 1;
  }
}
// Returns the kGammaToLinearTab[] entry for the 8-bit sample 'v'.
// WebPInitGammaTables() must have filled the table beforehand.
static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
  const uint32_t linear = kGammaToLinearTab[v];
  return linear;
}
// Linearly interpolates kLinearToGammaTab[] at position 'v'.
// The top bits of 'v' (above GAMMA_TAB_FIX + 2) select the table entry and the
// low GAMMA_TAB_FIX + 2 bits are the fractional weight between that entry and
// the next one. The result is left scaled by (kGammaTabScale << 2).
static WEBP_INLINE int Interpolate(int v) {
  const int tab_pos = v >> (GAMMA_TAB_FIX + 2);   // integer part
  const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
  int v0, v1;
  // Check the index before touching the table, so that a debug build stops
  // here rather than after an out-of-bounds read.
  assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1);
  v0 = kLinearToGammaTab[tab_pos];
  v1 = kLinearToGammaTab[tab_pos + 1];
  return v1 * x + v0 * ((kGammaTabScale << 2) - x);  // interpolate
}
// Converts the linear value 'base_value << shift' to a YUV_FIX+2 fixed-point
// precision U/V value, suitable for RGBToU/V calls: the uplifted value is
// interpolated in the inverse-gamma table, then rounded and descaled by
// GAMMA_TAB_FIX bits.
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
  const uint32_t uplifted = base_value << shift;
  const int interpolated = Interpolate(uplifted);
  return (interpolated + kGammaTabRounder) >> GAMMA_TAB_FIX;
}
#else
// Fallbacks used when USE_GAMMA_COMPRESSION is not defined.
// There is no table to build, so the init function is empty.
void WebPInitGammaTables(void) {}
// Without gamma compression the sample is used as-is.
static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
// Without gamma compression only the requested left shift is applied.
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
return (int)(base_value << shift);
}
#endif // USE_GAMMA_COMPRESSION
//------------------------------------------------------------------------------
// "Fast" regular RGB->YUV

// All macros below read a variable named 'rgb_stride' (distance in bytes
// between two consecutive rows) from the scope in which they are expanded.

// Gamma-aware sum of a 2x2 block: samples at ptr[0] and ptr[step] on the
// current row plus the same two positions one row below.
#define SUM4(ptr, step)                                                   \
  LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[(step)]) +  \
                    GammaToLinear((ptr)[rgb_stride]) +                    \
                    GammaToLinear((ptr)[rgb_stride + (step)]),            \
                0)
// Gamma-aware sum of a 1x2 column (last column of an odd-width image). The
// shift of 1 doubles the two-sample sum before the gamma lookup.
#define SUM2(ptr) \
  LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
// Plain (non gamma-corrected) sums of alpha samples over a 1x2 column and over
// a 2x2 block of 4-byte pixels.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
#if defined(USE_INVERSE_ALPHA_TABLE)
static const int kAlphaFix = 19;
// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
// formula is then equal to v / a in most (99.6%) cases. Note that this table
// and constant are adjusted very tightly to fit 32b arithmetic.
// In particular, they use the fact that the operands for 'v / a' are actually
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid
// overflow is: GAMMA_FIX + kAlphaFix <= 31.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
0, /* alpha = 0 */
524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, 58254, 52428,
47662, 43690, 40329, 37449, 34952, 32768, 30840, 29127, 27594, 26214,
24966, 23831, 22795, 21845, 20971, 20164, 19418, 18724, 18078, 17476,
16912, 16384, 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922, 10699, 10485,
10280, 10082, 9892, 9709, 9532, 9362, 9198, 9039, 8886, 8738,
8594, 8456, 8322, 8192, 8065, 7943, 7825, 7710, 7598, 7489,
7384, 7281, 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957, 5890, 5825,
5761, 5698, 5637, 5577, 5518, 5461, 5405, 5349, 5295, 5242,
5190, 5140, 5090, 5041, 4993, 4946, 4899, 4854, 4809, 4766,
4723, 4681, 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096, 4064, 4032,
4002, 3971, 3942, 3912, 3883, 3855, 3826, 3799, 3771, 3744,
3718, 3692, 3666, 3640, 3615, 3591, 3566, 3542, 3518, 3495,
3472, 3449, 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120, 3102, 3084,
3066, 3048, 3030, 3013, 2995, 2978, 2962, 2945, 2928, 2912,
2896, 2880, 2864, 2849, 2833, 2818, 2803, 2788, 2774, 2759,
2744, 2730, 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520, 2508, 2496,
2484, 2473, 2461, 2449, 2438, 2427, 2416, 2404, 2394, 2383,
2372, 2361, 2351, 2340, 2330, 2319, 2309, 2299, 2289, 2279,
2269, 2259, 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114, 2105, 2097,
2088, 2080, 2072, 2064, 2056, 2048, 2040, 2032, 2024, 2016,
2008, 2001, 1993, 1985, 1978, 1971, 1963, 1956, 1949, 1941,
1934, 1927, 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820, 1814, 1807,
1801, 1795, 1789, 1783, 1777, 1771, 1765, 1759, 1753, 1747,
1741, 1736, 1730, 1724, 1718, 1713, 1707, 1702, 1696, 1691,
1685, 1680, 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598, 1593, 1588,
1583, 1579, 1574, 1569, 1565, 1560, 1555, 1551, 1546, 1542,
1537, 1533, 1528, 1524, 1519, 1515, 1510, 1506, 1502, 1497,
1493, 1489, 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424, 1420, 1416,
1413, 1409, 1405, 1401, 1398, 1394, 1390, 1387, 1383, 1379,
1376, 1372, 1368, 1365, 1361, 1358, 1354, 1351, 1347, 1344,
1340, 1337, 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285, 1281, 1278,
1275, 1272, 1269, 1266, 1263, 1260, 1257, 1254, 1251, 1248,
1245, 1242, 1239, 1236, 1233, 1230, 1227, 1224, 1222, 1219,
1216, 1213, 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170, 1167, 1165,
1162, 1159, 1157, 1154, 1152, 1149, 1147, 1144, 1142, 1139,
1137, 1134, 1132, 1129, 1127, 1125, 1122, 1120, 1117, 1115,
1113, 1110, 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074, 1072, 1069,
1067, 1065, 1063, 1061, 1059, 1057, 1054, 1052, 1050, 1048,
1046, 1044, 1042, 1040, 1038, 1036, 1034, 1032, 1030, 1028,
1026, 1024, 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
1006, 1004, 1002, 1000, 998, 996, 994, 992, 991, 989,
987, 985, 983, 981, 979, 978, 976, 974, 972, 970,
969, 967, 965, 963, 961, 960, 958, 956, 954, 953,
951, 949, 948, 946, 944, 942, 941, 939, 937, 936,
934, 932, 931, 929, 927, 926, 924, 923, 921, 919,
918, 916, 914, 913, 911, 910, 908, 907, 905, 903,
902, 900, 899, 897, 896, 894, 893, 891, 890, 888,
887, 885, 884, 882, 881, 879, 878, 876, 875, 873,
872, 870, 869, 868, 866, 865, 863, 862, 860, 859,
858, 856, 855, 853, 852, 851, 849, 848, 846, 845,
844, 842, 841, 840, 838, 837, 836, 834, 833, 832,
830, 829, 828, 826, 825, 824, 823, 821, 820, 819,
817, 816, 815, 814, 812, 811, 810, 809, 807, 806,
805, 804, 802, 801, 800, 799, 798, 796, 795, 794,
793, 791, 790, 789, 788, 787, 786, 784, 783, 782,
781, 780, 779, 777, 776, 775, 774, 773, 772, 771,
769, 768, 767, 766, 765, 764, 763, 762, 760, 759,
758, 757, 756, 755, 754, 753, 752, 751, 750, 748,
747, 746, 745, 744, 743, 742, 741, 740, 739, 738,
737, 736, 735, 734, 733, 732, 731, 730, 729, 728,
727, 726, 725, 724, 723, 722, 721, 720, 719, 718,
717, 716, 715, 714, 713, 712, 711, 710, 709, 708,
707, 706, 705, 704, 703, 702, 701, 700, 699, 699,
698, 697, 696, 695, 694, 693, 692, 691, 690, 689,
688, 688, 687, 686, 685, 684, 683, 682, 681, 680,
680, 679, 678, 677, 676, 675, 674, 673, 673, 672,
671, 670, 669, 668, 667, 667, 666, 665, 664, 663,
662, 661, 661, 660, 659, 658, 657, 657, 656, 655,
654, 653, 652, 652, 651, 650, 649, 648, 648, 647,
646, 645, 644, 644, 643, 642, 641, 640, 640, 639,
638, 637, 637, 636, 635, 634, 633, 633, 632, 631,
630, 630, 629, 628, 627, 627, 626, 625, 624, 624,
623, 622, 621, 621, 620, 619, 618, 618, 617, 616,
616, 615, 614, 613, 613, 612, 611, 611, 610, 609,
608, 608, 607, 606, 606, 605, 604, 604, 603, 602,
601, 601, 600, 599, 599, 598, 597, 597, 596, 595,
595, 594, 593, 593, 592, 591, 591, 590, 589, 589,
588, 587, 587, 586, 585, 585, 584, 583, 583, 582,
581, 581, 580, 579, 579, 578, 578, 577, 576, 576,
575, 574, 574, 573, 572, 572, 571, 571, 570, 569,
569, 568, 568, 567, 566, 566, 565, 564, 564, 563,
563, 562, 561, 561, 560, 560, 559, 558, 558, 557,
557, 556, 555, 555, 554, 554, 553, 553, 552, 551,
551, 550, 550, 549, 548, 548, 547, 547, 546, 546,
545, 544, 544, 543, 543, 542, 542, 541, 541, 540,
539, 539, 538, 538, 537, 537, 536, 536, 535, 534,
534, 533, 533, 532, 532, 531, 531, 530, 530, 529,
529, 528, 527, 527, 526, 526, 525, 525, 524, 524,
523, 523, 522, 522, 521, 521, 520, 520, 519, 519,
518, 518, 517, 517, 516, 516, 515, 515, 514, 514};
// Note that LinearToGamma() expects the values to be premultiplied by 4,
// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
#define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
#else
#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
#endif // USE_INVERSE_ALPHA_TABLE
// Alpha-weighted average of a 2x2 block of one color channel.
// Each of the four samples (at offsets 0, 'step', 'rgb_stride' and
// 'rgb_stride + step' from 'src') is converted to linear, weighted by the
// alpha sample at the same offset from 'a_ptr', and accumulated. The sum is
// divided by 'total_a' through DIVIDE_BY_ALPHA and converted back with
// LinearToGamma(). 'total_a' must be in [1, 4 * 0xff].
static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
                                             const uint8_t* a_ptr,
                                             uint32_t total_a, int step,
                                             int rgb_stride) {
  const int off_right = step;
  const int off_below = rgb_stride;
  const int off_diag = rgb_stride + step;
  uint32_t sum = a_ptr[0] * GammaToLinear(src[0]);
  sum += a_ptr[off_right] * GammaToLinear(src[off_right]);
  sum += a_ptr[off_below] * GammaToLinear(src[off_below]);
  sum += a_ptr[off_diag] * GammaToLinear(src[off_diag]);
  assert(total_a > 0 && total_a <= 4 * 0xff);
#if defined(USE_INVERSE_ALPHA_TABLE)
  // The table-based division relies on the product fitting in 32 bits.
  assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
#endif
  return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
}
// Reduces two rows of 4-byte R/G/B/A pixels to one accumulated R/G/B/A
// quadruplet per 2x2 block, written as four uint16_t entries in 'dst'.
// Blocks whose summed alpha is 0 or 4 * 0xff use the unweighted SUM4/SUM2
// path; any other block weights each color sample by its alpha.
// For an odd 'width', the last column is handled as a 1x2 block whose alpha
// sum is doubled so that it stays on the same scale as a 2x2 block.
void WebPAccumulateRGBA(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
                        const uint8_t* const b_ptr, const uint8_t* const a_ptr,
                        int rgb_stride, uint16_t* dst, int width) {
  const int num_blocks = width >> 1;
  int block;
  int off = 0;  // byte offset of the current block within the row
  // Full 2x2 blocks.
  for (block = 0; block < num_blocks; ++block) {
    const uint32_t a = SUM4ALPHA(a_ptr + off);
    const int uniform_alpha = (a == 0 || a == 4 * 0xff);
    int r, g, b;
    if (!uniform_alpha) {
      r = LinearToGammaWeighted(r_ptr + off, a_ptr + off, a, 4, rgb_stride);
      g = LinearToGammaWeighted(g_ptr + off, a_ptr + off, a, 4, rgb_stride);
      b = LinearToGammaWeighted(b_ptr + off, a_ptr + off, a, 4, rgb_stride);
    } else {
      r = SUM4(r_ptr + off, 4);
      g = SUM4(g_ptr + off, 4);
      b = SUM4(b_ptr + off, 4);
    }
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
    dst[3] = a;
    off += 2 * 4;
    dst += 4;
  }
  // Trailing 1x2 column.
  if ((width & 1) != 0) {
    const uint32_t a = 2u * SUM2ALPHA(a_ptr + off);
    const int uniform_alpha = (a == 0 || a == 4 * 0xff);
    int r, g, b;
    if (!uniform_alpha) {
      // A step of 0 makes the weighted sum count each of the two samples twice.
      r = LinearToGammaWeighted(r_ptr + off, a_ptr + off, a, 0, rgb_stride);
      g = LinearToGammaWeighted(g_ptr + off, a_ptr + off, a, 0, rgb_stride);
      b = LinearToGammaWeighted(b_ptr + off, a_ptr + off, a, 0, rgb_stride);
    } else {
      r = SUM2(r_ptr + off);
      g = SUM2(g_ptr + off);
      b = SUM2(b_ptr + off);
    }
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
    dst[3] = a;
  }
}
// Reduces two rows of R/G/B samples ('step' bytes per pixel, rows
// 'rgb_stride' bytes apart) to one accumulated R/G/B triplet per 2x2 block.
// Each block takes four uint16_t slots in 'dst'; the fourth slot is only
// written (as 0) in WEBP_MSAN builds. For an odd 'width' the last column is
// accumulated as a 1x2 block.
void WebPAccumulateRGB(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
                       const uint8_t* const b_ptr, int step, int rgb_stride,
                       uint16_t* dst, int width) {
  const int num_blocks = width >> 1;
  const int block_bytes = 2 * step;
  int block;
  int off = 0;  // byte offset of the current block within the row
  for (block = 0; block < num_blocks; ++block) {
    dst[0] = SUM4(r_ptr + off, step);
    dst[1] = SUM4(g_ptr + off, step);
    dst[2] = SUM4(b_ptr + off, step);
    // MemorySanitizer may raise false positives with data that passes through
    // RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
    // See https://crbug.com/webp/573.
#ifdef WEBP_MSAN
    dst[3] = 0;
#endif
    off += block_bytes;
    dst += 4;
  }
  if ((width & 1) != 0) {
    dst[0] = SUM2(r_ptr + off);
    dst[1] = SUM2(g_ptr + off);
    dst[2] = SUM2(b_ptr + off);
#ifdef WEBP_MSAN
    dst[3] = 0;
#endif
  }
}
// Plain-C conversion of R/G/B(/A) samples to Y, U, V (and A) planes, two rows
// at a time.
//   r_ptr, g_ptr, b_ptr, a_ptr: first sample of each channel. Pixels are
//       'step' bytes apart and rows are 'rgb_stride' bytes apart. The order of
//       r_ptr and b_ptr selects the RGB or BGR luma converter.
//   has_alpha: non-zero if the alpha samples may be non-opaque.
//   width, height: size of the area. Only 'height >> 1' pairs of rows are
//       converted; a trailing odd row is not touched by this function.
//   tmp_rgb: scratch for the accumulated values, four uint16_t entries per
//       U/V sample, i.e. 4 * ((width + 1) >> 1) entries.
//   y_stride, uv_stride, a_stride: strides of the destination planes.
//   dst_y, dst_u, dst_v: destination planes. dst_a may be NULL.
// If dst_a is non-NULL it is always written: with the extracted alpha when
// has_alpha is set, with 0xff (opaque) otherwise.
static void ImportYUVAFromRGBA_C(const uint8_t* r_ptr, const uint8_t* g_ptr,
                                 const uint8_t* b_ptr, const uint8_t* a_ptr,
                                 int step,        // bytes per pixel
                                 int rgb_stride,  // bytes per scanline
                                 int has_alpha, int width, int height,
                                 uint16_t* tmp_rgb, int y_stride, int uv_stride,
                                 int a_stride, uint8_t* dst_y, uint8_t* dst_u,
                                 uint8_t* dst_v, uint8_t* dst_a) {
  int y;
  const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr
  const int uv_width = (width + 1) >> 1;

  // Logical AND: a bitwise '&=' would drop truthy values other than 1.
  has_alpha = (has_alpha != 0) && (dst_a != NULL);
  if (has_alpha) {
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    assert(kAlphaFix + GAMMA_FIX <= 31);
#endif
  }

  WebPInitGammaTables();

  // Downsample Y/U/V planes, two rows at a time
  for (y = 0; y < (height >> 1); ++y) {
    int rows_have_alpha = has_alpha;
    if (is_rgb) {
      WebPConvertRGBToY(r_ptr, dst_y, width, step);
      WebPConvertRGBToY(r_ptr + rgb_stride, dst_y + y_stride, width, step);
    } else {
      WebPConvertBGRToY(b_ptr, dst_y, width, step);
      WebPConvertBGRToY(b_ptr + rgb_stride, dst_y + y_stride, width, step);
    }
    dst_y += 2 * y_stride;
    if (has_alpha) {
      // Copy the alpha rows; the return value of WebPExtractAlpha() decides
      // whether the alpha-weighted accumulation is needed for this pair.
      rows_have_alpha &=
          !WebPExtractAlpha(a_ptr, rgb_stride, width, 2, dst_a, a_stride);
      dst_a += 2 * a_stride;
    } else if (dst_a != NULL) {
      // The caller reported opaque rows, but an alpha plane is requested: it
      // must still be written, otherwise it keeps whatever it held before.
      int row, x;
      for (row = 0; row < 2; ++row) {
        for (x = 0; x < width; ++x) dst_a[x] = 0xff;
        dst_a += a_stride;
      }
    }
    // Collect averaged R/G/B(/A)
    if (!rows_have_alpha) {
      WebPAccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
    } else {
      WebPAccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb,
                         width);
    }
    // Convert to U/V
    WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
    dst_u += uv_stride;
    dst_v += uv_stride;
    r_ptr += 2 * rgb_stride;
    b_ptr += 2 * rgb_stride;
    g_ptr += 2 * rgb_stride;
    if (has_alpha) a_ptr += 2 * rgb_stride;
  }
}
// Plain-C conversion of a single row of R/G/B(/A) samples to one Y row, one
// U/V row and (optionally) one A row. Meant for the last row of an area with
// an odd number of rows: the chroma accumulation runs with a row stride of 0,
// so the row is paired with itself.
//   r_ptr, g_ptr, b_ptr, a_ptr: first sample of each channel, pixels being
//       'step' bytes apart. The order of r_ptr and b_ptr selects the RGB or
//       BGR luma converter.
//   has_alpha: non-zero if the alpha samples may be non-opaque.
//   tmp_rgb: scratch of four uint16_t entries per U/V sample.
//   dst_y, dst_u, dst_v: destination rows. dst_a may be NULL.
// If dst_a is non-NULL it is always written: with the extracted alpha when
// has_alpha is set, with 0xff (opaque) otherwise.
static void ImportYUVAFromRGBALastLine_C(
    const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr,
    const uint8_t* a_ptr,
    int step,  // bytes per pixel
    int has_alpha, int width, uint16_t* tmp_rgb, uint8_t* dst_y, uint8_t* dst_u,
    uint8_t* dst_v, uint8_t* dst_a) {
  const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr
  const int uv_width = (width + 1) >> 1;
  int row_has_alpha = has_alpha && dst_a != NULL;

  if (is_rgb) {
    WebPConvertRGBToY(r_ptr, dst_y, width, step);
  } else {
    WebPConvertBGRToY(b_ptr, dst_y, width, step);
  }
  if (row_has_alpha) {
    row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
  } else if (dst_a != NULL) {
    // The caller reported an opaque row, but an alpha plane is requested: it
    // must still be written, otherwise it keeps whatever it held before.
    int x;
    for (x = 0; x < width; ++x) dst_a[x] = 0xff;
  }
  // Collect averaged R/G/B(/A)
  if (!row_has_alpha) {
    // Collect averaged R/G/B
    WebPAccumulateRGB(r_ptr, g_ptr, b_ptr, step, /*rgb_stride=*/0, tmp_rgb,
                      width);
  } else {
    WebPAccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /*rgb_stride=*/0, tmp_rgb,
                       width);
  }
  WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
}
//-----------------------------------------------------------------------------
void (*WebPConvertRGBToY)(const uint8_t* WEBP_RESTRICT rgb,
@@ -214,6 +606,21 @@ void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int width);
void (*WebPImportYUVAFromRGBA)(const uint8_t* r_ptr, const uint8_t* g_ptr,
const uint8_t* b_ptr, const uint8_t* a_ptr,
int step, // bytes per pixel
int rgb_stride, // bytes per scanline
int has_alpha, int width, int height,
uint16_t* tmp_rgb, int y_stride, int uv_stride,
int a_stride, uint8_t* dst_y, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_a);
void (*WebPImportYUVAFromRGBALastLine)(
const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr,
const uint8_t* a_ptr,
int step, // bytes per pixel
int has_alpha, int width, uint16_t* tmp_rgb, uint8_t* dst_y, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_a);
void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT y, int width);
void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
@@ -233,6 +640,9 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
WebPImportYUVAFromRGBA = ImportYUVAFromRGBA_C;
WebPImportYUVAFromRGBALastLine = ImportYUVAFromRGBALastLine_C;
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_HAVE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {

View File

@@ -40,6 +40,15 @@
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
// Macros to give the offset of each channel in a uint32_t containing ARGB.
#ifdef WORDS_BIGENDIAN
// uint32_t 0xff000000 is 0xff,00,00,00 in memory
#define CHANNEL_OFFSET(i) (i)
#else
// uint32_t 0xff000000 is 0x00,00,00,ff in memory
#define CHANNEL_OFFSET(i) (3 - (i))
#endif
//------------------------------------------------------------------------------
// YUV -> RGB conversion
@@ -221,6 +230,31 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
return VP8ClipUV(v, rounding);
}
// Converts R/G/B(/A) samples to Y, U, V (and A) planes, two rows at a time.
// The notes below describe the plain-C implementation.
// r_ptr, g_ptr, b_ptr and a_ptr point to the first sample of each channel.
// Only 'height >> 1' pairs of rows are converted; the last row of an odd
// 'height' is left to WebPImportYUVAFromRGBALastLine.
// 'tmp_rgb' is scratch memory holding four uint16_t entries per U/V sample.
// Alpha is copied to 'dst_a' when 'has_alpha' is non-zero and 'dst_a' is
// non-NULL.
extern void (*WebPImportYUVAFromRGBA)(
const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr,
const uint8_t* a_ptr,
int step, // bytes per pixel
int rgb_stride, // bytes per scanline
int has_alpha, int width, int height, uint16_t* tmp_rgb, int y_stride,
int uv_stride, int a_stride, uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v,
uint8_t* dst_a);
// Single-row variant of WebPImportYUVAFromRGBA, for the last row of an area
// with an odd number of rows. It writes one Y row and one U/V row, plus one A
// row when 'has_alpha' is non-zero and 'dst_a' is non-NULL.
// The plain-C implementation accumulates chroma with a row stride of 0, i.e.
// the row is paired with itself.
extern void (*WebPImportYUVAFromRGBALastLine)(
const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr,
const uint8_t* a_ptr,
int step, // bytes per pixel
int has_alpha, int width, uint16_t* tmp_rgb, uint8_t* dst_y, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_a);
// Internal function to WebPImportYUVAFromRGBA* that can be reused.
void WebPAccumulateRGBA(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
const uint8_t* const b_ptr, const uint8_t* const a_ptr,
int rgb_stride, uint16_t* dst, int width);
void WebPAccumulateRGB(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
const uint8_t* const b_ptr, int step, int rgb_stride,
uint16_t* dst, int width);
// Must be called before calling WebPAccumulateRGB*.
void WebPInitGammaTables(void);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -32,20 +32,6 @@
#include <pthread.h>
#endif
// Uncomment to disable gamma-compression during RGB->U/V averaging
#define USE_GAMMA_COMPRESSION
// If defined, use table to compute x / alpha.
#define USE_INVERSE_ALPHA_TABLE
#ifdef WORDS_BIGENDIAN
// uint32_t 0xff000000 is 0xff,00,00,00 in memory
#define CHANNEL_OFFSET(i) (i)
#else
// uint32_t 0xff000000 is 0x00,00,00,ff in memory
#define CHANNEL_OFFSET(i) (3 - (i))
#endif
#define ALPHA_OFFSET CHANNEL_OFFSET(0)
//------------------------------------------------------------------------------
@@ -83,91 +69,8 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
picture->a_stride);
}
//------------------------------------------------------------------------------
// Code for gamma correction
#if defined(USE_GAMMA_COMPRESSION)
// Gamma correction compensates loss of resolution during chroma subsampling.
#define GAMMA_FIX 12 // fixed-point precision for linear values
#define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision
#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX))
static const double kGamma = 0.80;
static const int kGammaScale = ((1 << GAMMA_FIX) - 1);
static const int kGammaTabScale = (1 << GAMMA_TAB_FIX);
static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1);
static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
static uint16_t kGammaToLinearTab[256];
static volatile int kGammaTablesOk = 0;
static void InitGammaTables(void);
extern VP8CPUInfo VP8GetCPUInfo;
WEBP_DSP_INIT_FUNC(InitGammaTables) {
if (!kGammaTablesOk) {
int v;
const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale;
const double norm = 1. / 255.;
for (v = 0; v <= 255; ++v) {
kGammaToLinearTab[v] =
(uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
}
for (v = 0; v <= GAMMA_TAB_SIZE; ++v) {
kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
}
kGammaTablesOk = 1;
}
}
static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
return kGammaToLinearTab[v];
}
static WEBP_INLINE int Interpolate(int v) {
const int tab_pos = v >> (GAMMA_TAB_FIX + 2); // integer part
const int x = v & ((kGammaTabScale << 2) - 1); // fractional part
const int v0 = kLinearToGammaTab[tab_pos];
const int v1 = kLinearToGammaTab[tab_pos + 1];
const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate
assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1);
return y;
}
// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
// U/V value, suitable for RGBToU/V calls.
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
const int y = Interpolate(base_value << shift); // final uplifted value
return (y + kGammaTabRounder) >> GAMMA_TAB_FIX; // descale
}
#else
static void InitGammaTables(void) {}
static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
return (int)(base_value << shift);
}
#endif // USE_GAMMA_COMPRESSION
//------------------------------------------------------------------------------
// RGB -> YUV conversion
static int RGBToY(int r, int g, int b, VP8Random* const rg) {
return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF)
: VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
}
static int RGBToU(int r, int g, int b, VP8Random* const rg) {
return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2)
: VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
}
static int RGBToV(int r, int g, int b, VP8Random* const rg) {
return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2)
: VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
}
//------------------------------------------------------------------------------
// Sharp RGB->YUV conversion
@@ -190,162 +93,6 @@ static int PreprocessARGB(const uint8_t* r_ptr, const uint8_t* g_ptr,
return ok;
}
//------------------------------------------------------------------------------
// "Fast" regular RGB->YUV
#define SUM4(ptr, step) \
LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[(step)]) + \
GammaToLinear((ptr)[rgb_stride]) + \
GammaToLinear((ptr)[rgb_stride + (step)]), \
0)
#define SUM2(ptr) \
LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
#if defined(USE_INVERSE_ALPHA_TABLE)
static const int kAlphaFix = 19;
// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
// formula is then equal to v / a in most (99.6%) cases. Note that this table
// and constant are adjusted very tightly to fit 32b arithmetic.
// In particular, they use the fact that the operands for 'v / a' are actually
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid
// overflow is: GAMMA_FIX + kAlphaFix <= 31.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
0, /* alpha = 0 */
524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, 58254, 52428,
47662, 43690, 40329, 37449, 34952, 32768, 30840, 29127, 27594, 26214,
24966, 23831, 22795, 21845, 20971, 20164, 19418, 18724, 18078, 17476,
16912, 16384, 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922, 10699, 10485,
10280, 10082, 9892, 9709, 9532, 9362, 9198, 9039, 8886, 8738,
8594, 8456, 8322, 8192, 8065, 7943, 7825, 7710, 7598, 7489,
7384, 7281, 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957, 5890, 5825,
5761, 5698, 5637, 5577, 5518, 5461, 5405, 5349, 5295, 5242,
5190, 5140, 5090, 5041, 4993, 4946, 4899, 4854, 4809, 4766,
4723, 4681, 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096, 4064, 4032,
4002, 3971, 3942, 3912, 3883, 3855, 3826, 3799, 3771, 3744,
3718, 3692, 3666, 3640, 3615, 3591, 3566, 3542, 3518, 3495,
3472, 3449, 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120, 3102, 3084,
3066, 3048, 3030, 3013, 2995, 2978, 2962, 2945, 2928, 2912,
2896, 2880, 2864, 2849, 2833, 2818, 2803, 2788, 2774, 2759,
2744, 2730, 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520, 2508, 2496,
2484, 2473, 2461, 2449, 2438, 2427, 2416, 2404, 2394, 2383,
2372, 2361, 2351, 2340, 2330, 2319, 2309, 2299, 2289, 2279,
2269, 2259, 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114, 2105, 2097,
2088, 2080, 2072, 2064, 2056, 2048, 2040, 2032, 2024, 2016,
2008, 2001, 1993, 1985, 1978, 1971, 1963, 1956, 1949, 1941,
1934, 1927, 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820, 1814, 1807,
1801, 1795, 1789, 1783, 1777, 1771, 1765, 1759, 1753, 1747,
1741, 1736, 1730, 1724, 1718, 1713, 1707, 1702, 1696, 1691,
1685, 1680, 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598, 1593, 1588,
1583, 1579, 1574, 1569, 1565, 1560, 1555, 1551, 1546, 1542,
1537, 1533, 1528, 1524, 1519, 1515, 1510, 1506, 1502, 1497,
1493, 1489, 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424, 1420, 1416,
1413, 1409, 1405, 1401, 1398, 1394, 1390, 1387, 1383, 1379,
1376, 1372, 1368, 1365, 1361, 1358, 1354, 1351, 1347, 1344,
1340, 1337, 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285, 1281, 1278,
1275, 1272, 1269, 1266, 1263, 1260, 1257, 1254, 1251, 1248,
1245, 1242, 1239, 1236, 1233, 1230, 1227, 1224, 1222, 1219,
1216, 1213, 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170, 1167, 1165,
1162, 1159, 1157, 1154, 1152, 1149, 1147, 1144, 1142, 1139,
1137, 1134, 1132, 1129, 1127, 1125, 1122, 1120, 1117, 1115,
1113, 1110, 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074, 1072, 1069,
1067, 1065, 1063, 1061, 1059, 1057, 1054, 1052, 1050, 1048,
1046, 1044, 1042, 1040, 1038, 1036, 1034, 1032, 1030, 1028,
1026, 1024, 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
1006, 1004, 1002, 1000, 998, 996, 994, 992, 991, 989,
987, 985, 983, 981, 979, 978, 976, 974, 972, 970,
969, 967, 965, 963, 961, 960, 958, 956, 954, 953,
951, 949, 948, 946, 944, 942, 941, 939, 937, 936,
934, 932, 931, 929, 927, 926, 924, 923, 921, 919,
918, 916, 914, 913, 911, 910, 908, 907, 905, 903,
902, 900, 899, 897, 896, 894, 893, 891, 890, 888,
887, 885, 884, 882, 881, 879, 878, 876, 875, 873,
872, 870, 869, 868, 866, 865, 863, 862, 860, 859,
858, 856, 855, 853, 852, 851, 849, 848, 846, 845,
844, 842, 841, 840, 838, 837, 836, 834, 833, 832,
830, 829, 828, 826, 825, 824, 823, 821, 820, 819,
817, 816, 815, 814, 812, 811, 810, 809, 807, 806,
805, 804, 802, 801, 800, 799, 798, 796, 795, 794,
793, 791, 790, 789, 788, 787, 786, 784, 783, 782,
781, 780, 779, 777, 776, 775, 774, 773, 772, 771,
769, 768, 767, 766, 765, 764, 763, 762, 760, 759,
758, 757, 756, 755, 754, 753, 752, 751, 750, 748,
747, 746, 745, 744, 743, 742, 741, 740, 739, 738,
737, 736, 735, 734, 733, 732, 731, 730, 729, 728,
727, 726, 725, 724, 723, 722, 721, 720, 719, 718,
717, 716, 715, 714, 713, 712, 711, 710, 709, 708,
707, 706, 705, 704, 703, 702, 701, 700, 699, 699,
698, 697, 696, 695, 694, 693, 692, 691, 690, 689,
688, 688, 687, 686, 685, 684, 683, 682, 681, 680,
680, 679, 678, 677, 676, 675, 674, 673, 673, 672,
671, 670, 669, 668, 667, 667, 666, 665, 664, 663,
662, 661, 661, 660, 659, 658, 657, 657, 656, 655,
654, 653, 652, 652, 651, 650, 649, 648, 648, 647,
646, 645, 644, 644, 643, 642, 641, 640, 640, 639,
638, 637, 637, 636, 635, 634, 633, 633, 632, 631,
630, 630, 629, 628, 627, 627, 626, 625, 624, 624,
623, 622, 621, 621, 620, 619, 618, 618, 617, 616,
616, 615, 614, 613, 613, 612, 611, 611, 610, 609,
608, 608, 607, 606, 606, 605, 604, 604, 603, 602,
601, 601, 600, 599, 599, 598, 597, 597, 596, 595,
595, 594, 593, 593, 592, 591, 591, 590, 589, 589,
588, 587, 587, 586, 585, 585, 584, 583, 583, 582,
581, 581, 580, 579, 579, 578, 578, 577, 576, 576,
575, 574, 574, 573, 572, 572, 571, 571, 570, 569,
569, 568, 568, 567, 566, 566, 565, 564, 564, 563,
563, 562, 561, 561, 560, 560, 559, 558, 558, 557,
557, 556, 555, 555, 554, 554, 553, 553, 552, 551,
551, 550, 550, 549, 548, 548, 547, 547, 546, 546,
545, 544, 544, 543, 543, 542, 542, 541, 541, 540,
539, 539, 538, 538, 537, 537, 536, 536, 535, 534,
534, 533, 533, 532, 532, 531, 531, 530, 530, 529,
529, 528, 527, 527, 526, 526, 525, 525, 524, 524,
523, 523, 522, 522, 521, 521, 520, 520, 519, 519,
518, 518, 517, 517, 516, 516, 515, 515, 514, 514};
// DIVIDE_BY_ALPHA(sum, a) evaluates to approximately 4 * sum / a.
// Note that LinearToGamma() expects the values to be premultiplied by 4,
// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
// Table-based variant: multiplies by the reciprocal kInvAlpha[a] and shifts
// right by (kAlphaFix - 2) instead of kAlphaFix, which folds in the factor 4.
#define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
#else
// Table-free variant: a plain integer division; 'a' must be non-zero.
#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
#endif // USE_INVERSE_ALPHA_TABLE
// Returns LinearToGamma() of the alpha-weighted combination of four samples of
// a single channel. The samples sit at offsets 0, 'step', 'rgb_stride' and
// 'rgb_stride + step' from 'src'; each one is passed through GammaToLinear()
// and multiplied by the alpha value found at the same offset from 'a_ptr'.
// The weighted total is then divided by 'total_a' through DIVIDE_BY_ALPHA().
// 'total_a' must lie in [1, 4 * 0xff] (asserted).
static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
                                             const uint8_t* a_ptr,
                                             uint32_t total_a, int step,
                                             int rgb_stride) {
  const int right = step;
  const int below = rgb_stride;
  const int below_right = rgb_stride + step;
  const uint32_t sum = a_ptr[0] * GammaToLinear(src[0]) +
                       a_ptr[right] * GammaToLinear(src[right]) +
                       a_ptr[below] * GammaToLinear(src[below]) +
                       a_ptr[below_right] * GammaToLinear(src[below_right]);
  assert(total_a > 0 && total_a <= 4 * 0xff);
#if defined(USE_INVERSE_ALPHA_TABLE)
  // The product with the reciprocal must still fit in 32 bits.
  assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
#endif
  return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
}
static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr, int step,
@@ -353,78 +100,8 @@ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
VP8Random* const rg) {
int i, j;
for (i = 0, j = 0; i < width; i += 1, j += step) {
dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
}
}
// Produces one accumulated R/G/B/A quadruplet per 2x2 block of pixels and
// stores it as four consecutive uint16_t entries of 'dst'.
// Horizontally adjacent samples are 4 bytes apart and vertically adjacent
// ones are 'rgb_stride' bytes apart. dst[3] receives the sum of the block's
// alpha values. When that sum is 0 or 4 * 0xff the colors are accumulated
// unweighted (SUM4 / SUM2); otherwise each channel goes through
// LinearToGammaWeighted() with the alpha values as weights.
// If 'width' is odd, the last column is processed as a 2x1 block whose alpha
// sum is doubled, with a horizontal step of 0 for the weighted case.
static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
                                       const uint8_t* const g_ptr,
                                       const uint8_t* const b_ptr,
                                       const uint8_t* const a_ptr,
                                       int rgb_stride, uint16_t* dst,
                                       int width) {
  const int num_blocks = width >> 1;
  int block;
  int off = 0;  // byte offset of the current block inside the row
  // Full 2x2 blocks: two pixels of 4 bytes each are consumed per iteration.
  for (block = 0; block < num_blocks; ++block, off += 2 * 4, dst += 4) {
    const uint32_t a = SUM4ALPHA(a_ptr + off);
    const int unweighted = (a == 0 || a == 4 * 0xff);
    const int r =
        unweighted ? SUM4(r_ptr + off, 4)
                   : LinearToGammaWeighted(r_ptr + off, a_ptr + off, a, 4,
                                           rgb_stride);
    const int g =
        unweighted ? SUM4(g_ptr + off, 4)
                   : LinearToGammaWeighted(g_ptr + off, a_ptr + off, a, 4,
                                           rgb_stride);
    const int b =
        unweighted ? SUM4(b_ptr + off, 4)
                   : LinearToGammaWeighted(b_ptr + off, a_ptr + off, a, 4,
                                           rgb_stride);
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
    dst[3] = a;
  }
  // Trailing column of an odd-width row.
  if (width & 1) {
    const uint32_t a = 2u * SUM2ALPHA(a_ptr + off);
    const int unweighted = (a == 0 || a == 4 * 0xff);
    const int r =
        unweighted ? SUM2(r_ptr + off)
                   : LinearToGammaWeighted(r_ptr + off, a_ptr + off, a, 0,
                                           rgb_stride);
    const int g =
        unweighted ? SUM2(g_ptr + off)
                   : LinearToGammaWeighted(g_ptr + off, a_ptr + off, a, 0,
                                           rgb_stride);
    const int b =
        unweighted ? SUM2(b_ptr + off)
                   : LinearToGammaWeighted(b_ptr + off, a_ptr + off, a, 0,
                                           rgb_stride);
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
    dst[3] = a;
  }
}
// Accumulates R/G/B over 2x2 blocks of pixels, without any alpha weighting.
// For every pair of columns, SUM4() of each channel is written to dst[0..2]
// and 'dst' advances by 4 entries; horizontally adjacent samples are 'step'
// bytes apart. 'rgb_stride' is consumed implicitly by the SUM4/SUM2 macros.
// dst[3] is only written (as 0) when WEBP_MSAN is defined.
// If 'width' is odd, the last column is accumulated with SUM2().
static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr, int step,
int rgb_stride, uint16_t* dst,
int width) {
int i, j;
// 'i' counts output entries, 'j' is the byte offset into the source row.
for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
dst[0] = SUM4(r_ptr + j, step);
dst[1] = SUM4(g_ptr + j, step);
dst[2] = SUM4(b_ptr + j, step);
// MemorySanitizer may raise false positives with data that passes through
// RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
// See https://crbug.com/webp/573.
#ifdef WEBP_MSAN
dst[3] = 0;
#endif
}
if (width & 1) {
dst[0] = SUM2(r_ptr + j);
dst[1] = SUM2(g_ptr + j);
dst[2] = SUM2(b_ptr + j);
#ifdef WEBP_MSAN
dst[3] = 0;
#endif
// NOTE(review): the statement below uses 'dst_y' and 'rg', neither of which
// is declared in this function; it looks like it belongs to ConvertRowToY()
// rather than here -- confirm before relying on this block.
dst_y[i] =
VP8RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], VP8RandomBits(rg, YUV_FIX));
}
}
@@ -435,8 +112,8 @@ static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
int i;
for (i = 0; i < width; i += 1, rgb += 4) {
const int r = rgb[0], g = rgb[1], b = rgb[2];
dst_u[i] = RGBToU(r, g, b, rg);
dst_v[i] = RGBToV(r, g, b, rg);
dst_u[i] = VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
dst_v[i] = VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
}
}
@@ -452,7 +129,6 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, const uint8_t* g_ptr,
const int width = picture->width;
const int height = picture->height;
const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr
picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
picture->use_argb = 0;
@@ -468,9 +144,6 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, const uint8_t* g_ptr,
}
if (has_alpha) {
assert(step == 4);
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
assert(kAlphaFix + GAMMA_FIX <= 31);
#endif
}
if (use_iterative_conversion) {
@@ -499,85 +172,88 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, const uint8_t* g_ptr,
rg = &base_rg;
}
WebPInitConvertARGBToYUV();
InitGammaTables();
WebPInitGammaTables();
if (tmp_rgb == NULL) {
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// Downsample Y/U/V planes, two rows at a time
for (y = 0; y < (height >> 1); ++y) {
int rows_have_alpha = has_alpha;
if (rg == NULL) {
if (is_rgb) {
WebPConvertRGBToY(r_ptr, dst_y, width, step);
WebPConvertRGBToY(r_ptr + rgb_stride, dst_y + picture->y_stride,
width, step);
} else {
WebPConvertBGRToY(b_ptr, dst_y, width, step);
WebPConvertBGRToY(b_ptr + rgb_stride, dst_y + picture->y_stride,
width, step);
if (rg == NULL) {
// Downsample Y/U/V planes, two rows at a time
WebPImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
has_alpha, width, height, tmp_rgb,
picture->y_stride, picture->uv_stride,
picture->a_stride, dst_y, dst_u, dst_v, dst_a);
if (height & 1) {
dst_y += (height - 1) * picture->y_stride;
dst_u += (height >> 1) * picture->uv_stride;
dst_v += (height >> 1) * picture->uv_stride;
r_ptr += (height - 1) * rgb_stride;
b_ptr += (height - 1) * rgb_stride;
g_ptr += (height - 1) * rgb_stride;
if (has_alpha) {
dst_a += (height - 1) * picture->a_stride;
a_ptr += (height - 1) * rgb_stride;
}
} else {
WebPImportYUVAFromRGBALastLine(r_ptr, g_ptr, b_ptr, a_ptr, step,
has_alpha, width, tmp_rgb, dst_y, dst_u,
dst_v, dst_a);
}
} else {
// Copy of WebPImportYUVAFromRGBA/WebPImportYUVAFromRGBALastLine,
// but with dithering.
for (y = 0; y < (height >> 1); ++y) {
int rows_have_alpha = has_alpha;
ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
ConvertRowToY(r_ptr + rgb_stride, g_ptr + rgb_stride,
b_ptr + rgb_stride, step, dst_y + picture->y_stride,
width, rg);
}
dst_y += 2 * picture->y_stride;
if (has_alpha) {
rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2, dst_a,
picture->a_stride);
dst_a += 2 * picture->a_stride;
}
// Collect averaged R/G/B(/A)
if (!rows_have_alpha) {
AccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
} else {
AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb, width);
}
// Convert to U/V
if (rg == NULL) {
WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
} else {
ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
}
dst_u += picture->uv_stride;
dst_v += picture->uv_stride;
r_ptr += 2 * rgb_stride;
b_ptr += 2 * rgb_stride;
g_ptr += 2 * rgb_stride;
if (has_alpha) a_ptr += 2 * rgb_stride;
}
if (height & 1) { // extra last row
int row_has_alpha = has_alpha;
if (rg == NULL) {
if (is_rgb) {
WebPConvertRGBToY(r_ptr, dst_y, width, step);
} else {
WebPConvertBGRToY(b_ptr, dst_y, width, step);
dst_y += 2 * picture->y_stride;
if (has_alpha) {
rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2,
dst_a, picture->a_stride);
dst_a += 2 * picture->a_stride;
}
} else {
ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
}
if (row_has_alpha) {
row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
}
// Collect averaged R/G/B(/A)
if (!row_has_alpha) {
// Collect averaged R/G/B
AccumulateRGB(r_ptr, g_ptr, b_ptr, step, /* rgb_stride = */ 0, tmp_rgb,
width);
} else {
AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /* rgb_stride = */ 0,
tmp_rgb, width);
}
if (rg == NULL) {
WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
} else {
// Collect averaged R/G/B(/A)
if (!rows_have_alpha) {
WebPAccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb,
width);
} else {
WebPAccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb,
width);
}
// Convert to U/V
ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
dst_u += picture->uv_stride;
dst_v += picture->uv_stride;
r_ptr += 2 * rgb_stride;
b_ptr += 2 * rgb_stride;
g_ptr += 2 * rgb_stride;
if (has_alpha) a_ptr += 2 * rgb_stride;
}
if (height & 1) { // extra last row
int row_has_alpha = has_alpha;
ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
if (row_has_alpha) {
row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
}
// Collect averaged R/G/B(/A)
if (!row_has_alpha) {
// Collect averaged R/G/B
WebPAccumulateRGB(r_ptr, g_ptr, b_ptr, step, /*rgb_stride=*/0,
tmp_rgb, width);
} else {
WebPAccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /*rgb_stride=*/0,
tmp_rgb, width);
}
if (rg == NULL) {
WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
} else {
ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
}
}
}
WebPSafeFree(tmp_rgb);
}
return 1;