Premultiply with alpha during U/V downsampling

This prevents the 'alpha-leak' reported in issue #220

Speed-diff is kept minimal.

Change-Id: I1976de5e6de7cfcec89a54df9233c1a6586a5846
This commit is contained in:
skal 2014-09-18 23:40:34 -07:00
parent 0cc811d7d6
commit c792d4129a

View File

@ -23,6 +23,9 @@
// Comment out the following #define to disable gamma-compression during RGB->U/V averaging
#define USE_GAMMA_COMPRESSION
// If defined, use table to compute x / alpha.
#define USE_INVERSE_ALPHA_TABLE
static const union {
uint32_t argb;
uint8_t bytes[4];
@ -114,6 +117,7 @@ static WEBP_INLINE int Interpolate(int v) {
const int v0 = kLinearToGammaTab[tab_pos];
const int v1 = kLinearToGammaTab[tab_pos + 1];
const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate
assert(tab_pos + 1 < kGammaTabSize + 1);
return y;
}
@ -444,7 +448,6 @@ static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
return 1;
}
//------------------------------------------------------------------------------
// Main function
@ -571,15 +574,186 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
//------------------------------------------------------------------------------
// "Fast" regular RGB->YUV
#define SUM4(ptr) LinearToGamma( \
#define SUM4(ptr, step) LinearToGamma( \
GammaToLinear((ptr)[0]) + \
GammaToLinear((ptr)[step]) + \
GammaToLinear((ptr)[(step)]) + \
GammaToLinear((ptr)[rgb_stride]) + \
GammaToLinear((ptr)[rgb_stride + step]), 0) \
GammaToLinear((ptr)[rgb_stride + (step)]), 0) \
#define SUM2V(ptr) \
#define SUM2(ptr) \
LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
// Sum of two vertically adjacent alpha samples: the one at 'ptr' and the one
// 'rgb_stride' bytes further. Note: the expansion reads a variable named
// 'rgb_stride' from the scope where the macro is used.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
// Sum of the four alpha samples of a 2x2 block: the vertical pair at 'ptr'
// plus the vertical pair located 4 bytes further along the row (the
// horizontal sample step is hard-coded to 4 here).
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
#if defined(USE_INVERSE_ALPHA_TABLE)
// Number of fractional bits of the fixed-point reciprocals stored in
// kInvAlpha[].
static const int kAlphaFix = 19;
// kInvAlpha[a] holds (1 << kAlphaFix) / a, so that the expression
// (v * kInvAlpha[a]) >> kAlphaFix is equal to v / a in most (99.6%) cases.
// Note that this table and constant are adjusted very tightly to fit 32b
// arithmetic. In particular, they use the fact that the operands for 'v / a'
// are actually derived as v = (a0*p0 + a1*p1 + a2*p2 + a3*p3) and
// a = a0 + a1 + a2 + a3, with ai in [0..255] and pi in [0..1<<kGammaFix).
// The table is therefore indexed by a in [0..4 * 0xff]. The constraint to
// avoid overflow is: kGammaFix + kAlphaFix <= 31.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
0, /* alpha = 0: placeholder (LinearToGammaWeighted() asserts total_a > 0) */
524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
1006, 1004, 1002, 1000, 998, 996, 994, 992,
991, 989, 987, 985, 983, 981, 979, 978,
976, 974, 972, 970, 969, 967, 965, 963,
961, 960, 958, 956, 954, 953, 951, 949,
948, 946, 944, 942, 941, 939, 937, 936,
934, 932, 931, 929, 927, 926, 924, 923,
921, 919, 918, 916, 914, 913, 911, 910,
908, 907, 905, 903, 902, 900, 899, 897,
896, 894, 893, 891, 890, 888, 887, 885,
884, 882, 881, 879, 878, 876, 875, 873,
872, 870, 869, 868, 866, 865, 863, 862,
860, 859, 858, 856, 855, 853, 852, 851,
849, 848, 846, 845, 844, 842, 841, 840,
838, 837, 836, 834, 833, 832, 830, 829,
828, 826, 825, 824, 823, 821, 820, 819,
817, 816, 815, 814, 812, 811, 810, 809,
807, 806, 805, 804, 802, 801, 800, 799,
798, 796, 795, 794, 793, 791, 790, 789,
788, 787, 786, 784, 783, 782, 781, 780,
779, 777, 776, 775, 774, 773, 772, 771,
769, 768, 767, 766, 765, 764, 763, 762,
760, 759, 758, 757, 756, 755, 754, 753,
752, 751, 750, 748, 747, 746, 745, 744,
743, 742, 741, 740, 739, 738, 737, 736,
735, 734, 733, 732, 731, 730, 729, 728,
727, 726, 725, 724, 723, 722, 721, 720,
719, 718, 717, 716, 715, 714, 713, 712,
711, 710, 709, 708, 707, 706, 705, 704,
703, 702, 701, 700, 699, 699, 698, 697,
696, 695, 694, 693, 692, 691, 690, 689,
688, 688, 687, 686, 685, 684, 683, 682,
681, 680, 680, 679, 678, 677, 676, 675,
674, 673, 673, 672, 671, 670, 669, 668,
667, 667, 666, 665, 664, 663, 662, 661,
661, 660, 659, 658, 657, 657, 656, 655,
654, 653, 652, 652, 651, 650, 649, 648,
648, 647, 646, 645, 644, 644, 643, 642,
641, 640, 640, 639, 638, 637, 637, 636,
635, 634, 633, 633, 632, 631, 630, 630,
629, 628, 627, 627, 626, 625, 624, 624,
623, 622, 621, 621, 620, 619, 618, 618,
617, 616, 616, 615, 614, 613, 613, 612,
611, 611, 610, 609, 608, 608, 607, 606,
606, 605, 604, 604, 603, 602, 601, 601,
600, 599, 599, 598, 597, 597, 596, 595,
595, 594, 593, 593, 592, 591, 591, 590,
589, 589, 588, 587, 587, 586, 585, 585,
584, 583, 583, 582, 581, 581, 580, 579,
579, 578, 578, 577, 576, 576, 575, 574,
574, 573, 572, 572, 571, 571, 570, 569,
569, 568, 568, 567, 566, 566, 565, 564,
564, 563, 563, 562, 561, 561, 560, 560,
559, 558, 558, 557, 557, 556, 555, 555,
554, 554, 553, 553, 552, 551, 551, 550,
550, 549, 548, 548, 547, 547, 546, 546,
545, 544, 544, 543, 543, 542, 542, 541,
541, 540, 539, 539, 538, 538, 537, 537,
536, 536, 535, 534, 534, 533, 533, 532,
532, 531, 531, 530, 530, 529, 529, 528,
527, 527, 526, 526, 525, 525, 524, 524,
523, 523, 522, 522, 521, 521, 520, 520,
519, 519, 518, 518, 517, 517, 516, 516,
515, 515, 514, 514
};
// Table-based variant: multiply by the fixed-point reciprocal of 'a'.
// Note that LinearToGamma() expects the values to be premultiplied by 4,
// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly
// by shifting right by (kAlphaFix - 2) instead of kAlphaFix.
#define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
#else
// Fallback without the table: plain integer division, with the same factor 4
// applied to the numerator. 'a' must be non-zero.
#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
#endif // USE_INVERSE_ALPHA_TABLE
// Returns the alpha-weighted average of one colour channel over a 2x2 block,
// mapped back through LinearToGamma().
//   src       first sample of the channel for the block
//   a_ptr     alpha sample matching src[0]; laid out like 'src'
//   total_a   sum of the four alpha weights, must lie in [1, 4 * 0xff]
//   step      byte offset to the horizontally adjacent sample (0 makes the
//             single column count twice)
//   rgb_stride byte offset to the sample on the second row
// Each sample is linearised with GammaToLinear(), weighted by its alpha,
// and the weighted sum is divided by 'total_a' through DIVIDE_BY_ALPHA.
static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
                                             const uint8_t* a_ptr,
                                             uint32_t total_a, int step,
                                             int rgb_stride) {
  // Second row of the block, for the colour channel and for its alpha.
  const uint8_t* const src2 = src + rgb_stride;
  const uint8_t* const a_ptr2 = a_ptr + rgb_stride;
  const uint32_t sum = a_ptr[0] * GammaToLinear(src[0]) +
                       a_ptr[step] * GammaToLinear(src[step]) +
                       a_ptr2[0] * GammaToLinear(src2[0]) +
                       a_ptr2[step] * GammaToLinear(src2[step]);
  uint32_t scaled;
  assert(total_a > 0 && total_a <= 4 * 0xff);
#if defined(USE_INVERSE_ALPHA_TABLE)
  // The product formed inside DIVIDE_BY_ALPHA must fit in 32 bits.
  assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
#endif
  scaled = DIVIDE_BY_ALPHA(sum, total_a);
  return LinearToGamma(scaled, 0);
}
static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr,
@ -593,6 +767,49 @@ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
}
}
// Converts two rows of RGBA samples into one row of U/V samples, producing
// one U/V pair per 2x2 block and weighting each pixel's colour by its alpha.
//   r_ptr, g_ptr, b_ptr, a_ptr  first-row pointers to each channel;
//       horizontally adjacent pixels are 4 bytes apart and the second row
//       lies 'rgb_stride' bytes further (a caller in this file passes 0 for
//       a lone last row).
//   dst_u, dst_v  outputs; (width >> 1) samples are written, plus one more
//       when 'width' is odd.
//   rg  forwarded as-is to RGBToU() and RGBToV().
// Blocks whose alpha sum is 0 or 4 * 0xff use the unweighted SUM4 / SUM2
// averages; all other blocks go through LinearToGammaWeighted().
static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr,
                                                 const uint8_t* const g_ptr,
                                                 const uint8_t* const b_ptr,
                                                 const uint8_t* const a_ptr,
                                                 int rgb_stride,
                                                 uint8_t* const dst_u,
                                                 uint8_t* const dst_v,
                                                 int width,
                                                 VP8Random* const rg) {
  const int num_blocks = width >> 1;
  int i = 0;   // index of the U/V sample being produced
  int j = 0;   // byte offset of the current block's first pixel
  // Full 2x2 blocks: each one yields a single U/V value.
  for (; i < num_blocks; ++i, j += 2 * sizeof(uint32_t)) {
    const uint8_t* const alpha = a_ptr + j;
    const uint32_t a = SUM4ALPHA(alpha);
    int r, g, b;
    if (a != 0 && a != 4 * 0xff) {
      // Mixed transparency: weight every pixel by its alpha.
      r = LinearToGammaWeighted(r_ptr + j, alpha, a, 4, rgb_stride);
      g = LinearToGammaWeighted(g_ptr + j, alpha, a, 4, rgb_stride);
      b = LinearToGammaWeighted(b_ptr + j, alpha, a, 4, rgb_stride);
    } else {
      // Alpha sum is 0 or maximal: use the plain average.
      r = SUM4(r_ptr + j, 4);
      g = SUM4(g_ptr + j, 4);
      b = SUM4(b_ptr + j, 4);
    }
    dst_u[i] = RGBToU(r, g, b, rg);
    dst_v[i] = RGBToV(r, g, b, rg);
  }
  // Odd width: the trailing column only has a vertical pair of pixels.
  if (width & 1) {
    const uint8_t* const alpha = a_ptr + j;
    // Doubled so that the total is on the same scale as a four-sample sum.
    const uint32_t a = 2u * SUM2ALPHA(alpha);
    int r, g, b;
    if (a != 0 && a != 4 * 0xff) {
      // step == 0: the single column is counted twice by the helper.
      r = LinearToGammaWeighted(r_ptr + j, alpha, a, 0, rgb_stride);
      g = LinearToGammaWeighted(g_ptr + j, alpha, a, 0, rgb_stride);
      b = LinearToGammaWeighted(b_ptr + j, alpha, a, 0, rgb_stride);
    } else {
      r = SUM2(r_ptr + j);
      g = SUM2(g_ptr + j);
      b = SUM2(b_ptr + j);
    }
    dst_u[i] = RGBToU(r, g, b, rg);
    dst_v[i] = RGBToV(r, g, b, rg);
  }
}
static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr,
@ -603,16 +820,16 @@ static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
VP8Random* const rg) {
int i, j;
for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) {
const int r = SUM4(r_ptr + j);
const int g = SUM4(g_ptr + j);
const int b = SUM4(b_ptr + j);
const int r = SUM4(r_ptr + j, step);
const int g = SUM4(g_ptr + j, step);
const int b = SUM4(b_ptr + j, step);
dst_u[i] = RGBToU(r, g, b, rg);
dst_v[i] = RGBToV(r, g, b, rg);
}
if (width & 1) {
const int r = SUM2V(r_ptr + j);
const int g = SUM2V(g_ptr + j);
const int b = SUM2V(b_ptr + j);
const int r = SUM2(r_ptr + j);
const int g = SUM2(g_ptr + j);
const int b = SUM2(b_ptr + j);
dst_u[i] = RGBToU(r, g, b, rg);
dst_v[i] = RGBToV(r, g, b, rg);
}
@ -644,16 +861,28 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
if (!WebPPictureAllocYUVA(picture, width, height)) {
return 0;
}
if (has_alpha) {
WebPInitAlphaProcessing();
assert(step == 4);
#if defined(USE_INVERSE_ALPHA_TABLE)
assert(kAlphaFix + kGammaFix <= 31);
#endif
}
if (use_iterative_conversion) {
InitGammaTablesF();
if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
return 0;
}
if (has_alpha) {
WebPExtractAlpha(a_ptr, rgb_stride, width, height,
picture->a, picture->a_stride);
}
} else {
uint8_t* dst_y = picture->y;
uint8_t* dst_u = picture->u;
uint8_t* dst_v = picture->v;
uint8_t* dst_a = picture->a;
VP8Random base_rg;
VP8Random* rg = NULL;
@ -666,6 +895,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
// Downsample Y/U/V planes, two rows at a time
for (y = 0; y < (height >> 1); ++y) {
int rows_have_alpha = has_alpha;
const int off1 = (2 * y + 0) * rgb_stride;
const int off2 = (2 * y + 1) * rgb_stride;
ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
@ -673,28 +903,38 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
dst_y + picture->y_stride, width, rg);
dst_y += 2 * picture->y_stride;
ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
step, rgb_stride, dst_u, dst_v, width, rg);
if (has_alpha) {
rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
width, 2,
dst_a, picture->a_stride);
dst_a += 2 * picture->a_stride;
}
if (!rows_have_alpha) {
ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
step, rgb_stride, dst_u, dst_v, width, rg);
} else {
ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1,
a_ptr + off1, rgb_stride,
dst_u, dst_v, width, rg);
}
dst_u += picture->uv_stride;
dst_v += picture->uv_stride;
}
if (height & 1) { // extra last row
const int off = 2 * y * rgb_stride;
int row_has_alpha = has_alpha;
ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
dst_y, width, rg);
ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
step, 0, dst_u, dst_v, width, rg);
}
}
if (has_alpha) {
assert(step >= 4);
assert(picture->a != NULL);
for (y = 0; y < height; ++y) {
int x;
for (x = 0; x < width; ++x) {
picture->a[x + y * picture->a_stride] =
a_ptr[step * x + y * rgb_stride];
if (row_has_alpha) {
row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
}
if (!row_has_alpha) {
ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
step, 0, dst_u, dst_v, width, rg);
} else {
ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off,
a_ptr + off, 0,
dst_u, dst_v, width, rg);
}
}
}
@ -702,10 +942,9 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
}
#undef SUM4
#undef SUM2V
#undef SUM2H
#undef SUM1
#undef RGB_TO_UV
#undef SUM2
#undef SUM4ALPHA
#undef SUM2ALPHA
//------------------------------------------------------------------------------
// call for ARGB->YUVA conversion