diff --git a/src/enc/picture_csp.c b/src/enc/picture_csp.c
index 516e4b86..e18f1096 100644
--- a/src/enc/picture_csp.c
+++ b/src/enc/picture_csp.c
@@ -23,6 +23,9 @@
 // Uncomment to disable gamma-compression during RGB->U/V averaging
 #define USE_GAMMA_COMPRESSION
 
+// If defined, use table to compute x / alpha.
+#define USE_INVERSE_ALPHA_TABLE
+
 static const union {
   uint32_t argb;
   uint8_t  bytes[4];
@@ -114,6 +117,7 @@ static WEBP_INLINE int Interpolate(int v) {
   const int v0 = kLinearToGammaTab[tab_pos];
   const int v1 = kLinearToGammaTab[tab_pos + 1];
   const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
+  assert(tab_pos + 1 < kGammaTabSize + 1);
   return y;
 }
 
@@ -444,7 +448,6 @@ static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
   return 1;
 }
 
-
 //------------------------------------------------------------------------------
 // Main function
 
@@ -571,15 +574,186 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
 //------------------------------------------------------------------------------
 // "Fast" regular RGB->YUV
 
-#define SUM4(ptr) LinearToGamma(                           \
+#define SUM4(ptr, step) LinearToGamma(                     \
     GammaToLinear((ptr)[0]) +                              \
-    GammaToLinear((ptr)[step]) +                           \
+    GammaToLinear((ptr)[(step)]) +                         \
     GammaToLinear((ptr)[rgb_stride]) +                     \
-    GammaToLinear((ptr)[rgb_stride + step]), 0)            \
+    GammaToLinear((ptr)[rgb_stride + (step)]), 0)          \
 
-#define SUM2V(ptr) \
+#define SUM2(ptr) \
     LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
 
+#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
+#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
+
+#if defined(USE_INVERSE_ALPHA_TABLE)
+
+static const int kAlphaFix = 19;
+// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
+// formula is then equal to v / a in most (99.6%) cases. Note that this table
+// and constant are adjusted very tightly to fit 32b arithmetic.
+// In particular, they use the fact that the operands for 'v / a' are actually
+// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
+// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
+// overflow is: kGammaFix + kAlphaFix <= 31.
+static const uint32_t kInvAlpha[4 * 0xff + 1] = {
+  0,  /* alpha = 0 */
+  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
+  58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
+  30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
+  20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
+  15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
+  12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
+  10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
+  9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
+  8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
+  7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
+  6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
+  5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
+  5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
+  4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
+  4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
+  4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
+  4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
+  3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
+  3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
+  3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
+  3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
+  3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
+  2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
+  2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
+  2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
+  2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
+  2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
+  2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
+  2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
+  2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
+  2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
+  2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
+  2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
+  1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
+  1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
+  1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
+  1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
+  1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
+  1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
+  1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
+  1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
+  1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
+  1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
+  1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
+  1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
+  1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
+  1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
+  1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
+  1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
+  1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
+  1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
+  1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
+  1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
+  1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
+  1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
+  1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
+  1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
+  1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
+  1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
+  1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
+  1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
+  1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
+  1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
+  1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
+  1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
+  1006, 1004, 1002, 1000, 998, 996, 994, 992,
+  991, 989, 987, 985, 983, 981, 979, 978,
+  976, 974, 972, 970, 969, 967, 965, 963,
+  961, 960, 958, 956, 954, 953, 951, 949,
+  948, 946, 944, 942, 941, 939, 937, 936,
+  934, 932, 931, 929, 927, 926, 924, 923,
+  921, 919, 918, 916, 914, 913, 911, 910,
+  908, 907, 905, 903, 902, 900, 899, 897,
+  896, 894, 893, 891, 890, 888, 887, 885,
+  884, 882, 881, 879, 878, 876, 875, 873,
+  872, 870, 869, 868, 866, 865, 863, 862,
+  860, 859, 858, 856, 855, 853, 852, 851,
+  849, 848, 846, 845, 844, 842, 841, 840,
+  838, 837, 836, 834, 833, 832, 830, 829,
+  828, 826, 825, 824, 823, 821, 820, 819,
+  817, 816, 815, 814, 812, 811, 810, 809,
+  807, 806, 805, 804, 802, 801, 800, 799,
+  798, 796, 795, 794, 793, 791, 790, 789,
+  788, 787, 786, 784, 783, 782, 781, 780,
+  779, 777, 776, 775, 774, 773, 772, 771,
+  769, 768, 767, 766, 765, 764, 763, 762,
+  760, 759, 758, 757, 756, 755, 754, 753,
+  752, 751, 750, 748, 747, 746, 745, 744,
+  743, 742, 741, 740, 739, 738, 737, 736,
+  735, 734, 733, 732, 731, 730, 729, 728,
+  727, 726, 725, 724, 723, 722, 721, 720,
+  719, 718, 717, 716, 715, 714, 713, 712,
+  711, 710, 709, 708, 707, 706, 705, 704,
+  703, 702, 701, 700, 699, 699, 698, 697,
+  696, 695, 694, 693, 692, 691, 690, 689,
+  688, 688, 687, 686, 685, 684, 683, 682,
+  681, 680, 680, 679, 678, 677, 676, 675,
+  674, 673, 673, 672, 671, 670, 669, 668,
+  667, 667, 666, 665, 664, 663, 662, 661,
+  661, 660, 659, 658, 657, 657, 656, 655,
+  654, 653, 652, 652, 651, 650, 649, 648,
+  648, 647, 646, 645, 644, 644, 643, 642,
+  641, 640, 640, 639, 638, 637, 637, 636,
+  635, 634, 633, 633, 632, 631, 630, 630,
+  629, 628, 627, 627, 626, 625, 624, 624,
+  623, 622, 621, 621, 620, 619, 618, 618,
+  617, 616, 616, 615, 614, 613, 613, 612,
+  611, 611, 610, 609, 608, 608, 607, 606,
+  606, 605, 604, 604, 603, 602, 601, 601,
+  600, 599, 599, 598, 597, 597, 596, 595,
+  595, 594, 593, 593, 592, 591, 591, 590,
+  589, 589, 588, 587, 587, 586, 585, 585,
+  584, 583, 583, 582, 581, 581, 580, 579,
+  579, 578, 578, 577, 576, 576, 575, 574,
+  574, 573, 572, 572, 571, 571, 570, 569,
+  569, 568, 568, 567, 566, 566, 565, 564,
+  564, 563, 563, 562, 561, 561, 560, 560,
+  559, 558, 558, 557, 557, 556, 555, 555,
+  554, 554, 553, 553, 552, 551, 551, 550,
+  550, 549, 548, 548, 547, 547, 546, 546,
+  545, 544, 544, 543, 543, 542, 542, 541,
+  541, 540, 539, 539, 538, 538, 537, 537,
+  536, 536, 535, 534, 534, 533, 533, 532,
+  532, 531, 531, 530, 530, 529, 529, 528,
+  527, 527, 526, 526, 525, 525, 524, 524,
+  523, 523, 522, 522, 521, 521, 520, 520,
+  519, 519, 518, 518, 517, 517, 516, 516,
+  515, 515, 514, 514
+};
+
+// Note that LinearToGamma() expects the values to be premultiplied by 4,
+// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
+#define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
+
+#else
+
+#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
+
+#endif  // USE_INVERSE_ALPHA_TABLE
+
+static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
+                                             const uint8_t* a_ptr,
+                                             uint32_t total_a, int step,
+                                             int rgb_stride) {
+  const uint32_t sum =
+      a_ptr[0] * GammaToLinear(src[0]) +
+      a_ptr[step] * GammaToLinear(src[step]) +
+      a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
+      a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
+  assert(total_a > 0 && total_a <= 4 * 0xff);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+  assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
+#endif
+  return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
+}
+
 static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
                                       const uint8_t* const g_ptr,
                                       const uint8_t* const b_ptr,
@@ -593,6 +767,49 @@ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
   }
 }
 
+static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr,
+                                                 const uint8_t* const g_ptr,
+                                                 const uint8_t* const b_ptr,
+                                                 const uint8_t* const a_ptr,
+                                                 int rgb_stride,
+                                                 uint8_t* const dst_u,
+                                                 uint8_t* const dst_v,
+                                                 int width,
+                                                 VP8Random* const rg) {
+  int i, j;
+  // we loop over 2x2 blocks and produce one U/V value for each.
+  for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) {
+    const uint32_t a = SUM4ALPHA(a_ptr + j);
+    int r, g, b;
+    if (a == 4 * 0xff || a == 0) {
+      r = SUM4(r_ptr + j, 4);
+      g = SUM4(g_ptr + j, 4);
+      b = SUM4(b_ptr + j, 4);
+    } else {
+      r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
+      g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
+      b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
+    }
+    dst_u[i] = RGBToU(r, g, b, rg);
+    dst_v[i] = RGBToV(r, g, b, rg);
+  }
+  if (width & 1) {
+    const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
+    int r, g, b;
+    if (a == 4 * 0xff || a == 0) {
+      r = SUM2(r_ptr + j);
+      g = SUM2(g_ptr + j);
+      b = SUM2(b_ptr + j);
+    } else {
+      r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
+      g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
+      b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
+    }
+    dst_u[i] = RGBToU(r, g, b, rg);
+    dst_v[i] = RGBToV(r, g, b, rg);
+  }
+}
+
 static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
                                         const uint8_t* const g_ptr,
                                         const uint8_t* const b_ptr,
@@ -603,16 +820,16 @@ static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
                                         VP8Random* const rg) {
   int i, j;
   for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) {
-    const int r = SUM4(r_ptr + j);
-    const int g = SUM4(g_ptr + j);
-    const int b = SUM4(b_ptr + j);
+    const int r = SUM4(r_ptr + j, step);
+    const int g = SUM4(g_ptr + j, step);
+    const int b = SUM4(b_ptr + j, step);
     dst_u[i] = RGBToU(r, g, b, rg);
     dst_v[i] = RGBToV(r, g, b, rg);
   }
   if (width & 1) {
-    const int r = SUM2V(r_ptr + j);
-    const int g = SUM2V(g_ptr + j);
-    const int b = SUM2V(b_ptr + j);
+    const int r = SUM2(r_ptr + j);
+    const int g = SUM2(g_ptr + j);
+    const int b = SUM2(b_ptr + j);
     dst_u[i] = RGBToU(r, g, b, rg);
     dst_v[i] = RGBToV(r, g, b, rg);
   }
@@ -644,16 +861,28 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
   if (!WebPPictureAllocYUVA(picture, width, height)) {
     return 0;
   }
+  if (has_alpha) {
+    WebPInitAlphaProcessing();
+    assert(step == 4);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+    assert(kAlphaFix + kGammaFix <= 31);
+#endif
+  }
 
   if (use_iterative_conversion) {
     InitGammaTablesF();
     if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
       return 0;
     }
+    if (has_alpha) {
+      WebPExtractAlpha(a_ptr, rgb_stride, width, height,
+                       picture->a, picture->a_stride);
+    }
   } else {
     uint8_t* dst_y = picture->y;
     uint8_t* dst_u = picture->u;
     uint8_t* dst_v = picture->v;
+    uint8_t* dst_a = picture->a;
 
     VP8Random base_rg;
     VP8Random* rg = NULL;
@@ -666,6 +895,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
 
     // Downsample Y/U/V planes, two rows at a time
     for (y = 0; y < (height >> 1); ++y) {
+      int rows_have_alpha = has_alpha;
       const int off1 = (2 * y + 0) * rgb_stride;
       const int off2 = (2 * y + 1) * rgb_stride;
       ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
@@ -673,28 +903,38 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
       ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
                     dst_y + picture->y_stride, width, rg);
       dst_y += 2 * picture->y_stride;
-      ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
-                      step, rgb_stride, dst_u, dst_v, width, rg);
+      if (has_alpha) {
+        rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
+                                             width, 2,
+                                             dst_a, picture->a_stride);
+        dst_a += 2 * picture->a_stride;
+      }
+      if (!rows_have_alpha) {
+        ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                        step, rgb_stride, dst_u, dst_v, width, rg);
+      } else {
+        ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                                 a_ptr + off1, rgb_stride,
+                                 dst_u, dst_v, width, rg);
+      }
       dst_u += picture->uv_stride;
       dst_v += picture->uv_stride;
     }
     if (height & 1) {    // extra last row
       const int off = 2 * y * rgb_stride;
+      int row_has_alpha = has_alpha;
       ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
                     dst_y, width, rg);
-      ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
-                      step, 0, dst_u, dst_v, width, rg);
-    }
-  }
-
-  if (has_alpha) {
-    assert(step >= 4);
-    assert(picture->a != NULL);
-    for (y = 0; y < height; ++y) {
-      int x;
-      for (x = 0; x < width; ++x) {
-        picture->a[x + y * picture->a_stride] =
-            a_ptr[step * x + y * rgb_stride];
+      if (row_has_alpha) {
+        row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
+      }
+      if (!row_has_alpha) {
+        ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
+                        step, 0, dst_u, dst_v, width, rg);
+      } else {
+        ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off,
+                                 a_ptr + off, 0,
+                                 dst_u, dst_v, width, rg);
       }
     }
   }
@@ -702,10 +942,9 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
 }
 
 #undef SUM4
-#undef SUM2V
-#undef SUM2H
-#undef SUM1
-#undef RGB_TO_UV
+#undef SUM2
+#undef SUM4ALPHA
+#undef SUM2ALPHA
 
 //------------------------------------------------------------------------------
 // call for ARGB->YUVA conversion