mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 12:28:26 +01:00
revamp the point-sampling functions by processing a full plane
-nofancy is slower than the fancy upsampler, because the latter has SSE2 optimizations.

Change-Id: Ibf22e5a8ea1de86a54248d4a4ecc63d514c01b88
This commit is contained in:
parent
2b5cb32612
commit
a2f8b28905
28
src/dec/io.c
28
src/dec/io.c
@ -45,27 +45,12 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
|
|||||||
|
|
||||||
// Point-sampling U/V sampler.
|
// Point-sampling U/V sampler.
|
||||||
static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
|
static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
|
||||||
WebPDecBuffer* output = p->output;
|
WebPDecBuffer* const output = p->output;
|
||||||
const WebPRGBABuffer* const buf = &output->u.RGBA;
|
WebPRGBABuffer* const buf = &output->u.RGBA;
|
||||||
uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
|
uint8_t* const dst = buf->rgba + io->mb_y * buf->stride;
|
||||||
const uint8_t* y_src = io->y;
|
WebPSamplers[output->colorspace](io->y, io->y_stride,
|
||||||
const uint8_t* u_src = io->u;
|
io->u, io->v, io->uv_stride,
|
||||||
const uint8_t* v_src = io->v;
|
dst, buf->stride, io->mb_w, io->mb_h);
|
||||||
const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace];
|
|
||||||
const int mb_w = io->mb_w;
|
|
||||||
const int last = io->mb_h - 1;
|
|
||||||
int j;
|
|
||||||
for (j = 0; j < last; j += 2) {
|
|
||||||
sample(y_src, y_src + io->y_stride, u_src, v_src,
|
|
||||||
dst, dst + buf->stride, mb_w);
|
|
||||||
y_src += 2 * io->y_stride;
|
|
||||||
u_src += io->uv_stride;
|
|
||||||
v_src += io->uv_stride;
|
|
||||||
dst += 2 * buf->stride;
|
|
||||||
}
|
|
||||||
if (j == last) { // Just do the last line twice
|
|
||||||
sample(y_src, y_src, u_src, v_src, dst, dst, mb_w);
|
|
||||||
}
|
|
||||||
return io->mb_h;
|
return io->mb_h;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -637,4 +622,3 @@ void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -195,13 +195,23 @@ void WebPInitUpsamplersNEON(void);
|
|||||||
#endif // FANCY_UPSAMPLING
|
#endif // FANCY_UPSAMPLING
|
||||||
|
|
||||||
// Point-sampling methods.
|
// Point-sampling methods.
|
||||||
typedef void (*WebPSampleLinePairFunc)(
|
typedef void (*WebPSamplePlaneFunc)(const uint8_t* y, int y_stride,
|
||||||
const uint8_t* top_y, const uint8_t* bottom_y,
|
|
||||||
const uint8_t* u, const uint8_t* v,
|
const uint8_t* u, const uint8_t* v,
|
||||||
uint8_t* top_dst, uint8_t* bottom_dst, int len);
|
int uv_stride,
|
||||||
|
uint8_t* dst, int dst_stride,
|
||||||
|
int width, int height);
|
||||||
|
|
||||||
|
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
|
||||||
|
const uint8_t* u, const uint8_t* v,
|
||||||
|
uint8_t* dst, int len);
|
||||||
|
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
|
||||||
|
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||||
|
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||||
|
uint8_t* dst, int dst_stride,
|
||||||
|
int width, int height, WebPSamplerRowFunc func);
|
||||||
|
|
||||||
// Sampling functions to convert YUV to RGB(A) modes
|
// Sampling functions to convert YUV to RGB(A) modes
|
||||||
extern WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */];
|
extern WebPSamplePlaneFunc WebPSamplers[/* MODE_LAST */];
|
||||||
|
|
||||||
// Initializes MIPS version of the samplers.
|
// Initializes MIPS version of the samplers.
|
||||||
void WebPInitSamplersMIPS32(void);
|
void WebPInitSamplersMIPS32(void);
|
||||||
|
@ -109,40 +109,59 @@ UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2)
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// simple point-sampling
|
// simple point-sampling
|
||||||
|
|
||||||
|
WebPSamplePlaneFunc WebPSamplers[MODE_LAST];
|
||||||
|
|
||||||
WebPSampleLinePairFunc WebPSamplers[MODE_LAST];
|
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||||
|
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||||
|
uint8_t* dst, int dst_stride,
|
||||||
|
int width, int height, WebPSamplerRowFunc func) {
|
||||||
|
int j;
|
||||||
|
for (j = 0; j < height; ++j) {
|
||||||
|
func(y, u, v, dst, width);
|
||||||
|
y += y_stride;
|
||||||
|
if (j & 1) {
|
||||||
|
u += uv_stride;
|
||||||
|
v += uv_stride;
|
||||||
|
}
|
||||||
|
dst += dst_stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(skal): maybe some of these per-row functions should be in yuv.h?
|
||||||
#define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
#define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
static void FUNC_NAME##Row(const uint8_t* y, \
|
||||||
const uint8_t* u, const uint8_t* v, \
|
const uint8_t* u, const uint8_t* v, \
|
||||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
uint8_t* dst, int len) { \
|
||||||
int i; \
|
const uint8_t* const end = dst + (len & ~1) * XSTEP; \
|
||||||
for (i = 0; i < len - 1; i += 2) { \
|
while (dst != end) { \
|
||||||
FUNC(top_y[0], u[0], v[0], top_dst); \
|
FUNC(y[0], u[0], v[0], dst); \
|
||||||
FUNC(top_y[1], u[0], v[0], top_dst + XSTEP); \
|
FUNC(y[1], u[0], v[0], dst + XSTEP); \
|
||||||
FUNC(bottom_y[0], u[0], v[0], bottom_dst); \
|
y += 2; \
|
||||||
FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP); \
|
++u; \
|
||||||
top_y += 2; \
|
++v; \
|
||||||
bottom_y += 2; \
|
dst += 2 * XSTEP; \
|
||||||
u++; \
|
|
||||||
v++; \
|
|
||||||
top_dst += 2 * XSTEP; \
|
|
||||||
bottom_dst += 2 * XSTEP; \
|
|
||||||
} \
|
} \
|
||||||
if (i == len - 1) { /* last one */ \
|
if (len & 1) { \
|
||||||
FUNC(top_y[0], u[0], v[0], top_dst); \
|
FUNC(y[0], u[0], v[0], dst); \
|
||||||
FUNC(bottom_y[0], u[0], v[0], bottom_dst); \
|
|
||||||
} \
|
} \
|
||||||
|
} \
|
||||||
|
static void FUNC_NAME(const uint8_t* y, int y_stride, \
|
||||||
|
const uint8_t* u, const uint8_t* v, int uv_stride, \
|
||||||
|
uint8_t* dst, int dst_stride, \
|
||||||
|
int width, int height) { \
|
||||||
|
WebPSamplerProcessPlane(y, y_stride, u, v, uv_stride, \
|
||||||
|
dst, dst_stride, width, height, \
|
||||||
|
FUNC_NAME##Row); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// All variants implemented.
|
// All variants implemented.
|
||||||
SAMPLE_FUNC(SampleRgbLinePair, VP8YuvToRgb, 3)
|
SAMPLE_FUNC(SampleRgbPlane, VP8YuvToRgb, 3)
|
||||||
SAMPLE_FUNC(SampleBgrLinePair, VP8YuvToBgr, 3)
|
SAMPLE_FUNC(SampleBgrPlane, VP8YuvToBgr, 3)
|
||||||
SAMPLE_FUNC(SampleRgbaLinePair, VP8YuvToRgba, 4)
|
SAMPLE_FUNC(SampleRgbaPlane, VP8YuvToRgba, 4)
|
||||||
SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4)
|
SAMPLE_FUNC(SampleBgraPlane, VP8YuvToBgra, 4)
|
||||||
SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4)
|
SAMPLE_FUNC(SampleArgbPlane, VP8YuvToArgb, 4)
|
||||||
SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2)
|
SAMPLE_FUNC(SampleRgba4444Plane, VP8YuvToRgba4444, 2)
|
||||||
SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2)
|
SAMPLE_FUNC(SampleRgb565Plane, VP8YuvToRgb565, 2)
|
||||||
|
|
||||||
#undef SAMPLE_FUNC
|
#undef SAMPLE_FUNC
|
||||||
|
|
||||||
@ -341,17 +360,17 @@ void WebPInitUpsamplers(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void WebPInitSamplers(void) {
|
void WebPInitSamplers(void) {
|
||||||
WebPSamplers[MODE_RGB] = SampleRgbLinePair;
|
WebPSamplers[MODE_RGB] = SampleRgbPlane;
|
||||||
WebPSamplers[MODE_RGBA] = SampleRgbaLinePair;
|
WebPSamplers[MODE_RGBA] = SampleRgbaPlane;
|
||||||
WebPSamplers[MODE_BGR] = SampleBgrLinePair;
|
WebPSamplers[MODE_BGR] = SampleBgrPlane;
|
||||||
WebPSamplers[MODE_BGRA] = SampleBgraLinePair;
|
WebPSamplers[MODE_BGRA] = SampleBgraPlane;
|
||||||
WebPSamplers[MODE_ARGB] = SampleArgbLinePair;
|
WebPSamplers[MODE_ARGB] = SampleArgbPlane;
|
||||||
WebPSamplers[MODE_RGBA_4444] = SampleRgba4444LinePair;
|
WebPSamplers[MODE_RGBA_4444] = SampleRgba4444Plane;
|
||||||
WebPSamplers[MODE_RGB_565] = SampleRgb565LinePair;
|
WebPSamplers[MODE_RGB_565] = SampleRgb565Plane;
|
||||||
WebPSamplers[MODE_rgbA] = SampleRgbaLinePair;
|
WebPSamplers[MODE_rgbA] = SampleRgbaPlane;
|
||||||
WebPSamplers[MODE_bgrA] = SampleBgraLinePair;
|
WebPSamplers[MODE_bgrA] = SampleBgraPlane;
|
||||||
WebPSamplers[MODE_Argb] = SampleArgbLinePair;
|
WebPSamplers[MODE_Argb] = SampleArgbPlane;
|
||||||
WebPSamplers[MODE_rgbA_4444] = SampleRgba4444LinePair;
|
WebPSamplers[MODE_rgbA_4444] = SampleRgba4444Plane;
|
||||||
|
|
||||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
if (VP8GetCPUInfo != NULL) {
|
if (VP8GetCPUInfo != NULL) {
|
||||||
@ -387,4 +406,3 @@ void WebPInitPremultiply(void) {
|
|||||||
}
|
}
|
||||||
#endif // FANCY_UPSAMPLING
|
#endif // FANCY_UPSAMPLING
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,9 +22,9 @@
|
|||||||
// simple point-sampling
|
// simple point-sampling
|
||||||
|
|
||||||
#define SAMPLE_FUNC_MIPS(FUNC_NAME, XSTEP, R, G, B, A) \
|
#define SAMPLE_FUNC_MIPS(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
static void FUNC_NAME##Row(const uint8_t* y, \
|
||||||
const uint8_t* u, const uint8_t* v, \
|
const uint8_t* u, const uint8_t* v, \
|
||||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
uint8_t* dst, int len) { \
|
||||||
int i, r, g, b; \
|
int i, r, g, b; \
|
||||||
int temp0, temp1, temp2, temp3, temp4; \
|
int temp0, temp1, temp2, temp3, temp4; \
|
||||||
for (i = 0; i < (len >> 1); i++) { \
|
for (i = 0; i < (len >> 1); i++) { \
|
||||||
@ -32,7 +32,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
|||||||
temp3 = kVToG * v[0]; \
|
temp3 = kVToG * v[0]; \
|
||||||
temp2 = kUToG * u[0]; \
|
temp2 = kUToG * u[0]; \
|
||||||
temp4 = kUToB * u[0]; \
|
temp4 = kUToB * u[0]; \
|
||||||
temp0 = kYScale * top_y[0]; \
|
temp0 = kYScale * y[0]; \
|
||||||
temp1 += kRCst; \
|
temp1 += kRCst; \
|
||||||
temp3 -= kGCst; \
|
temp3 -= kGCst; \
|
||||||
temp2 += temp3; \
|
temp2 += temp3; \
|
||||||
@ -40,47 +40,29 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
|||||||
r = VP8Clip8(temp0 + temp1); \
|
r = VP8Clip8(temp0 + temp1); \
|
||||||
g = VP8Clip8(temp0 - temp2); \
|
g = VP8Clip8(temp0 - temp2); \
|
||||||
b = VP8Clip8(temp0 + temp4); \
|
b = VP8Clip8(temp0 + temp4); \
|
||||||
temp0 = kYScale * top_y[1]; \
|
temp0 = kYScale * y[1]; \
|
||||||
top_dst[R] = r; \
|
dst[R] = r; \
|
||||||
top_dst[G] = g; \
|
dst[G] = g; \
|
||||||
top_dst[B] = b; \
|
dst[B] = b; \
|
||||||
if (A) top_dst[A] = 0xff; \
|
if (A) dst[A] = 0xff; \
|
||||||
r = VP8Clip8(temp0 + temp1); \
|
r = VP8Clip8(temp0 + temp1); \
|
||||||
g = VP8Clip8(temp0 - temp2); \
|
g = VP8Clip8(temp0 - temp2); \
|
||||||
b = VP8Clip8(temp0 + temp4); \
|
b = VP8Clip8(temp0 + temp4); \
|
||||||
temp0 = kYScale * bottom_y[0]; \
|
dst[R + XSTEP] = r; \
|
||||||
top_dst[R + XSTEP] = r; \
|
dst[G + XSTEP] = g; \
|
||||||
top_dst[G + XSTEP] = g; \
|
dst[B + XSTEP] = b; \
|
||||||
top_dst[B + XSTEP] = b; \
|
if (A) dst[A + XSTEP] = 0xff; \
|
||||||
if (A) top_dst[A + XSTEP] = 0xff; \
|
y += 2; \
|
||||||
r = VP8Clip8(temp0 + temp1); \
|
++u; \
|
||||||
g = VP8Clip8(temp0 - temp2); \
|
++v; \
|
||||||
b = VP8Clip8(temp0 + temp4); \
|
dst += 2 * XSTEP; \
|
||||||
temp0 = kYScale * bottom_y[1]; \
|
|
||||||
bottom_dst[R] = r; \
|
|
||||||
bottom_dst[G] = g; \
|
|
||||||
bottom_dst[B] = b; \
|
|
||||||
if (A) bottom_dst[A] = 0xff; \
|
|
||||||
r = VP8Clip8(temp0 + temp1); \
|
|
||||||
g = VP8Clip8(temp0 - temp2); \
|
|
||||||
b = VP8Clip8(temp0 + temp4); \
|
|
||||||
bottom_dst[R + XSTEP] = r; \
|
|
||||||
bottom_dst[G + XSTEP] = g; \
|
|
||||||
bottom_dst[B + XSTEP] = b; \
|
|
||||||
if (A) bottom_dst[A + XSTEP] = 0xff; \
|
|
||||||
top_y += 2; \
|
|
||||||
bottom_y += 2; \
|
|
||||||
u++; \
|
|
||||||
v++; \
|
|
||||||
top_dst += 2 * XSTEP; \
|
|
||||||
bottom_dst += 2 * XSTEP; \
|
|
||||||
} \
|
} \
|
||||||
if (len & 1) { \
|
if (len & 1) { \
|
||||||
temp1 = kVToR * v[0]; \
|
temp1 = kVToR * v[0]; \
|
||||||
temp3 = kVToG * v[0]; \
|
temp3 = kVToG * v[0]; \
|
||||||
temp2 = kUToG * u[0]; \
|
temp2 = kUToG * u[0]; \
|
||||||
temp4 = kUToB * u[0]; \
|
temp4 = kUToB * u[0]; \
|
||||||
temp0 = kYScale * top_y[0]; \
|
temp0 = kYScale * y[0]; \
|
||||||
temp1 += kRCst; \
|
temp1 += kRCst; \
|
||||||
temp3 -= kGCst; \
|
temp3 -= kGCst; \
|
||||||
temp2 += temp3; \
|
temp2 += temp3; \
|
||||||
@ -88,25 +70,25 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
|||||||
r = VP8Clip8(temp0 + temp1); \
|
r = VP8Clip8(temp0 + temp1); \
|
||||||
g = VP8Clip8(temp0 - temp2); \
|
g = VP8Clip8(temp0 - temp2); \
|
||||||
b = VP8Clip8(temp0 + temp4); \
|
b = VP8Clip8(temp0 + temp4); \
|
||||||
temp0 = kYScale * bottom_y[0]; \
|
dst[R] = r; \
|
||||||
top_dst[R] = r; \
|
dst[G] = g; \
|
||||||
top_dst[G] = g; \
|
dst[B] = b; \
|
||||||
top_dst[B] = b; \
|
if (A) dst[A] = 0xff; \
|
||||||
if (A) top_dst[A] = 0xff; \
|
|
||||||
r = VP8Clip8(temp0 + temp1); \
|
|
||||||
g = VP8Clip8(temp0 - temp2); \
|
|
||||||
b = VP8Clip8(temp0 + temp4); \
|
|
||||||
bottom_dst[R] = r; \
|
|
||||||
bottom_dst[G] = g; \
|
|
||||||
bottom_dst[B] = b; \
|
|
||||||
if (A) bottom_dst[A] = 0xff; \
|
|
||||||
} \
|
} \
|
||||||
|
} \
|
||||||
|
static void FUNC_NAME(const uint8_t* y, int y_stride, \
|
||||||
|
const uint8_t* u, const uint8_t* v, int uv_stride, \
|
||||||
|
uint8_t* dst, int dst_stride, \
|
||||||
|
int width, int height) { \
|
||||||
|
WebPSamplerProcessPlane(y, y_stride, u, v, uv_stride, \
|
||||||
|
dst, dst_stride, width, height, \
|
||||||
|
FUNC_NAME##Row); \
|
||||||
}
|
}
|
||||||
|
|
||||||
SAMPLE_FUNC_MIPS(SampleRgbLinePair, 3, 0, 1, 2, 0)
|
SAMPLE_FUNC_MIPS(SampleRgbPlane, 3, 0, 1, 2, 0)
|
||||||
SAMPLE_FUNC_MIPS(SampleRgbaLinePair, 4, 0, 1, 2, 3)
|
SAMPLE_FUNC_MIPS(SampleRgbaPlane, 4, 0, 1, 2, 3)
|
||||||
SAMPLE_FUNC_MIPS(SampleBgrLinePair, 3, 2, 1, 0, 0)
|
SAMPLE_FUNC_MIPS(SampleBgrPlane, 3, 2, 1, 0, 0)
|
||||||
SAMPLE_FUNC_MIPS(SampleBgraLinePair, 4, 2, 1, 0, 3)
|
SAMPLE_FUNC_MIPS(SampleBgraPlane, 4, 2, 1, 0, 3)
|
||||||
|
|
||||||
#endif // WEBP_USE_MIPS32
|
#endif // WEBP_USE_MIPS32
|
||||||
|
|
||||||
@ -114,9 +96,9 @@ SAMPLE_FUNC_MIPS(SampleBgraLinePair, 4, 2, 1, 0, 3)
|
|||||||
|
|
||||||
void WebPInitSamplersMIPS32(void) {
|
void WebPInitSamplersMIPS32(void) {
|
||||||
#if defined(WEBP_USE_MIPS32)
|
#if defined(WEBP_USE_MIPS32)
|
||||||
WebPSamplers[MODE_RGB] = SampleRgbLinePair;
|
WebPSamplers[MODE_RGB] = SampleRgbPlane;
|
||||||
WebPSamplers[MODE_RGBA] = SampleRgbaLinePair;
|
WebPSamplers[MODE_RGBA] = SampleRgbaPlane;
|
||||||
WebPSamplers[MODE_BGR] = SampleBgrLinePair;
|
WebPSamplers[MODE_BGR] = SampleBgrPlane;
|
||||||
WebPSamplers[MODE_BGRA] = SampleBgraLinePair;
|
WebPSamplers[MODE_BGRA] = SampleBgraPlane;
|
||||||
#endif // WEBP_USE_MIPS32
|
#endif // WEBP_USE_MIPS32
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user