apply clang-format

(Debian clang-format version 19.1.7 (3+build4)) with `--style=Google`.

Manual changes:
* clang-format was disabled around macros with stringification (mostly
  assembly)
* some inline assembly strings were adjusted to avoid awkward line
  breaks
* trailing commas, `//` comments or literal suffixes (`ull`) were added
  to help clang-format lay out arrays
* thread_utils.c: parameter comments were changed to the more common
  /*...=*/ style to improve formatting

The automatically generated code under swig/ was skipped.

Bug: 433996651
Change-Id: Iea3f24160d78d2a2653971cdf13fa932e47ff1b3
This commit is contained in:
clang-format
2025-07-28 18:23:12 -07:00
committed by James Zern
parent b569988d3f
commit 44257cb826
224 changed files with 16312 additions and 16734 deletions

View File

@@ -57,8 +57,8 @@ static WEBP_INLINE int PixelsAreSimilar(uint32_t src, uint32_t dst,
}
static int FramesAreSimilar(const uint8_t* const rgba1,
const uint8_t* const rgba2,
int width, int height, int max_allowed_diff) {
const uint8_t* const rgba2, int width, int height,
int max_allowed_diff) {
int i, j;
assert(max_allowed_diff > 0);
for (j = 0; j < height; ++j) {
@@ -120,8 +120,7 @@ static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
if (alpha1 == 0 && alpha2 == 0) return 1;
}
if (bg1 != bg2) {
fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n",
bg1, bg2);
fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n", bg1, bg2);
return 0;
}
return 1;
@@ -131,8 +130,7 @@ static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
// is OK for other aspects like offsets, dispose/blend method to vary.
static int CompareAnimatedImagePair(const AnimatedImage* const img1,
const AnimatedImage* const img2,
int premultiply,
double min_psnr) {
int premultiply, double min_psnr) {
int ok = 1;
const int is_multi_frame_image = (img1->num_frames > 1);
uint32_t i;
@@ -141,8 +139,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
"Canvas width mismatch");
ok &= CompareValues(img1->canvas_height, img2->canvas_height,
"Canvas height mismatch");
ok &= CompareValues(img1->num_frames, img2->num_frames,
"Frame count mismatch");
ok &=
CompareValues(img1->num_frames, img2->num_frames, "Frame count mismatch");
if (!ok) return 0; // These are fatal failures, can't proceed.
if (is_multi_frame_image) { // Checks relevant for multi-frame images only.
@@ -178,8 +176,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
premultiply, &max_diff, &psnr);
if (min_psnr > 0.) {
if (psnr < min_psnr) {
fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i,
psnr, min_psnr);
fprintf(stderr, "Frame #%d, psnr = %.2lf (min_psnr = %f)\n", i, psnr,
min_psnr);
ok = 0;
}
} else {
@@ -199,9 +197,10 @@ static void Help(void) {
printf(" -min_psnr <float> ... minimum per-frame PSNR\n");
printf(" -raw_comparison ..... if this flag is not used, RGB is\n");
printf(" premultiplied before comparison\n");
printf(" -max_diff <int> ..... maximum allowed difference per channel\n"
" between corresponding pixels in subsequent\n"
" frames\n");
printf(
" -max_diff <int> ..... maximum allowed difference per channel\n"
" between corresponding pixels in subsequent\n"
" frames\n");
printf(" -h .................. this help\n");
printf(" -version ............ print version number and exit\n");
}
@@ -217,7 +216,7 @@ int main(int argc, const char* argv[]) {
int premultiply = 1;
int max_diff = 0;
int i, c;
const char* files[2] = { NULL, NULL };
const char* files[2] = {NULL, NULL};
AnimatedImage images[2];
INIT_WARGV(argc, argv);
@@ -253,9 +252,8 @@ int main(int argc, const char* argv[]) {
GetAnimatedImageVersions(&dec_version, &demux_version);
printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n",
(dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
(dec_version >> 0) & 0xff,
(demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
(demux_version >> 0) & 0xff);
(dec_version >> 0) & 0xff, (demux_version >> 16) & 0xff,
(demux_version >> 8) & 0xff, (demux_version >> 0) & 0xff);
FREE_WARGV_AND_RETURN(0);
} else {
if (!got_input1) {
@@ -278,7 +276,6 @@ int main(int argc, const char* argv[]) {
FREE_WARGV_AND_RETURN(return_code);
}
if (!got_input2) {
Help();
FREE_WARGV_AND_RETURN(return_code);
@@ -301,8 +298,8 @@ int main(int argc, const char* argv[]) {
}
}
if (!CompareAnimatedImagePair(&images[0], &images[1],
premultiply, min_psnr)) {
if (!CompareAnimatedImagePair(&images[0], &images[1], premultiply,
min_psnr)) {
WFPRINTF(stderr, "\nFiles %s and %s differ.\n", (const W_CHAR*)files[0],
(const W_CHAR*)files[1]);
return_code = 1;
@@ -311,7 +308,7 @@ int main(int argc, const char* argv[]) {
(const W_CHAR*)files[1]);
return_code = 0;
}
End:
End:
ClearAnimatedImage(&images[0]);
ClearAnimatedImage(&images[1]);
FREE_WARGV_AND_RETURN(return_code);

View File

@@ -29,8 +29,9 @@ static void Help(void) {
printf("Usage: anim_dump [options] files...\n");
printf("\nOptions:\n");
printf(" -folder <string> .... dump folder (default: '.')\n");
printf(" -prefix <string> .... prefix for dumped frames "
"(default: 'dump_')\n");
printf(
" -prefix <string> .... prefix for dumped frames "
"(default: 'dump_')\n");
printf(" -tiff ............... save frames as TIFF\n");
printf(" -pam ................ save frames as PAM\n");
printf(" -h .................. this help\n");
@@ -82,17 +83,16 @@ int main(int argc, const char* argv[]) {
GetAnimatedImageVersions(&dec_version, &demux_version);
printf("WebP Decoder version: %d.%d.%d\nWebP Demux version: %d.%d.%d\n",
(dec_version >> 16) & 0xff, (dec_version >> 8) & 0xff,
(dec_version >> 0) & 0xff,
(demux_version >> 16) & 0xff, (demux_version >> 8) & 0xff,
(demux_version >> 0) & 0xff);
(dec_version >> 0) & 0xff, (demux_version >> 16) & 0xff,
(demux_version >> 8) & 0xff, (demux_version >> 0) & 0xff);
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
} else {
uint32_t i;
AnimatedImage image;
const W_CHAR* const file = GET_WARGV(argv, c);
memset(&image, 0, sizeof(image));
WPRINTF("Decoding file: %s as %s/%sxxxx.%s\n",
file, dump_folder, prefix, suffix);
WPRINTF("Decoding file: %s as %s/%sxxxx.%s\n", file, dump_folder, prefix,
suffix);
if (!ReadAnimatedImage((const char*)file, &image, 0, NULL)) {
WFPRINTF(stderr, "Error decoding file: %s\n Aborting.\n", file);
error = 1;
@@ -113,8 +113,8 @@ int main(int argc, const char* argv[]) {
buffer.u.RGBA.rgba = image.frames[i].rgba;
buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
WSNPRINTF(out_file, sizeof(out_file), "%s/%s%.4d.%s",
dump_folder, prefix, i, suffix);
WSNPRINTF(out_file, sizeof(out_file), "%s/%s%.4d.%s", dump_folder,
prefix, i, suffix);
if (!WebPSaveImage(&buffer, format, (const char*)out_file)) {
WFPRINTF(stderr, "Error while saving image '%s'\n", out_file);
error = 1;

View File

@@ -41,11 +41,11 @@ static const int kNumChannels = 4;
#if defined(WEBP_HAVE_GIF)
// Returns true if the frame covers the full canvas.
static int IsFullFrame(int width, int height,
int canvas_width, int canvas_height) {
static int IsFullFrame(int width, int height, int canvas_width,
int canvas_height) {
return (width == canvas_width && height == canvas_height);
}
#endif // WEBP_HAVE_GIF
#endif // WEBP_HAVE_GIF
static int CheckSizeForOverflow(uint64_t size) {
return (size == (size_t)size);
@@ -95,8 +95,8 @@ void ClearAnimatedImage(AnimatedImage* const image) {
#if defined(WEBP_HAVE_GIF)
// Clear the canvas to transparent.
static void ZeroFillCanvas(uint8_t* rgba,
uint32_t canvas_width, uint32_t canvas_height) {
static void ZeroFillCanvas(uint8_t* rgba, uint32_t canvas_width,
uint32_t canvas_height) {
memset(rgba, 0, canvas_width * kNumChannels * canvas_height);
}
@@ -113,16 +113,16 @@ static void ZeroFillFrameRect(uint8_t* rgba, int rgba_stride, int x_offset,
}
// Copy width * height pixels from 'src' to 'dst'.
static void CopyCanvas(const uint8_t* src, uint8_t* dst,
uint32_t width, uint32_t height) {
static void CopyCanvas(const uint8_t* src, uint8_t* dst, uint32_t width,
uint32_t height) {
assert(src != NULL && dst != NULL);
memcpy(dst, src, width * kNumChannels * height);
}
// Copy pixels in the given rectangle from 'src' to 'dst' honoring the 'stride'.
static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
int x_offset, int y_offset,
int width, int height) {
int x_offset, int y_offset, int width,
int height) {
int j;
const int width_in_bytes = width * kNumChannels;
const size_t offset = y_offset * stride + x_offset * kNumChannels;
@@ -135,11 +135,11 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
dst += stride;
}
}
#endif // WEBP_HAVE_GIF
#endif // WEBP_HAVE_GIF
// Canonicalize all transparent pixels to transparent black to aid comparison.
static void CleanupTransparentPixels(uint32_t* rgba,
uint32_t width, uint32_t height) {
static void CleanupTransparentPixels(uint32_t* rgba, uint32_t width,
uint32_t height) {
const uint32_t* const rgba_end = rgba + width * height;
while (rgba < rgba_end) {
const uint8_t alpha = (*rgba >> 24) & 0xff;
@@ -152,8 +152,8 @@ static void CleanupTransparentPixels(uint32_t* rgba,
// Dump frame to a PAM file. Returns true on success.
static int DumpFrame(const char filename[], const char dump_folder[],
uint32_t frame_num, const uint8_t rgba[],
int canvas_width, int canvas_height) {
uint32_t frame_num, const uint8_t rgba[], int canvas_width,
int canvas_height) {
int ok = 0;
size_t max_len;
int y;
@@ -166,8 +166,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
base_name = WSTRRCHR(filename, '/');
base_name = (base_name == NULL) ? (const W_CHAR*)filename : base_name + 1;
max_len = WSTRLEN(dump_folder) + 1 + WSTRLEN(base_name)
+ strlen("_frame_") + strlen(".pam") + 8;
max_len = WSTRLEN(dump_folder) + 1 + WSTRLEN(base_name) + strlen("_frame_") +
strlen(".pam") + 8;
file_name = (W_CHAR*)WebPMalloc(max_len * sizeof(*file_name));
if (file_name == NULL) goto End;
@@ -183,7 +183,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
ok = 0;
goto End;
}
if (fprintf(f, "P7\nWIDTH %d\nHEIGHT %d\n"
if (fprintf(f,
"P7\nWIDTH %d\nHEIGHT %d\n"
"DEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n",
canvas_width, canvas_height) < 0) {
WFPRINTF(stderr, "Write error for file %s\n", file_name);
@@ -198,7 +199,7 @@ static int DumpFrame(const char filename[], const char dump_folder[],
row += canvas_width * kNumChannels;
}
ok = 1;
End:
End:
if (f != NULL) fclose(f);
WebPFree(file_name);
return ok;
@@ -266,8 +267,8 @@ static int ReadAnimatedWebP(const char filename[],
image->canvas_width * kNumChannels * image->canvas_height);
// Needed only because we may want to compare with GIF later.
CleanupTransparentPixels((uint32_t*)curr_rgba,
image->canvas_width, image->canvas_height);
CleanupTransparentPixels((uint32_t*)curr_rgba, image->canvas_width,
image->canvas_height);
if (dump_frames && dump_ok) {
dump_ok = DumpFrame(filename, dump_folder, frame_index, curr_rgba,
@@ -283,7 +284,7 @@ static int ReadAnimatedWebP(const char filename[],
ok = dump_ok;
if (ok) image->format = ANIM_WEBP;
End:
End:
WebPAnimDecoderDelete(dec);
return ok;
}
@@ -303,12 +304,11 @@ static int IsGIF(const WebPData* const data) {
// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
# define LOCAL_GIF_PREREQ(maj, min) \
(LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_GIF_VERSION 0
# define LOCAL_GIF_PREREQ(maj, min) 0
#define LOCAL_GIF_VERSION 0
#define LOCAL_GIF_PREREQ(maj, min) 0
#endif
#if !LOCAL_GIF_PREREQ(5, 0)
@@ -316,14 +316,14 @@ static int IsGIF(const WebPData* const data) {
// Added in v5.0
typedef struct {
int DisposalMode;
#define DISPOSAL_UNSPECIFIED 0 // No disposal specified
#define DISPOSE_DO_NOT 1 // Leave image in place
#define DISPOSE_BACKGROUND 2 // Set area to background color
#define DISPOSE_PREVIOUS 3 // Restore to previous content
int UserInputFlag; // User confirmation required before disposal
int DelayTime; // Pre-display delay in 0.01sec units
int TransparentColor; // Palette index for transparency, -1 if none
#define NO_TRANSPARENT_COLOR -1
#define DISPOSAL_UNSPECIFIED 0 // No disposal specified
#define DISPOSE_DO_NOT 1 // Leave image in place
#define DISPOSE_BACKGROUND 2 // Set area to background color
#define DISPOSE_PREVIOUS 3 // Restore to previous content
int UserInputFlag; // User confirmation required before disposal
int DelayTime; // Pre-display delay in 0.01sec units
int TransparentColor; // Palette index for transparency, -1 if none
#define NO_TRANSPARENT_COLOR -1
} GraphicsControlBlock;
static int DGifExtensionToGCB(const size_t GifExtensionLength,
@@ -357,8 +357,8 @@ static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; i++) {
ExtensionBlock* ep = &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i];
if (ep->Function == GRAPHICS_EXT_FUNC_CODE) {
return DGifExtensionToGCB(
ep->ByteCount, (const GifByteType*)ep->Bytes, gcb);
return DGifExtensionToGCB(ep->ByteCount, (const GifByteType*)ep->Bytes,
gcb);
}
}
return GIF_ERROR;
@@ -377,12 +377,12 @@ static int DGifSavedExtensionToGCB(GifFileType* GifFile, int ImageIndex,
#endif
static int IsKeyFrameGIF(const GifImageDesc* prev_desc, int prev_dispose,
const DecodedFrame* const prev_frame,
int canvas_width, int canvas_height) {
const DecodedFrame* const prev_frame, int canvas_width,
int canvas_height) {
if (prev_frame == NULL) return 1;
if (prev_dispose == DISPOSE_BACKGROUND) {
if (IsFullFrame(prev_desc->Width, prev_desc->Height,
canvas_width, canvas_height)) {
if (IsFullFrame(prev_desc->Width, prev_desc->Height, canvas_width,
canvas_height)) {
return 1;
}
if (prev_frame->is_key_frame) return 1;
@@ -403,14 +403,12 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
if (transparent_index != NO_TRANSPARENT_COLOR &&
gif->SBackGroundColor == transparent_index) {
return 0x00000000; // Special case: transparent black.
} else if (color_map == NULL || color_map->Colors == NULL
|| gif->SBackGroundColor >= color_map->ColorCount) {
} else if (color_map == NULL || color_map->Colors == NULL ||
gif->SBackGroundColor >= color_map->ColorCount) {
return 0xffffffff; // Invalid: assume white.
} else {
const GifColorType color = color_map->Colors[gif->SBackGroundColor];
return (0xffu << 24) |
(color.Red << 16) |
(color.Green << 8) |
return (0xffu << 24) | (color.Red << 16) | (color.Green << 8) |
(color.Blue << 0);
}
}
@@ -435,11 +433,10 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
(eb1->ByteCount == 11) &&
(!memcmp(signature, "NETSCAPE2.0", 11) ||
!memcmp(signature, "ANIMEXTS1.0", 11));
if (signature_is_ok &&
eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
eb2->Bytes[0] == 1) {
const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
((uint32_t)(eb2->Bytes[1]) << 0);
if (signature_is_ok && eb2->Function == CONTINUE_EXT_FUNC_CODE &&
eb2->ByteCount >= 3 && eb2->Bytes[0] == 1) {
const uint32_t extra_loop =
((uint32_t)(eb2->Bytes[2]) << 8) + ((uint32_t)(eb2->Bytes[1]) << 0);
return (extra_loop > 0) ? extra_loop + 1 : 0;
}
}
@@ -535,8 +532,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
image->canvas_height = (uint32_t)gif->SHeight;
if (image->canvas_width > MAX_CANVAS_SIZE ||
image->canvas_height > MAX_CANVAS_SIZE) {
fprintf(stderr, "Invalid canvas dimension: %d x %d\n",
image->canvas_width, image->canvas_height);
fprintf(stderr, "Invalid canvas dimension: %d x %d\n", image->canvas_width,
image->canvas_height);
DGifCloseFile(gif, NULL);
return 0;
}
@@ -611,11 +608,9 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
CopyCanvas(prev_rgba, curr_rgba, canvas_width, canvas_height);
// Dispose previous frame rectangle.
prev_frame_disposed =
(prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
curr_frame_opaque =
(curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
prev_frame_disposed = (prev_gcb.DisposalMode == DISPOSE_BACKGROUND ||
prev_gcb.DisposalMode == DISPOSE_PREVIOUS);
curr_frame_opaque = (curr_gcb.TransparentColor == NO_TRANSPARENT_COLOR);
prev_frame_completely_covered =
curr_frame_opaque &&
CoversFrameGIF(&curr_gif_image->ImageDesc, prev_desc);
@@ -643,9 +638,9 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
uint8_t* const src_frame_rgba =
image->frames[src_frame_num].rgba;
CopyFrameRectangle(src_frame_rgba, curr_rgba,
canvas_width_in_bytes,
prev_desc->Left, prev_desc->Top,
prev_desc->Width, prev_desc->Height);
canvas_width_in_bytes, prev_desc->Left,
prev_desc->Top, prev_desc->Width,
prev_desc->Height);
} else {
// Source canvas doesn't exist. So clear previous frame
// rectangle to background.
@@ -670,8 +665,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
}
if (dump_frames) {
if (!DumpFrame(filename, dump_folder, i, curr_rgba,
canvas_width, canvas_height)) {
if (!DumpFrame(filename, dump_folder, i, curr_rgba, canvas_width,
canvas_height)) {
DGifCloseFile(gif, NULL);
return 0;
}
@@ -695,7 +690,8 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
(void)image;
(void)dump_frames;
(void)dump_folder;
fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
fprintf(stderr,
"GIF support not compiled. Please install the libgif-dev "
"package before building.\n");
return 0;
}
@@ -718,8 +714,8 @@ int ReadAnimatedImage(const char filename[], AnimatedImage* const image,
}
if (IsWebP(&webp_data)) {
ok = ReadAnimatedWebP(filename, &webp_data, image, dump_frames,
dump_folder);
ok =
ReadAnimatedWebP(filename, &webp_data, image, dump_frames, dump_folder);
} else if (IsGIF(&webp_data)) {
ok = ReadAnimatedGIF(filename, image, dump_frames, dump_folder);
} else {
@@ -763,8 +759,7 @@ void GetDiffAndPSNR(const uint8_t rgba1[], const uint8_t rgba2[],
// premultiply R/G/B channels with alpha value
for (k = 0; k < kAlphaChannel; ++k) {
Accumulate(rgba1[offset + k] * alpha1 / 255.,
rgba2[offset + k] * alpha2 / 255.,
&f_max_diff, &sse);
rgba2[offset + k] * alpha2 / 255., &f_max_diff, &sse);
}
}
}

View File

@@ -22,15 +22,12 @@
extern "C" {
#endif
typedef enum {
ANIM_GIF,
ANIM_WEBP
} AnimatedFileFormat;
typedef enum { ANIM_GIF, ANIM_WEBP } AnimatedFileFormat;
typedef struct {
uint8_t* rgba; // Decoded and reconstructed full frame.
int duration; // Frame duration in milliseconds.
int is_key_frame; // True if this frame is a key-frame.
uint8_t* rgba; // Decoded and reconstructed full frame.
int duration; // Frame duration in milliseconds.
int is_key_frame; // True if this frame is a key-frame.
} DecodedFrame;
typedef struct {
@@ -67,7 +64,7 @@ void GetAnimatedImageVersions(int* const decoder_version,
int* const demux_version);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_ANIM_UTIL_H_

View File

@@ -37,10 +37,10 @@
extern "C" {
#endif
extern void* VP8GetCPUInfo; // opaque forward declaration.
extern void* VP8GetCPUInfo; // opaque forward declaration.
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DLL
@@ -66,12 +66,12 @@ static int ReadYUV(const uint8_t* const data, size_t data_size,
pic->use_argb = 0;
if (!WebPPictureAlloc(pic)) return 0;
ImgIoUtilCopyPlane(data, pic->width, pic->y, pic->y_stride,
pic->width, pic->height);
ImgIoUtilCopyPlane(data + y_plane_size, uv_width,
pic->u, pic->uv_stride, uv_width, uv_height);
ImgIoUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width,
pic->v, pic->uv_stride, uv_width, uv_height);
ImgIoUtilCopyPlane(data, pic->width, pic->y, pic->y_stride, pic->width,
pic->height);
ImgIoUtilCopyPlane(data + y_plane_size, uv_width, pic->u, pic->uv_stride,
uv_width, uv_height);
ImgIoUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width, pic->v,
pic->uv_stride, uv_width, uv_height);
return use_argb ? WebPPictureYUVAToARGB(pic) : 1;
}
@@ -119,7 +119,7 @@ static int ReadPicture(const char* const filename, WebPPicture* const pic,
// If image size is specified, infer it as YUV format.
ok = ReadYUV(data, data_size, pic);
}
End:
End:
if (!ok) {
WFPRINTF(stderr, "Error! Could not process file %s\n",
(const W_CHAR*)filename);
@@ -168,8 +168,8 @@ static void PrintValues(const int values[4]) {
static void PrintFullLosslessInfo(const WebPAuxStats* const stats,
const char* const description) {
fprintf(stderr, "Lossless-%s compressed size: %d bytes\n",
description, stats->lossless_size);
fprintf(stderr, "Lossless-%s compressed size: %d bytes\n", description,
stats->lossless_size);
fprintf(stderr, " * Header size: %d bytes, image data size: %d\n",
stats->lossless_hdr_size, stats->lossless_data_size);
if (stats->lossless_features) {
@@ -209,8 +209,7 @@ static void PrintExtraInfoLossless(const WebPPicture* const pic,
}
static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
int full_details,
const char* const file_name) {
int full_details, const char* const file_name) {
const WebPAuxStats* const stats = pic->stats;
if (short_output) {
fprintf(stderr, "%7d %2.2f\n", stats->coded_size, stats->PSNR[3]);
@@ -220,25 +219,25 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
const int num_skip = stats->block_count[2];
const int total = num_i4 + num_i16;
WFPRINTF(stderr, "File: %s\n", (const W_CHAR*)file_name);
fprintf(stderr, "Dimension: %d x %d%s\n",
pic->width, pic->height,
fprintf(stderr, "Dimension: %d x %d%s\n", pic->width, pic->height,
stats->alpha_data_size ? " (with alpha)" : "");
fprintf(stderr, "Output: "
fprintf(stderr,
"Output: "
"%d bytes Y-U-V-All-PSNR %2.2f %2.2f %2.2f %2.2f dB\n"
" (%.2f bpp)\n",
stats->coded_size,
stats->PSNR[0], stats->PSNR[1], stats->PSNR[2], stats->PSNR[3],
8.f * stats->coded_size / pic->width / pic->height);
stats->coded_size, stats->PSNR[0], stats->PSNR[1], stats->PSNR[2],
stats->PSNR[3], 8.f * stats->coded_size / pic->width / pic->height);
if (total > 0) {
int totals[4] = { 0, 0, 0, 0 };
fprintf(stderr, "block count: intra4: %6d (%.2f%%)\n"
" intra16: %6d (%.2f%%)\n"
" skipped: %6d (%.2f%%)\n",
num_i4, 100.f * num_i4 / total,
num_i16, 100.f * num_i16 / total,
int totals[4] = {0, 0, 0, 0};
fprintf(stderr,
"block count: intra4: %6d (%.2f%%)\n"
" intra16: %6d (%.2f%%)\n"
" skipped: %6d (%.2f%%)\n",
num_i4, 100.f * num_i4 / total, num_i16, 100.f * num_i16 / total,
num_skip, 100.f * num_skip / total);
fprintf(stderr, "bytes used: header: %6d (%.1f%%)\n"
" mode-partition: %6d (%.1f%%)\n",
fprintf(stderr,
"bytes used: header: %6d (%.1f%%)\n"
" mode-partition: %6d (%.1f%%)\n",
stats->header_bytes[0],
100.f * stats->header_bytes[0] / stats->coded_size,
stats->header_bytes[1],
@@ -247,9 +246,10 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
fprintf(stderr, " transparency: %6d (%.1f dB)\n",
stats->alpha_data_size, stats->PSNR[4]);
}
fprintf(stderr, " Residuals bytes "
"|segment 1|segment 2|segment 3"
"|segment 4| total\n");
fprintf(stderr,
" Residuals bytes "
"|segment 1|segment 2|segment 3"
"|segment 4| total\n");
if (full_details) {
fprintf(stderr, " intra4-coeffs: ");
PrintByteCount(stats->residual_bytes[0], stats->coded_size, totals);
@@ -286,11 +286,11 @@ static void PrintMapInfo(const WebPPicture* const pic) {
for (y = 0; y < mb_h; ++y) {
for (x = 0; x < mb_w; ++x) {
const int c = pic->extra_info[x + y * mb_w];
if (type == 1) { // intra4/intra16
if (type == 1) { // intra4/intra16
fprintf(stderr, "%c", "+."[c]);
} else if (type == 2) { // segments
} else if (type == 2) { // segments
fprintf(stderr, "%c", ".-*X"[c]);
} else if (type == 3) { // quantizers
} else if (type == 3) { // quantizers
fprintf(stderr, "%.2d ", c);
} else if (type == 6 || type == 7) {
fprintf(stderr, "%3d ", c);
@@ -346,7 +346,7 @@ static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {
}
ok = 1;
Error:
Error:
fclose(f);
return ok;
}
@@ -356,9 +356,9 @@ static int DumpPicture(const WebPPicture* const picture, const char* PGM_name) {
enum {
METADATA_EXIF = (1 << 0),
METADATA_ICC = (1 << 1),
METADATA_XMP = (1 << 2),
METADATA_ALL = METADATA_EXIF | METADATA_ICC | METADATA_XMP
METADATA_ICC = (1 << 1),
METADATA_XMP = (1 << 2),
METADATA_ALL = METADATA_EXIF | METADATA_ICC | METADATA_XMP
};
static const int kChunkHeaderSize = 8;
@@ -411,9 +411,9 @@ static int WriteMetadataChunk(FILE* const out, const char fourcc[4],
// Sets 'flag' in 'vp8x_flags' and updates 'metadata_size' with the size of the
// chunk if there is metadata and 'keep' is true.
static int UpdateFlagsAndSize(const MetadataPayload* const payload,
int keep, int flag,
uint32_t* vp8x_flags, uint64_t* metadata_size) {
static int UpdateFlagsAndSize(const MetadataPayload* const payload, int keep,
int flag, uint32_t* vp8x_flags,
uint64_t* metadata_size) {
if (keep && payload->bytes != NULL && payload->size > 0) {
*vp8x_flags |= flag;
*metadata_size += kChunkHeaderSize + payload->size + (payload->size & 1);
@@ -434,23 +434,23 @@ static int WriteWebPWithMetadata(FILE* const out,
int* const metadata_written) {
const char kVP8XHeader[] = "VP8X\x0a\x00\x00\x00";
const int kAlphaFlag = 0x10;
const int kEXIFFlag = 0x08;
const int kICCPFlag = 0x20;
const int kXMPFlag = 0x04;
const int kEXIFFlag = 0x08;
const int kICCPFlag = 0x20;
const int kXMPFlag = 0x04;
const size_t kRiffHeaderSize = 12;
const size_t kMaxChunkPayload = ~0 - kChunkHeaderSize - 1;
const size_t kMinSize = kRiffHeaderSize + kChunkHeaderSize;
uint32_t flags = 0;
uint64_t metadata_size = 0;
const int write_exif = UpdateFlagsAndSize(&metadata->exif,
!!(keep_metadata & METADATA_EXIF),
kEXIFFlag, &flags, &metadata_size);
const int write_iccp = UpdateFlagsAndSize(&metadata->iccp,
!!(keep_metadata & METADATA_ICC),
kICCPFlag, &flags, &metadata_size);
const int write_xmp = UpdateFlagsAndSize(&metadata->xmp,
!!(keep_metadata & METADATA_XMP),
kXMPFlag, &flags, &metadata_size);
const int write_exif =
UpdateFlagsAndSize(&metadata->exif, !!(keep_metadata & METADATA_EXIF),
kEXIFFlag, &flags, &metadata_size);
const int write_iccp =
UpdateFlagsAndSize(&metadata->iccp, !!(keep_metadata & METADATA_ICC),
kICCPFlag, &flags, &metadata_size);
const int write_xmp =
UpdateFlagsAndSize(&metadata->xmp, !!(keep_metadata & METADATA_XMP),
kXMPFlag, &flags, &metadata_size);
uint8_t* webp = memory_writer->mem;
size_t webp_size = memory_writer->size;
@@ -458,17 +458,18 @@ static int WriteWebPWithMetadata(FILE* const out,
if (webp_size < kMinSize) return 0;
if (webp_size - kChunkHeaderSize + metadata_size > kMaxChunkPayload) {
fprintf(stderr, "Error! Addition of metadata would exceed "
"container size limit.\n");
fprintf(stderr,
"Error! Addition of metadata would exceed "
"container size limit.\n");
return 0;
}
if (metadata_size > 0) {
const int kVP8XChunkSize = 18;
const int has_vp8x = !memcmp(webp + kRiffHeaderSize, "VP8X", kTagSize);
const uint32_t riff_size = (uint32_t)(webp_size - kChunkHeaderSize +
(has_vp8x ? 0 : kVP8XChunkSize) +
metadata_size);
const uint32_t riff_size =
(uint32_t)(webp_size - kChunkHeaderSize +
(has_vp8x ? 0 : kVP8XChunkSize) + metadata_size);
// RIFF
int ok = (fwrite(webp, kTagSize, 1, out) == 1);
// RIFF size (file header size is not recorded)
@@ -527,8 +528,7 @@ enum {
RESIZE_MODE_DEFAULT = RESIZE_MODE_ALWAYS
};
static void ApplyResizeMode(const int resize_mode,
const WebPPicture* const pic,
static void ApplyResizeMode(const int resize_mode, const WebPPicture* const pic,
int* const resize_w, int* const resize_h) {
const int src_w = pic->width;
const int src_h = pic->height;
@@ -536,8 +536,7 @@ static void ApplyResizeMode(const int resize_mode,
const int dst_h = *resize_h;
if (resize_mode == RESIZE_MODE_DOWN_ONLY) {
if ((dst_w == 0 && src_h <= dst_h) ||
(dst_h == 0 && src_w <= dst_w) ||
if ((dst_w == 0 && src_h <= dst_h) || (dst_h == 0 && src_w <= dst_w) ||
(src_w <= dst_w && src_h <= dst_h)) {
*resize_w = *resize_h = 0;
}
@@ -551,8 +550,7 @@ static void ApplyResizeMode(const int resize_mode,
//------------------------------------------------------------------------------
static int ProgressReport(int percent, const WebPPicture* const picture) {
fprintf(stderr, "[%s]: %3d %% \r",
(char*)picture->user_data, percent);
fprintf(stderr, "[%s]: %3d %% \r", (char*)picture->user_data, percent);
return 1; // all ok
}
@@ -569,8 +567,9 @@ static void HelpShort(void) {
static void HelpLong(void) {
printf("Usage:\n");
printf(" cwebp [-preset <...>] [options] in_file [-o out_file]\n\n");
printf("If input size (-s) for an image is not specified, it is\n"
"assumed to be a PNG, JPEG, TIFF or WebP file.\n");
printf(
"If input size (-s) for an image is not specified, it is\n"
"assumed to be a PNG, JPEG, TIFF or WebP file.\n");
printf("Note: Animated PNG and WebP files are not supported.\n");
#ifdef HAVE_WINCODEC_H
printf("Windows builds can take as input any of the files handled by WIC.\n");
@@ -578,46 +577,59 @@ static void HelpLong(void) {
printf("\nOptions:\n");
printf(" -h / -help ............. short help\n");
printf(" -H / -longhelp ......... long help\n");
printf(" -q <float> ............. quality factor (0:small..100:big), "
"default=75\n");
printf(" -alpha_q <int> ......... transparency-compression quality (0..100),"
"\n default=100\n");
printf(
" -q <float> ............. quality factor (0:small..100:big), "
"default=75\n");
printf(
" -alpha_q <int> ......... transparency-compression quality (0..100),"
"\n default=100\n");
printf(" -preset <string> ....... preset setting, one of:\n");
printf(" default, photo, picture,\n");
printf(" drawing, icon, text\n");
printf(" -preset must come first, as it overwrites other parameters\n");
printf(" -z <int> ............... activates lossless preset with given\n"
" level in [0:fast, ..., 9:slowest]\n");
printf(
" -z <int> ............... activates lossless preset with given\n"
" level in [0:fast, ..., 9:slowest]\n");
printf("\n");
printf(" -m <int> ............... compression method (0=fast, 6=slowest), "
"default=4\n");
printf(" -segments <int> ........ number of segments to use (1..4), "
"default=4\n");
printf(
" -m <int> ............... compression method (0=fast, 6=slowest), "
"default=4\n");
printf(
" -segments <int> ........ number of segments to use (1..4), "
"default=4\n");
printf(" -size <int> ............ target size (in bytes)\n");
printf(" -psnr <float> .......... target PSNR (in dB. typically: 42)\n");
printf("\n");
printf(" -s <int> <int> ......... input size (width x height) for YUV\n");
printf(" -sns <int> ............. spatial noise shaping (0:off, 100:max), "
"default=50\n");
printf(" -f <int> ............... filter strength (0=off..100), "
"default=60\n");
printf(" -sharpness <int> ....... "
"filter sharpness (0:most .. 7:least sharp), default=0\n");
printf(" -strong ................ use strong filter instead "
"of simple (default)\n");
printf(
" -sns <int> ............. spatial noise shaping (0:off, 100:max), "
"default=50\n");
printf(
" -f <int> ............... filter strength (0=off..100), "
"default=60\n");
printf(
" -sharpness <int> ....... "
"filter sharpness (0:most .. 7:least sharp), default=0\n");
printf(
" -strong ................ use strong filter instead "
"of simple (default)\n");
printf(" -nostrong .............. use simple filter instead of strong\n");
printf(" -sharp_yuv ............. use sharper (and slower) RGB->YUV "
"conversion\n");
printf(
" -sharp_yuv ............. use sharper (and slower) RGB->YUV "
"conversion\n");
printf(" -partition_limit <int> . limit quality to fit the 512k limit on\n");
printf(" "
"the first partition (0=no degradation ... 100=full)\n");
printf(
" "
"the first partition (0=no degradation ... 100=full)\n");
printf(" -pass <int> ............ analysis pass number (1..10)\n");
printf(" -qrange <min> <max> .... specifies the permissible quality range\n"
" (default: 0 100)\n");
printf(
" -qrange <min> <max> .... specifies the permissible quality range\n"
" (default: 0 100)\n");
printf(" -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
printf(" -resize <w> <h> ........ resize picture (*after* any cropping)\n");
printf(" -resize_mode <string> .. one of: up_only, down_only,"
" always (default)\n");
printf(
" -resize_mode <string> .. one of: up_only, down_only,"
" always (default)\n");
printf(" -mt .................... use multi-threading if available\n");
printf(" -low_memory ............ reduce memory usage (slower encoding)\n");
printf(" -map <int> ............. print map of extra info\n");
@@ -625,20 +637,24 @@ static void HelpLong(void) {
printf(" -print_ssim ............ prints averaged SSIM distortion\n");
printf(" -print_lsim ............ prints local-similarity distortion\n");
printf(" -d <file.pgm> .......... dump the compressed output (PGM file)\n");
printf(" -alpha_method <int> .... transparency-compression method (0..1), "
"default=1\n");
printf(
" -alpha_method <int> .... transparency-compression method (0..1), "
"default=1\n");
printf(" -alpha_filter <string> . predictive filtering for alpha plane,\n");
printf(" one of: none, fast (default) or best\n");
printf(" -exact ................. preserve RGB values in transparent area, "
"default=off\n");
printf(" -blend_alpha <hex> ..... blend colors against background color\n"
" expressed as RGB values written in\n"
" hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
" green=0xe0 and blue=0xd0\n");
printf(
" -exact ................. preserve RGB values in transparent area, "
"default=off\n");
printf(
" -blend_alpha <hex> ..... blend colors against background color\n"
" expressed as RGB values written in\n"
" hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
" green=0xe0 and blue=0xd0\n");
printf(" -noalpha ............... discard any transparency information\n");
printf(" -lossless .............. encode image losslessly, default=off\n");
printf(" -near_lossless <int> ... use near-lossless image preprocessing\n"
" (0..100=off), default=100\n");
printf(
" -near_lossless <int> ... use near-lossless image preprocessing\n"
" (0..100=off), default=100\n");
printf(" -hint <string> ......... specify image characteristics hint,\n");
printf(" one of: photo, picture or graph\n");
@@ -646,8 +662,9 @@ static void HelpLong(void) {
printf(" -metadata <string> ..... comma separated list of metadata to\n");
printf(" ");
printf("copy from the input to the output if present.\n");
printf(" "
"Valid values: all, none (default), exif, icc, xmp\n");
printf(
" "
"Valid values: all, none (default), exif, icc, xmp\n");
printf("\n");
printf(" -short ................. condense printed message\n");
@@ -656,8 +673,9 @@ static void HelpLong(void) {
#ifndef WEBP_DLL
printf(" -noasm ................. disable all assembly optimizations\n");
#endif
printf(" -v ..................... verbose, e.g. print encoding/decoding "
"times\n");
printf(
" -v ..................... verbose, e.g. print encoding/decoding "
"times\n");
printf(" -progress .............. report encoding progress\n");
printf("\n");
printf("Experimental Options:\n");
@@ -672,30 +690,29 @@ static void HelpLong(void) {
// Error messages
static const char* const kErrorMessages[VP8_ENC_ERROR_LAST] = {
"OK",
"OUT_OF_MEMORY: Out of memory allocating objects",
"BITSTREAM_OUT_OF_MEMORY: Out of memory re-allocating byte buffer",
"NULL_PARAMETER: NULL parameter passed to function",
"INVALID_CONFIGURATION: configuration is invalid",
"BAD_DIMENSION: Bad picture dimension. Maximum width and height "
"allowed is 16383 pixels.",
"PARTITION0_OVERFLOW: Partition #0 is too big to fit 512k.\n"
"To reduce the size of this partition, try using less segments "
"with the -segments option, and eventually reduce the number of "
"header bits using -partition_limit. More details are available "
"in the manual (`man cwebp`)",
"PARTITION_OVERFLOW: Partition is too big to fit 16M",
"BAD_WRITE: Picture writer returned an I/O error",
"FILE_TOO_BIG: File would be too big to fit in 4G",
"USER_ABORT: encoding abort requested by user"
};
"OK",
"OUT_OF_MEMORY: Out of memory allocating objects",
"BITSTREAM_OUT_OF_MEMORY: Out of memory re-allocating byte buffer",
"NULL_PARAMETER: NULL parameter passed to function",
"INVALID_CONFIGURATION: configuration is invalid",
"BAD_DIMENSION: Bad picture dimension. Maximum width and height "
"allowed is 16383 pixels.",
"PARTITION0_OVERFLOW: Partition #0 is too big to fit 512k.\n"
"To reduce the size of this partition, try using less segments "
"with the -segments option, and eventually reduce the number of "
"header bits using -partition_limit. More details are available "
"in the manual (`man cwebp`)",
"PARTITION_OVERFLOW: Partition is too big to fit 16M",
"BAD_WRITE: Picture writer returned an I/O error",
"FILE_TOO_BIG: File would be too big to fit in 4G",
"USER_ABORT: encoding abort requested by user"};
//------------------------------------------------------------------------------
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
int main(int argc, const char* argv[]) {
int return_value = EXIT_FAILURE;
const char* in_file = NULL, *out_file = NULL, *dump_file = NULL;
const char *in_file = NULL, *out_file = NULL, *dump_file = NULL;
FILE* out = NULL;
int c;
int short_output = 0;
@@ -712,8 +729,8 @@ int main(int argc, const char* argv[]) {
int keep_metadata = 0;
int metadata_written = 0;
WebPPicture picture;
int print_distortion = -1; // -1=off, 0=PSNR, 1=SSIM, 2=LSIM
WebPPicture original_picture; // when PSNR or SSIM is requested
int print_distortion = -1; // -1=off, 0=PSNR, 1=SSIM, 2=LSIM
WebPPicture original_picture; // when PSNR or SSIM is requested
WebPConfig config;
WebPAuxStats stats;
WebPMemoryWriter memory_writer;
@@ -725,8 +742,7 @@ int main(int argc, const char* argv[]) {
MetadataInit(&metadata);
WebPMemoryWriterInit(&memory_writer);
if (!WebPPictureInit(&picture) ||
!WebPPictureInit(&original_picture) ||
if (!WebPPictureInit(&picture) || !WebPPictureInit(&original_picture) ||
!WebPConfigInit(&config)) {
fprintf(stderr, "Error! Version mismatch!\n");
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
@@ -765,18 +781,17 @@ int main(int argc, const char* argv[]) {
picture.width = ExUtilGetInt(argv[++c], 0, &parse_error);
picture.height = ExUtilGetInt(argv[++c], 0, &parse_error);
if (picture.width > WEBP_MAX_DIMENSION || picture.width < 0 ||
picture.height > WEBP_MAX_DIMENSION || picture.height < 0) {
fprintf(stderr,
"Specified dimension (%d x %d) is out of range.\n",
picture.height > WEBP_MAX_DIMENSION || picture.height < 0) {
fprintf(stderr, "Specified dimension (%d x %d) is out of range.\n",
picture.width, picture.height);
goto Error;
}
} else if (!strcmp(argv[c], "-m") && c + 1 < argc) {
config.method = ExUtilGetInt(argv[++c], 0, &parse_error);
use_lossless_preset = 0; // disable -z option
use_lossless_preset = 0; // disable -z option
} else if (!strcmp(argv[c], "-q") && c + 1 < argc) {
config.quality = ExUtilGetFloat(argv[++c], &parse_error);
use_lossless_preset = 0; // disable -z option
use_lossless_preset = 0; // disable -z option
} else if (!strcmp(argv[c], "-z") && c + 1 < argc) {
lossless_preset = ExUtilGetInt(argv[++c], 0, &parse_error);
if (use_lossless_preset != 0) use_lossless_preset = 1;
@@ -892,11 +907,10 @@ int main(int argc, const char* argv[]) {
} else if (!strcmp(argv[c], "-version")) {
const int version = WebPGetEncoderVersion();
const int sharpyuv_version = SharpYuvGetVersion();
printf("%d.%d.%d\n",
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
printf("libsharpyuv: %d.%d.%d\n",
(sharpyuv_version >> 24) & 0xff, (sharpyuv_version >> 16) & 0xffff,
sharpyuv_version & 0xff);
printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
version & 0xff);
printf("libsharpyuv: %d.%d.%d\n", (sharpyuv_version >> 24) & 0xff,
(sharpyuv_version >> 16) & 0xffff, sharpyuv_version & 0xff);
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
} else if (!strcmp(argv[c], "-progress")) {
show_progress = 1;
@@ -930,11 +944,11 @@ int main(int argc, const char* argv[]) {
const char* option;
int flag;
} kTokens[] = {
{ "all", METADATA_ALL },
{ "none", 0 },
{ "exif", METADATA_EXIF },
{ "icc", METADATA_ICC },
{ "xmp", METADATA_XMP },
{"all", METADATA_ALL}, //
{"none", 0}, //
{"exif", METADATA_EXIF}, //
{"icc", METADATA_ICC}, //
{"xmp", METADATA_XMP}, //
};
const size_t kNumTokens = sizeof(kTokens) / sizeof(kTokens[0]);
const char* start = argv[++c];
@@ -966,8 +980,9 @@ int main(int argc, const char* argv[]) {
#ifdef HAVE_WINCODEC_H
if (keep_metadata != 0 && keep_metadata != METADATA_ICC) {
// TODO(jzern): remove when -metadata is supported on all platforms.
fprintf(stderr, "Warning: only ICC profile extraction is currently"
" supported on this platform!\n");
fprintf(stderr,
"Warning: only ICC profile extraction is currently"
" supported on this platform!\n");
}
#endif
} else if (!strcmp(argv[c], "-v")) {
@@ -1005,12 +1020,14 @@ int main(int argc, const char* argv[]) {
// warning for such options.
if (!quiet && config.lossless == 1) {
if (config.target_size > 0 || config.target_PSNR > 0) {
fprintf(stderr, "Encoding for specified size or PSNR is not supported"
" for lossless encoding. Ignoring such option(s)!\n");
fprintf(stderr,
"Encoding for specified size or PSNR is not supported"
" for lossless encoding. Ignoring such option(s)!\n");
}
if (config.partition_limit > 0) {
fprintf(stderr, "Partition limit option is not required for lossless"
" encoding. Ignoring this option!\n");
fprintf(stderr,
"Partition limit option is not required for lossless"
" encoding. Ignoring this option!\n");
}
}
// If a target size or PSNR was given, but somehow the -pass option was
@@ -1027,9 +1044,9 @@ int main(int argc, const char* argv[]) {
// Read the input. We need to decide if we prefer ARGB or YUVA
// samples, depending on the expected compression mode (this saves
// some conversion steps).
picture.use_argb = (config.lossless || config.use_sharp_yuv ||
config.preprocessing > 0 ||
crop || (resize_w | resize_h) > 0);
picture.use_argb =
(config.lossless || config.use_sharp_yuv || config.preprocessing > 0 ||
crop || (resize_w | resize_h) > 0);
if (verbose) {
StopwatchReset(&stop_watch);
}
@@ -1177,8 +1194,8 @@ int main(int argc, const char* argv[]) {
}
if (!WebPEncode(&config, &picture)) {
fprintf(stderr, "Error! Cannot encode picture as WebP\n");
fprintf(stderr, "Error code: %d (%s)\n",
picture.error_code, kErrorMessages[picture.error_code]);
fprintf(stderr, "Error code: %d (%s)\n", picture.error_code,
kErrorMessages[picture.error_code]);
goto Error;
}
if (verbose) {
@@ -1221,8 +1238,9 @@ int main(int argc, const char* argv[]) {
// Write the YUV planes to a PGM file. Only available for lossy.
if (dump_file) {
if (picture.use_argb) {
fprintf(stderr, "Warning: can't dump file (-d option) "
"in lossless mode.\n");
fprintf(stderr,
"Warning: can't dump file (-d option) "
"in lossless mode.\n");
} else if (!DumpPicture(&picture, dump_file)) {
WFPRINTF(stderr, "Warning, couldn't dump picture %s\n",
(const W_CHAR*)dump_file);
@@ -1267,18 +1285,18 @@ int main(int argc, const char* argv[]) {
if (!short_output && picture.extra_info_type > 0) {
PrintMapInfo(&picture);
}
if (print_distortion >= 0) { // print distortion
static const char* distortion_names[] = { "PSNR", "SSIM", "LSIM" };
if (print_distortion >= 0) { // print distortion
static const char* distortion_names[] = {"PSNR", "SSIM", "LSIM"};
float values[5];
if (!WebPPictureDistortion(&picture, &original_picture,
print_distortion, values)) {
if (!WebPPictureDistortion(&picture, &original_picture, print_distortion,
values)) {
fprintf(stderr, "Error while computing the distortion.\n");
goto Error;
}
if (!short_output) {
fprintf(stderr, "%s: ", distortion_names[print_distortion]);
fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f Total:%.2f\n",
values[0], values[1], values[2], values[3], values[4]);
fprintf(stderr, "B:%.2f G:%.2f R:%.2f A:%.2f Total:%.2f\n", values[0],
values[1], values[2], values[3], values[4]);
} else {
fprintf(stderr, "%7d %.4f\n", picture.stats->coded_size, values[4]);
}
@@ -1289,7 +1307,7 @@ int main(int argc, const char* argv[]) {
}
return_value = EXIT_SUCCESS;
Error:
Error:
WebPMemoryWriterClear(&memory_writer);
WebPFree(picture.extra_info);
MetadataFree(&metadata);

View File

@@ -35,14 +35,13 @@ static int quiet = 0;
extern "C" {
#endif
extern void* VP8GetCPUInfo; // opaque forward declaration.
extern void* VP8GetCPUInfo; // opaque forward declaration.
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DLL
static int SaveOutput(const WebPDecBuffer* const buffer,
WebPOutputFileFormat format, const char* const out_file) {
const int use_stdout = (out_file != NULL) && !WSTRCMP(out_file, "-");
@@ -77,43 +76,42 @@ static int SaveOutput(const WebPDecBuffer* const buffer,
}
static void Help(void) {
printf("Usage: dwebp in_file [options] [-o out_file]\n\n"
"Decodes the WebP image file to PNG format [Default].\n"
"Note: Animated WebP files are not supported.\n\n"
"Use following options to convert into alternate image formats:\n"
" -pam ......... save the raw RGBA samples as a color PAM\n"
" -ppm ......... save the raw RGB samples as a color PPM\n"
" -bmp ......... save as uncompressed BMP format\n"
" -tiff ........ save as uncompressed TIFF format\n"
" -pgm ......... save the raw YUV samples as a grayscale PGM\n"
" file with IMC4 layout\n"
" -yuv ......... save the raw YUV samples in flat layout\n"
"\n"
" Other options are:\n"
" -version ..... print version number and exit\n"
" -nofancy ..... don't use the fancy YUV420 upscaler\n"
" -nofilter .... disable in-loop filtering\n"
" -nodither .... disable dithering\n"
" -dither <d> .. dithering strength (in 0..100)\n"
" -alpha_dither use alpha-plane dithering if needed\n"
" -mt .......... use multi-threading\n"
" -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
" -resize <w> <h> ......... resize output (*after* any cropping)\n"
" -flip ........ flip the output vertically\n"
" -alpha ....... only save the alpha plane\n"
" -incremental . use incremental decoding (useful for tests)\n"
" -h ........... this help message\n"
" -v ........... verbose (e.g. print encoding/decoding times)\n"
" -quiet ....... quiet mode, don't print anything\n"
printf(
"Usage: dwebp in_file [options] [-o out_file]\n\n"
"Decodes the WebP image file to PNG format [Default].\n"
"Note: Animated WebP files are not supported.\n\n"
"Use following options to convert into alternate image formats:\n"
" -pam ......... save the raw RGBA samples as a color PAM\n"
" -ppm ......... save the raw RGB samples as a color PPM\n"
" -bmp ......... save as uncompressed BMP format\n"
" -tiff ........ save as uncompressed TIFF format\n"
" -pgm ......... save the raw YUV samples as a grayscale PGM\n"
" file with IMC4 layout\n"
" -yuv ......... save the raw YUV samples in flat layout\n"
"\n"
" Other options are:\n"
" -version ..... print version number and exit\n"
" -nofancy ..... don't use the fancy YUV420 upscaler\n"
" -nofilter .... disable in-loop filtering\n"
" -nodither .... disable dithering\n"
" -dither <d> .. dithering strength (in 0..100)\n"
" -alpha_dither use alpha-plane dithering if needed\n"
" -mt .......... use multi-threading\n"
" -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
" -resize <w> <h> ......... resize output (*after* any cropping)\n"
" -flip ........ flip the output vertically\n"
" -alpha ....... only save the alpha plane\n"
" -incremental . use incremental decoding (useful for tests)\n"
" -h ........... this help message\n"
" -v ........... verbose (e.g. print encoding/decoding times)\n"
" -quiet ....... quiet mode, don't print anything\n"
#ifndef WEBP_DLL
" -noasm ....... disable all assembly optimizations\n"
" -noasm ....... disable all assembly optimizations\n"
#endif
);
);
}
static const char* const kFormatType[] = {
"unspecified", "lossy", "lossless"
};
static const char* const kFormatType[] = {"unspecified", "lossy", "lossless"};
static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
WebPOutputFileFormat format,
@@ -130,23 +128,23 @@ static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
h = config->options.crop_height;
}
if (format >= RGB && format <= rgbA_4444) {
const int bpp = (format == RGB || format == BGR) ? 3
: (format == RGBA_4444 || format == rgbA_4444 ||
format == RGB_565) ? 2
: 4;
uint32_t stride = bpp * w + 7; // <- just for exercising
const int bpp =
(format == RGB || format == BGR) ? 3
: (format == RGBA_4444 || format == rgbA_4444 || format == RGB_565) ? 2
: 4;
uint32_t stride = bpp * w + 7; // <- just for exercising
external_buffer = (uint8_t*)WebPMalloc(stride * h);
if (external_buffer == NULL) return NULL;
output_buffer->u.RGBA.stride = stride;
output_buffer->u.RGBA.size = stride * h;
output_buffer->u.RGBA.rgba = external_buffer;
} else { // YUV and YUVA
} else { // YUV and YUVA
const int has_alpha = WebPIsAlphaMode(output_buffer->colorspace);
uint8_t* tmp;
uint32_t stride = w + 3;
uint32_t uv_stride = (w + 1) / 2 + 13;
uint32_t total_size = stride * h * (has_alpha ? 2 : 1)
+ 2 * uv_stride * (h + 1) / 2;
uint32_t total_size =
stride * h * (has_alpha ? 2 : 1) + 2 * uv_stride * (h + 1) / 2;
assert(format >= YUV && format <= YUVA);
external_buffer = (uint8_t*)WebPMalloc(total_size);
if (external_buffer == NULL) return NULL;
@@ -228,8 +226,8 @@ int main(int argc, const char* argv[]) {
quiet = 1;
} else if (!strcmp(argv[c], "-version")) {
const int version = WebPGetDecoderVersion();
printf("%d.%d.%d\n",
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
version & 0xff);
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
} else if (!strcmp(argv[c], "-pgm")) {
format = PGM;
@@ -237,19 +235,32 @@ int main(int argc, const char* argv[]) {
format = RAW_YUV;
} else if (!strcmp(argv[c], "-pixel_format") && c < argc - 1) {
const char* const fmt = argv[++c];
if (!strcmp(fmt, "RGB")) format = RGB;
else if (!strcmp(fmt, "RGBA")) format = RGBA;
else if (!strcmp(fmt, "BGR")) format = BGR;
else if (!strcmp(fmt, "BGRA")) format = BGRA;
else if (!strcmp(fmt, "ARGB")) format = ARGB;
else if (!strcmp(fmt, "RGBA_4444")) format = RGBA_4444;
else if (!strcmp(fmt, "RGB_565")) format = RGB_565;
else if (!strcmp(fmt, "rgbA")) format = rgbA;
else if (!strcmp(fmt, "bgrA")) format = bgrA;
else if (!strcmp(fmt, "Argb")) format = Argb;
else if (!strcmp(fmt, "rgbA_4444")) format = rgbA_4444;
else if (!strcmp(fmt, "YUV")) format = YUV;
else if (!strcmp(fmt, "YUVA")) format = YUVA;
if (!strcmp(fmt, "RGB"))
format = RGB;
else if (!strcmp(fmt, "RGBA"))
format = RGBA;
else if (!strcmp(fmt, "BGR"))
format = BGR;
else if (!strcmp(fmt, "BGRA"))
format = BGRA;
else if (!strcmp(fmt, "ARGB"))
format = ARGB;
else if (!strcmp(fmt, "RGBA_4444"))
format = RGBA_4444;
else if (!strcmp(fmt, "RGB_565"))
format = RGB_565;
else if (!strcmp(fmt, "rgbA"))
format = rgbA;
else if (!strcmp(fmt, "bgrA"))
format = bgrA;
else if (!strcmp(fmt, "Argb"))
format = Argb;
else if (!strcmp(fmt, "rgbA_4444"))
format = rgbA_4444;
else if (!strcmp(fmt, "YUV"))
format = YUV;
else if (!strcmp(fmt, "YUVA"))
format = YUVA;
else {
fprintf(stderr, "Can't parse pixel_format %s\n", fmt);
parse_error = 1;
@@ -271,14 +282,14 @@ int main(int argc, const char* argv[]) {
ExUtilGetInt(argv[++c], 0, &parse_error);
} else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
config.options.use_cropping = 1;
config.options.crop_left = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_top = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_width = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_left = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_top = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_width = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.crop_height = ExUtilGetInt(argv[++c], 0, &parse_error);
} else if ((!strcmp(argv[c], "-scale") || !strcmp(argv[c], "-resize")) &&
c < argc - 2) { // '-scale' is left for compatibility
config.options.use_scaling = 1;
config.options.scaled_width = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.scaled_width = ExUtilGetInt(argv[++c], 0, &parse_error);
config.options.scaled_height = ExUtilGetInt(argv[++c], 0, &parse_error);
} else if (!strcmp(argv[c], "-flip")) {
config.options.flip = 1;
@@ -350,25 +361,52 @@ int main(int argc, const char* argv[]) {
output_buffer->colorspace = MODE_YUVA;
break;
// forced modes:
case RGB: output_buffer->colorspace = MODE_RGB; break;
case RGBA: output_buffer->colorspace = MODE_RGBA; break;
case BGR: output_buffer->colorspace = MODE_BGR; break;
case BGRA: output_buffer->colorspace = MODE_BGRA; break;
case ARGB: output_buffer->colorspace = MODE_ARGB; break;
case RGBA_4444: output_buffer->colorspace = MODE_RGBA_4444; break;
case RGB_565: output_buffer->colorspace = MODE_RGB_565; break;
case rgbA: output_buffer->colorspace = MODE_rgbA; break;
case bgrA: output_buffer->colorspace = MODE_bgrA; break;
case Argb: output_buffer->colorspace = MODE_Argb; break;
case rgbA_4444: output_buffer->colorspace = MODE_rgbA_4444; break;
case YUV: output_buffer->colorspace = MODE_YUV; break;
case YUVA: output_buffer->colorspace = MODE_YUVA; break;
default: goto Exit;
case RGB:
output_buffer->colorspace = MODE_RGB;
break;
case RGBA:
output_buffer->colorspace = MODE_RGBA;
break;
case BGR:
output_buffer->colorspace = MODE_BGR;
break;
case BGRA:
output_buffer->colorspace = MODE_BGRA;
break;
case ARGB:
output_buffer->colorspace = MODE_ARGB;
break;
case RGBA_4444:
output_buffer->colorspace = MODE_RGBA_4444;
break;
case RGB_565:
output_buffer->colorspace = MODE_RGB_565;
break;
case rgbA:
output_buffer->colorspace = MODE_rgbA;
break;
case bgrA:
output_buffer->colorspace = MODE_bgrA;
break;
case Argb:
output_buffer->colorspace = MODE_Argb;
break;
case rgbA_4444:
output_buffer->colorspace = MODE_rgbA_4444;
break;
case YUV:
output_buffer->colorspace = MODE_YUV;
break;
case YUVA:
output_buffer->colorspace = MODE_YUVA;
break;
default:
goto Exit;
}
if (use_external_memory > 0 && format >= RGB) {
external_buffer = AllocateExternalBuffer(&config, format,
use_external_memory);
external_buffer =
AllocateExternalBuffer(&config, format, use_external_memory);
if (external_buffer == NULL) goto Exit;
}
@@ -410,11 +448,12 @@ int main(int argc, const char* argv[]) {
output_buffer->width, output_buffer->height,
bitstream->has_alpha ? " (with alpha)" : "",
kFormatType[bitstream->format]);
fprintf(stderr, "Nothing written; "
"use -o flag to save the result as e.g. PNG.\n");
fprintf(stderr,
"Nothing written; "
"use -o flag to save the result as e.g. PNG.\n");
}
}
Exit:
Exit:
WebPFreeDecBuffer(output_buffer);
WebPFree((void*)external_buffer);
WebPFree((void*)data);

View File

@@ -46,7 +46,7 @@ int ExUtilGetInts(const char* v, int base, int max_output, int output[]) {
if (error) return -1;
output[n] = value;
v = strchr(v, ',');
if (v != NULL) ++v; // skip over the trailing ','
if (v != NULL) ++v; // skip over the trailing ','
}
return n;
}
@@ -110,8 +110,7 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
}
argc = 0;
for (cur = strtok((char*)args->argv_data.bytes, sep);
cur != NULL;
for (cur = strtok((char*)args->argv_data.bytes, sep); cur != NULL;
cur = strtok(NULL, sep)) {
if (argc == MAX_ARGC) {
fprintf(stderr, "ERROR: Arguments limit %d reached\n", MAX_ARGC);

View File

@@ -13,8 +13,8 @@
#ifndef WEBP_EXAMPLES_EXAMPLE_UTIL_H_
#define WEBP_EXAMPLES_EXAMPLE_UTIL_H_
#include "webp/types.h"
#include "webp/mux_types.h"
#include "webp/types.h"
#ifdef __cplusplus
extern "C" {
@@ -64,7 +64,7 @@ int ExUtilInitCommandLineArguments(int argc, const char* argv[],
void ExUtilDeleteCommandLineArguments(CommandLineArguments* const args);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_EXAMPLE_UTIL_H_

View File

@@ -28,14 +28,15 @@
#endif
#include <gif_lib.h>
#include "sharpyuv/sharpyuv.h"
#include "webp/encode.h"
#include "webp/mux.h"
#include "../examples/example_util.h"
#include "../imageio/imageio_util.h"
#include "./gifdec.h"
#include "./unicode.h"
#include "./unicode_gif.h"
#include "sharpyuv/sharpyuv.h"
#include "webp/encode.h"
#include "webp/mux.h"
#if !defined(STDIN_FILENO)
#define STDIN_FILENO 0
@@ -46,9 +47,8 @@
static int transparent_index = GIF_INDEX_INVALID; // Opaque by default.
static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
"WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
"WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
};
"WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
"WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"};
static const char* ErrorString(WebPMuxError err) {
assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
@@ -56,9 +56,9 @@ static const char* ErrorString(WebPMuxError err) {
}
enum {
METADATA_ICC = (1 << 0),
METADATA_XMP = (1 << 1),
METADATA_ALL = METADATA_ICC | METADATA_XMP
METADATA_ICC = (1 << 0),
METADATA_XMP = (1 << 1),
METADATA_ALL = METADATA_ICC | METADATA_XMP
};
//------------------------------------------------------------------------------
@@ -69,20 +69,25 @@ static void Help(void) {
printf("Options:\n");
printf(" -h / -help ............. this help\n");
printf(" -lossy ................. encode image using lossy compression\n");
printf(" -mixed ................. for each frame in the image, pick lossy\n"
" or lossless compression heuristically\n");
printf(" -near_lossless <int> ... use near-lossless image preprocessing\n"
" (0..100=off), default=100\n");
printf(" -sharp_yuv ............. use sharper (and slower) RGB->YUV "
"conversion\n"
" (lossy only)\n");
printf(
" -mixed ................. for each frame in the image, pick lossy\n"
" or lossless compression heuristically\n");
printf(
" -near_lossless <int> ... use near-lossless image preprocessing\n"
" (0..100=off), default=100\n");
printf(
" -sharp_yuv ............. use sharper (and slower) RGB->YUV "
"conversion\n"
" (lossy only)\n");
printf(" -q <float> ............. quality factor (0:small..100:big)\n");
printf(" -m <int> ............... compression method (0=fast, 6=slowest), "
"default=4\n");
printf(" -min_size .............. minimize output size (default:off)\n"
" lossless compression by default; can be\n"
" combined with -q, -m, -lossy or -mixed\n"
" options\n");
printf(
" -m <int> ............... compression method (0=fast, 6=slowest), "
"default=4\n");
printf(
" -min_size .............. minimize output size (default:off)\n"
" lossless compression by default; can be\n"
" combined with -q, -m, -lossy or -mixed\n"
" options\n");
printf(" -kmin <int> ............ min distance between key frames\n");
printf(" -kmax <int> ............ max distance between key frames\n");
printf(" -f <int> ............... filter strength (0=off..100)\n");
@@ -109,21 +114,21 @@ int main(int argc, const char* argv[]) {
int gif_error = GIF_ERROR;
WebPMuxError err = WEBP_MUX_OK;
int ok = 0;
const W_CHAR* in_file = NULL, *out_file = NULL;
const W_CHAR *in_file = NULL, *out_file = NULL;
GifFileType* gif = NULL;
int frame_duration = 0;
int frame_timestamp = 0;
GIFDisposeMethod orig_dispose = GIF_DISPOSE_NONE;
WebPPicture frame; // Frame rectangle only (not disposed).
WebPPicture curr_canvas; // Not disposed.
WebPPicture prev_canvas; // Disposed.
WebPPicture frame; // Frame rectangle only (not disposed).
WebPPicture curr_canvas; // Not disposed.
WebPPicture prev_canvas; // Disposed.
WebPAnimEncoder* enc = NULL;
WebPAnimEncoderOptions enc_options;
WebPConfig config;
int frame_number = 0; // Whether we are processing the first frame.
int frame_number = 0; // Whether we are processing the first frame.
int done;
int c;
int quiet = 0;
@@ -131,7 +136,7 @@ int main(int argc, const char* argv[]) {
int keep_metadata = METADATA_XMP; // ICC not output by default.
WebPData icc_data;
int stored_icc = 0; // Whether we have already stored an ICC profile.
int stored_icc = 0; // Whether we have already stored an ICC profile.
WebPData xmp_data;
int stored_xmp = 0; // Whether we have already stored an XMP profile.
int loop_count = 0; // default: infinite
@@ -198,10 +203,10 @@ int main(int argc, const char* argv[]) {
const char* option;
int flag;
} kTokens[] = {
{ "all", METADATA_ALL },
{ "none", 0 },
{ "icc", METADATA_ICC },
{ "xmp", METADATA_XMP },
{"all", METADATA_ALL},
{"none", 0},
{"icc", METADATA_ICC},
{"xmp", METADATA_XMP},
};
const size_t kNumTokens = sizeof(kTokens) / sizeof(*kTokens);
const char* start = argv[++c];
@@ -319,8 +324,8 @@ int main(int argc, const char* argv[]) {
goto End;
}
if (verbose) {
printf("Fixed canvas screen dimension to: %d x %d\n",
gif->SWidth, gif->SHeight);
printf("Fixed canvas screen dimension to: %d x %d\n", gif->SWidth,
gif->SHeight);
}
}
// Allocate current buffer.
@@ -418,7 +423,7 @@ int main(int argc, const char* argv[]) {
break;
}
case APPLICATION_EXT_FUNC_CODE: {
if (data[0] != 11) break; // Chunk is too short
if (data[0] != 11) break; // Chunk is too short
if (!memcmp(data + 1, "NETSCAPE2.0", 11) ||
!memcmp(data + 1, "ANIMEXTS1.0", 11)) {
if (!GIFReadLoopCount(gif, &data, &loop_count)) {
@@ -529,7 +534,7 @@ int main(int argc, const char* argv[]) {
}
}
if (stored_icc) { // Add ICCP chunk.
if (stored_icc) { // Add ICCP chunk.
err = WebPMuxSetChunk(mux, "ICCP", &icc_data, 1);
if (verbose) {
fprintf(stderr, "ICC size: %d\n", (int)icc_data.size);
@@ -541,7 +546,7 @@ int main(int argc, const char* argv[]) {
}
}
if (stored_xmp) { // Add XMP chunk.
if (stored_xmp) { // Add XMP chunk.
err = WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1);
if (verbose) {
fprintf(stderr, "XMP size: %d\n", (int)xmp_data.size);
@@ -555,8 +560,10 @@ int main(int argc, const char* argv[]) {
err = WebPMuxAssemble(mux, &webp_data);
if (err != WEBP_MUX_OK) {
fprintf(stderr, "ERROR (%s): Could not assemble when re-muxing to add "
"loop count/metadata.\n", ErrorString(err));
fprintf(stderr,
"ERROR (%s): Could not assemble when re-muxing to add "
"loop count/metadata.\n",
ErrorString(err));
goto End;
}
}
@@ -569,8 +576,7 @@ int main(int argc, const char* argv[]) {
}
if (!quiet) {
if (!WSTRCMP(out_file, "-")) {
fprintf(stderr, "Saved %d bytes to STDIO\n",
(int)webp_data.size);
fprintf(stderr, "Saved %d bytes to STDIO\n", (int)webp_data.size);
} else {
WFPRINTF(stderr, "Saved output file (%d bytes): %s\n",
(int)webp_data.size, out_file);
@@ -578,8 +584,10 @@ int main(int argc, const char* argv[]) {
}
} else {
if (!quiet) {
fprintf(stderr, "Nothing written; use -o flag to save the result "
"(%d bytes).\n", (int)webp_data.size);
fprintf(stderr,
"Nothing written; use -o flag to save the result "
"(%d bytes).\n",
(int)webp_data.size);
}
}
@@ -587,7 +595,7 @@ int main(int argc, const char* argv[]) {
ok = 1;
gif_error = GIF_OK;
End:
End:
WebPDataClear(&icc_data);
WebPDataClear(&xmp_data);
WebPMuxDelete(mux);
@@ -601,7 +609,7 @@ int main(int argc, const char* argv[]) {
GIFDisplayError(gif, gif_error);
}
if (gif != NULL) {
#if LOCAL_GIF_PREREQ(5,1)
#if LOCAL_GIF_PREREQ(5, 1)
DGifCloseFile(gif, &gif_error);
#else
DGifCloseFile(gif);

View File

@@ -19,22 +19,21 @@
#include <string.h>
#include "webp/encode.h"
#include "webp/types.h"
#include "webp/mux_types.h"
#include "webp/types.h"
#define GIF_TRANSPARENT_COLOR 0x00000000u
#define GIF_WHITE_COLOR 0xffffffffu
#define GIF_TRANSPARENT_MASK 0x01
#define GIF_DISPOSE_MASK 0x07
#define GIF_DISPOSE_SHIFT 2
#define GIF_WHITE_COLOR 0xffffffffu
#define GIF_TRANSPARENT_MASK 0x01
#define GIF_DISPOSE_MASK 0x07
#define GIF_DISPOSE_SHIFT 2
// from utils/utils.h
#ifdef __cplusplus
extern "C" {
#endif
extern void WebPCopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride,
int width, int height);
extern void WebPCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width, int height);
extern void WebPCopyPixels(const WebPPicture* const src,
WebPPicture* const dst);
#ifdef __cplusplus
@@ -47,18 +46,16 @@ void GIFGetBackgroundColor(const ColorMapObject* const color_map,
if (transparent_index != GIF_INDEX_INVALID &&
bgcolor_index == transparent_index) {
*bgcolor = GIF_TRANSPARENT_COLOR; // Special case.
} else if (color_map == NULL || color_map->Colors == NULL
|| bgcolor_index >= color_map->ColorCount) {
} else if (color_map == NULL || color_map->Colors == NULL ||
bgcolor_index >= color_map->ColorCount) {
*bgcolor = GIF_WHITE_COLOR;
fprintf(stderr,
"GIF decode warning: invalid background color index. Assuming "
"white background.\n");
} else {
const GifColorType color = color_map->Colors[bgcolor_index];
*bgcolor = (0xffu << 24)
| (color.Red << 16)
| (color.Green << 8)
| (color.Blue << 0);
*bgcolor = (0xffu << 24) | (color.Red << 16) | (color.Green << 8) |
(color.Blue << 0);
}
}
@@ -117,9 +114,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
const GifImageDesc* const image_desc = &gif->Image;
uint32_t* dst = NULL;
uint8_t* tmp = NULL;
const GIFFrameRect rect = {
image_desc->Left, image_desc->Top, image_desc->Width, image_desc->Height
};
const GIFFrameRect rect = {image_desc->Left, image_desc->Top,
image_desc->Width, image_desc->Height};
const uint64_t memory_needed = 4 * rect.width * (uint64_t)rect.height;
int ok = 0;
*gif_rect = rect;
@@ -130,8 +126,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
}
// Use a view for the sub-picture:
if (!WebPPictureView(picture, rect.x_offset, rect.y_offset,
rect.width, rect.height, &sub_image)) {
if (!WebPPictureView(picture, rect.x_offset, rect.y_offset, rect.width,
rect.height, &sub_image)) {
fprintf(stderr, "Sub-image %dx%d at position %d,%d is invalid!\n",
rect.width, rect.height, rect.x_offset, rect.y_offset);
return 0;
@@ -143,8 +139,8 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
if (image_desc->Interlace) { // Interlaced image.
// We need 4 passes, with the following offsets and jumps.
const int interlace_offsets[] = { 0, 4, 2, 1 };
const int interlace_jumps[] = { 8, 8, 4, 2 };
const int interlace_offsets[] = {0, 4, 2, 1};
const int interlace_jumps[] = {8, 8, 4, 2};
int pass;
for (pass = 0; pass < 4; ++pass) {
const size_t stride = (size_t)sub_image.argb_stride;
@@ -166,7 +162,7 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
}
ok = 1;
End:
End:
if (!ok) picture->error_code = sub_image.error_code;
WebPPictureFree(&sub_image);
WebPFree(tmp);
@@ -184,7 +180,7 @@ int GIFReadLoopCount(GifFileType* const gif, GifByteType** const buf,
return 0; // Loop count sub-block missing.
}
if ((*buf)[0] < 3 || (*buf)[1] != 1) {
return 0; // wrong size/marker
return 0; // wrong size/marker
}
*loop_count = (*buf)[2] | ((*buf)[3] << 8);
return 1;
@@ -220,8 +216,7 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
if (tmp == NULL) {
return 0;
}
memcpy((void*)(tmp + metadata->size),
subblock.bytes, subblock.size);
memcpy((void*)(tmp + metadata->size), subblock.bytes, subblock.size);
metadata->bytes = tmp;
metadata->size += subblock.size;
}
@@ -235,8 +230,8 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
return 1;
}
static void ClearRectangle(WebPPicture* const picture,
int left, int top, int width, int height) {
static void ClearRectangle(WebPPicture* const picture, int left, int top,
int width, int height) {
int i, j;
const size_t stride = picture->argb_stride;
uint32_t* dst = picture->argb + top * stride + left;
@@ -247,8 +242,8 @@ static void ClearRectangle(WebPPicture* const picture,
void GIFClearPic(WebPPicture* const pic, const GIFFrameRect* const rect) {
if (rect != NULL) {
ClearRectangle(pic, rect->x_offset, rect->y_offset,
rect->width, rect->height);
ClearRectangle(pic, rect->x_offset, rect->y_offset, rect->width,
rect->height);
} else {
ClearRectangle(pic, 0, 0, pic->width, pic->height);
}
@@ -266,15 +261,14 @@ void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
GIFClearPic(curr_canvas, rect);
} else if (dispose == GIF_DISPOSE_RESTORE_PREVIOUS) {
const size_t src_stride = prev_canvas->argb_stride;
const uint32_t* const src = prev_canvas->argb + rect->x_offset
+ rect->y_offset * src_stride;
const uint32_t* const src =
prev_canvas->argb + rect->x_offset + rect->y_offset * src_stride;
const size_t dst_stride = curr_canvas->argb_stride;
uint32_t* const dst = curr_canvas->argb + rect->x_offset
+ rect->y_offset * dst_stride;
uint32_t* const dst =
curr_canvas->argb + rect->x_offset + rect->y_offset * dst_stride;
assert(prev_canvas != NULL);
WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride),
(uint8_t*)dst, (int)(4 * dst_stride),
4 * rect->width, rect->height);
WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride), (uint8_t*)dst,
(int)(4 * dst_stride), 4 * rect->width, rect->height);
}
}
@@ -297,11 +291,11 @@ void GIFBlendFrames(const WebPPicture* const src,
void GIFDisplayError(const GifFileType* const gif, int gif_error) {
// libgif 4.2.0 has retired PrintGifError() and added GifErrorString().
#if LOCAL_GIF_PREREQ(4,2)
#if LOCAL_GIF_PREREQ(5,0)
#if LOCAL_GIF_PREREQ(4, 2)
#if LOCAL_GIF_PREREQ(5, 0)
// Static string actually, hence the const char* cast.
const char* error_str = (const char*)GifErrorString(
(gif == NULL) ? gif_error : gif->Error);
const char* error_str =
(const char*)GifErrorString((gif == NULL) ? gif_error : gif->Error);
#else
const char* error_str = (const char*)GifErrorString();
(void)gif;
@@ -319,7 +313,8 @@ void GIFDisplayError(const GifFileType* const gif, int gif_error) {
#else // !WEBP_HAVE_GIF
static void ErrorGIFNotAvailable(void) {
fprintf(stderr, "GIF support not compiled. Please install the libgif-dev "
fprintf(stderr,
"GIF support not compiled. Please install the libgif-dev "
"package before building.\n");
}

View File

@@ -30,12 +30,11 @@ extern "C" {
// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
# define LOCAL_GIF_PREREQ(maj, min) \
(LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
#define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
#define LOCAL_GIF_PREREQ(maj, min) (LOCAL_GIF_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_GIF_VERSION 0
# define LOCAL_GIF_PREREQ(maj, min) 0
#define LOCAL_GIF_VERSION 0
#define LOCAL_GIF_PREREQ(maj, min) 0
#endif
#define GIF_INDEX_INVALID (-1)
@@ -111,7 +110,7 @@ void GIFCopyPixels(const struct WebPPicture* const src,
struct WebPPicture* const dst);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_GIFDEC_H_

View File

@@ -43,16 +43,20 @@ static void Help(void) {
printf("File-level options (only used at the start of compression):\n");
printf(" -min_size ............ minimize size\n");
printf(" -kmax <int> .......... maximum number of frame between key-frames\n"
" (0=only keyframes)\n");
printf(" -kmin <int> .......... minimum number of frame between key-frames\n"
" (0=disable key-frames altogether)\n");
printf(
" -kmax <int> .......... maximum number of frame between key-frames\n"
" (0=only keyframes)\n");
printf(
" -kmin <int> .......... minimum number of frame between key-frames\n"
" (0=disable key-frames altogether)\n");
printf(" -mixed ............... use mixed lossy/lossless automatic mode\n");
printf(" -near_lossless <int> . use near-lossless image preprocessing\n"
" (0..100=off), default=100\n");
printf(" -sharp_yuv ........... use sharper (and slower) RGB->YUV "
"conversion\n "
"(lossy only)\n");
printf(
" -near_lossless <int> . use near-lossless image preprocessing\n"
" (0..100=off), default=100\n");
printf(
" -sharp_yuv ........... use sharper (and slower) RGB->YUV "
"conversion\n "
"(lossy only)\n");
printf(" -loop <int> .......... loop count (default: 0, = infinite loop)\n");
printf(" -v ................... verbose mode\n");
printf(" -h ................... this help\n");
@@ -64,20 +68,25 @@ static void Help(void) {
printf(" -lossless ............ use lossless mode (default)\n");
printf(" -lossy ............... use lossy mode\n");
printf(" -q <float> ........... quality\n");
printf(" -m <int> ............. compression method (0=fast, 6=slowest), "
"default=4\n");
printf(" -exact, -noexact ..... preserve or alter RGB values in transparent "
"area\n"
" (default: -noexact, may cause artifacts\n"
" with lossy animations)\n");
printf(
" -m <int> ............. compression method (0=fast, 6=slowest), "
"default=4\n");
printf(
" -exact, -noexact ..... preserve or alter RGB values in transparent "
"area\n"
" (default: -noexact, may cause artifacts\n"
" with lossy animations)\n");
printf("\n");
printf("example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
" -d 80 in2.tiff -o out.webp\n");
printf("\nNote: if a single file name is passed as the argument, the "
"arguments will be\n");
printf("tokenized from this file. The file name must not start with "
"the character '-'.\n");
printf(
"example: img2webp -loop 2 in0.png -lossy in1.jpg\n"
" -d 80 in2.tiff -o out.webp\n");
printf(
"\nNote: if a single file name is passed as the argument, the "
"arguments will be\n");
printf(
"tokenized from this file. The file name must not start with "
"the character '-'.\n");
printf("\nSupported input formats:\n %s\n",
WebPGetEnabledInputFileFormats());
}
@@ -127,7 +136,7 @@ static int SetLoopCount(int loop_count, WebPData* const webp_data) {
ok = (err == WEBP_MUX_OK);
}
End:
End:
WebPMuxDelete(mux);
if (!ok) {
fprintf(stderr, "Error during loop-count setting\n");
@@ -166,8 +175,7 @@ int main(int argc, const char* argv[]) {
argv = cmd_args.argv;
WebPDataInit(&webp_data);
if (!WebPAnimEncoderOptionsInit(&anim_config) ||
!WebPConfigInit(&config) ||
if (!WebPAnimEncoderOptionsInit(&anim_config) || !WebPConfigInit(&config) ||
!WebPPictureInit(&pic)) {
fprintf(stderr, "Library version mismatch!\n");
ok = 0;
@@ -225,7 +233,7 @@ int main(int argc, const char* argv[]) {
}
ok = !parse_error;
if (!ok) goto End;
argv[c] = NULL; // mark option as 'parsed' during 1st pass
argv[c] = NULL; // mark option as 'parsed' during 1st pass
} else {
have_input |= 1;
}
@@ -242,7 +250,7 @@ int main(int argc, const char* argv[]) {
config.lossless = 1;
for (c = 0; ok && c < argc; ++c) {
if (argv[c] == NULL) continue;
if (argv[c][0] == '-') { // parse local options
if (argv[c][0] == '-') { // parse local options
int parse_error = 0;
if (!strcmp(argv[c], "-lossy")) {
if (!anim_config.allow_mixed) config.lossless = 0;
@@ -263,7 +271,7 @@ int main(int argc, const char* argv[]) {
} else if (!strcmp(argv[c], "-noexact")) {
config.exact = 0;
} else {
parse_error = 1; // shouldn't be here.
parse_error = 1; // shouldn't be here.
fprintf(stderr, "Unknown option [%s]\n", argv[c]);
}
ok = !parse_error;
@@ -286,7 +294,7 @@ int main(int argc, const char* argv[]) {
if (!ok) goto End;
if (enc == NULL) {
width = pic.width;
width = pic.width;
height = pic.height;
enc = WebPAnimEncoderNew(width, height, &anim_config);
ok = (enc != NULL);
@@ -298,8 +306,9 @@ int main(int argc, const char* argv[]) {
if (ok) {
ok = (width == pic.width && height == pic.height);
if (!ok) {
fprintf(stderr, "Frame #%d dimension mismatched! "
"Got %d x %d. Was expecting %d x %d.\n",
fprintf(stderr,
"Frame #%d dimension mismatched! "
"Got %d x %d. Was expecting %d x %d.\n",
pic_num, pic.width, pic.height, width, height);
}
}
@@ -314,8 +323,8 @@ int main(int argc, const char* argv[]) {
if (!ok) goto End;
if (verbose) {
WFPRINTF(stderr, "Added frame #%3d at time %4d (file: %s)\n",
pic_num, timestamp_ms, GET_WARGV_SHIFTED(argv, c));
WFPRINTF(stderr, "Added frame #%3d at time %4d (file: %s)\n", pic_num,
timestamp_ms, GET_WARGV_SHIFTED(argv, c));
}
timestamp_ms += duration;
++pic_num;
@@ -323,8 +332,10 @@ int main(int argc, const char* argv[]) {
for (c = last_input_index + 1; c < argc; ++c) {
if (argv[c] != NULL) {
fprintf(stderr, "Warning: unused option [%s]!"
" Frame options go before the input frame.\n", argv[c]);
fprintf(stderr,
"Warning: unused option [%s]!"
" Frame options go before the input frame.\n",
argv[c]);
}
}
@@ -335,7 +346,7 @@ int main(int argc, const char* argv[]) {
fprintf(stderr, "Error during final animation assembly.\n");
}
End:
End:
// free resources
WebPAnimEncoderDelete(enc);
@@ -353,8 +364,8 @@ int main(int argc, const char* argv[]) {
}
if (ok) {
fprintf(stderr, "[%d frames, %u bytes].\n",
pic_num, (unsigned int)webp_data.size);
fprintf(stderr, "[%d frames, %u bytes].\n", pic_num,
(unsigned int)webp_data.size);
}
WebPDataClear(&webp_data);
ExUtilDeleteCommandLineArguments(&cmd_args);

View File

@@ -28,17 +28,13 @@ static WEBP_INLINE void StopwatchReset(Stopwatch* watch) {
static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
const LARGE_INTEGER old_value = *watch;
LARGE_INTEGER freq;
if (!QueryPerformanceCounter(watch))
return 0.0;
if (!QueryPerformanceFrequency(&freq))
return 0.0;
if (freq.QuadPart == 0)
return 0.0;
if (!QueryPerformanceCounter(watch)) return 0.0;
if (!QueryPerformanceFrequency(&freq)) return 0.0;
if (freq.QuadPart == 0) return 0.0;
return (watch->QuadPart - old_value.QuadPart) / (double)freq.QuadPart;
}
#else /* !_WIN32 */
#else /* !_WIN32 */
#include <string.h> // memcpy
#include <sys/time.h>
@@ -58,6 +54,6 @@ static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
return delta_sec + delta_usec / 1000000.0;
}
#endif /* _WIN32 */
#endif /* _WIN32 */
#endif // WEBP_EXAMPLES_STOPWATCH_H_

View File

@@ -25,20 +25,20 @@
#include <fcntl.h>
#include <io.h>
#include <shellapi.h>
#include <wchar.h>
#include <windows.h>
#include <shellapi.h>
// Create a wchar_t array containing Unicode parameters.
#define INIT_WARGV(ARGC, ARGV) \
int wargc; \
const W_CHAR** const wargv = \
(const W_CHAR**)CommandLineToArgvW(GetCommandLineW(), &wargc); \
do { \
if (wargv == NULL || wargc != (ARGC)) { \
fprintf(stderr, "Error: Unable to get Unicode arguments.\n"); \
FREE_WARGV_AND_RETURN(-1); \
} \
#define INIT_WARGV(ARGC, ARGV) \
int wargc; \
const W_CHAR** const wargv = \
(const W_CHAR**)CommandLineToArgvW(GetCommandLineW(), &wargc); \
do { \
if (wargv == NULL || wargc != (ARGC)) { \
fprintf(stderr, "Error: Unable to get Unicode arguments.\n"); \
FREE_WARGV_AND_RETURN(-1); \
} \
} while (0)
// Use this to get a Unicode argument (e.g. file path).

View File

@@ -28,6 +28,7 @@
#include <gif_lib.h>
#include <string.h>
#include "./gifdec.h"
#if !defined(STDIN_FILENO)

View File

@@ -38,12 +38,11 @@
#include <qcms.h>
#endif
#include "webp/decode.h"
#include "webp/demux.h"
#include "../examples/example_util.h"
#include "../imageio/imageio_util.h"
#include "./unicode.h"
#include "webp/decode.h"
#include "webp/demux.h"
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
@@ -135,9 +134,8 @@ static int ApplyColorProfile(const WebPData* const profile,
}
qcms_profile_precache_output_transform(output_profile);
transform = qcms_transform_create(input_profile, input_type,
output_profile, output_type,
intent);
transform = qcms_transform_create(input_profile, input_type, output_profile,
output_type, intent);
if (transform == NULL) {
fprintf(stderr, "Error creating color transform!\n");
goto Error;
@@ -149,7 +147,7 @@ static int ApplyColorProfile(const WebPData* const profile,
}
ok = 1;
Error:
Error:
if (input_profile != NULL) qcms_profile_release(input_profile);
if (output_profile != NULL) qcms_profile_release(output_profile);
if (transform != NULL) qcms_transform_release(transform);
@@ -164,7 +162,7 @@ static int ApplyColorProfile(const WebPData* const profile,
//------------------------------------------------------------------------------
// File decoding
static int Decode(void) { // Fills kParams.curr_frame
static int Decode(void) { // Fills kParams.curr_frame
const WebPIterator* const curr = &kParams.curr_frame;
WebPDecoderConfig* const config = &kParams.config;
WebPDecBuffer* const output_buffer = &config->output;
@@ -172,8 +170,8 @@ static int Decode(void) { // Fills kParams.curr_frame
ClearPreviousPic();
output_buffer->colorspace = MODE_RGBA;
ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size,
config) == VP8_STATUS_OK);
ok = (WebPDecode(curr->fragment.bytes, curr->fragment.size, config) ==
VP8_STATUS_OK);
if (!ok) {
fprintf(stderr, "Decoding of frame #%d failed!\n", curr->frame_num);
} else {
@@ -341,8 +339,7 @@ static void DrawBackground(void) {
glPushMatrix();
glLoadIdentity();
glColor4f(GetColorf(kParams.bg_color, 16), // BGRA from spec
GetColorf(kParams.bg_color, 8),
GetColorf(kParams.bg_color, 0),
GetColorf(kParams.bg_color, 8), GetColorf(kParams.bg_color, 0),
GetColorf(kParams.bg_color, 24));
glRecti(-1, -1, +1, +1);
glPopMatrix();
@@ -402,8 +399,7 @@ static void HandleDisplay(void) {
*prev = *curr;
glDrawPixels(pic->width, pic->height,
GL_RGBA, GL_UNSIGNED_BYTE,
glDrawPixels(pic->width, pic->height, GL_RGBA, GL_UNSIGNED_BYTE,
(GLvoid*)pic->u.RGBA.rgba);
if (kParams.print_info) {
char tmp[32];
@@ -417,8 +413,8 @@ static void HandleDisplay(void) {
glRasterPos2f(-0.95f, 0.80f);
PrintString(tmp);
if (curr->x_offset != 0 || curr->y_offset != 0) {
snprintf(tmp, sizeof(tmp), " (offset:%d,%d)",
curr->x_offset, curr->y_offset);
snprintf(tmp, sizeof(tmp), " (offset:%d,%d)", curr->x_offset,
curr->y_offset);
glRasterPos2f(-0.95f, 0.70f);
PrintString(tmp);
}
@@ -571,8 +567,8 @@ int main(int argc, char* argv[]) {
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
}
if (!ImgIoUtilReadFile(kParams.file_name,
&kParams.data.bytes, &kParams.data.size)) {
if (!ImgIoUtilReadFile(kParams.file_name, &kParams.data.bytes,
&kParams.data.size)) {
goto Error;
}
@@ -603,7 +599,8 @@ int main(int argc, char* argv[]) {
if (!WebPDemuxGetChunk(kParams.dmux, "ICCP", 1, &kParams.iccp)) goto Error;
printf("VP8X: Found color profile\n");
#else
fprintf(stderr, "Warning: color profile present, but qcms is unavailable!\n"
fprintf(stderr,
"Warning: color profile present, but qcms is unavailable!\n"
"Build libqcms from Mozilla or Chromium and define WEBP_HAVE_QCMS "
"before building.\n");
#endif
@@ -614,8 +611,8 @@ int main(int argc, char* argv[]) {
kParams.has_animation = (curr->num_frames > 1);
kParams.loop_count = (int)WebPDemuxGetI(kParams.dmux, WEBP_FF_LOOP_COUNT);
kParams.bg_color = WebPDemuxGetI(kParams.dmux, WEBP_FF_BACKGROUND_COLOR);
printf("VP8X: Found %d images in file (loop count = %d)\n",
curr->num_frames, kParams.loop_count);
printf("VP8X: Found %d images in file (loop count = %d)\n", curr->num_frames,
kParams.loop_count);
// Decode first frame
if (!Decode()) goto Error;
@@ -645,12 +642,12 @@ int main(int argc, char* argv[]) {
ClearParams();
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
Error:
Error:
ClearParams();
FREE_WARGV_AND_RETURN(EXIT_FAILURE);
}
#else // !WEBP_HAVE_GL
#else // !WEBP_HAVE_GL
int main(int argc, const char* argv[]) {
fprintf(stderr, "OpenGL support not enabled in %s.\n", argv[0]);

View File

@@ -47,25 +47,13 @@
++webp_info->num_warnings; \
} while (0)
static const char* const kFormats[3] = {
"Unknown",
"Lossy",
"Lossless"
};
static const char* const kFormats[3] = {"Unknown", "Lossy", "Lossless"};
static const char* const kLosslessTransforms[4] = {
"Predictor",
"Cross Color",
"Subtract Green",
"Color Indexing"
};
"Predictor", "Cross Color", "Subtract Green", "Color Indexing"};
static const char* const kAlphaFilterMethods[4] = {
"None",
"Horizontal",
"Vertical",
"Gradient"
};
static const char* const kAlphaFilterMethods[4] = {"None", "Horizontal",
"Vertical", "Gradient"};
typedef enum {
WEBP_INFO_OK = 0,
@@ -129,15 +117,15 @@ static void WebPInfoInit(WebPInfo* const webp_info) {
}
static const uint32_t kWebPChunkTags[CHUNK_TYPES] = {
MKFOURCC('V', 'P', '8', ' '),
MKFOURCC('V', 'P', '8', 'L'),
MKFOURCC('V', 'P', '8', 'X'),
MKFOURCC('A', 'L', 'P', 'H'),
MKFOURCC('A', 'N', 'I', 'M'),
MKFOURCC('A', 'N', 'M', 'F'),
MKFOURCC('I', 'C', 'C', 'P'),
MKFOURCC('E', 'X', 'I', 'F'),
MKFOURCC('X', 'M', 'P', ' '),
MKFOURCC('V', 'P', '8', ' '), //
MKFOURCC('V', 'P', '8', 'L'), //
MKFOURCC('V', 'P', '8', 'X'), //
MKFOURCC('A', 'L', 'P', 'H'), //
MKFOURCC('A', 'N', 'I', 'M'), //
MKFOURCC('A', 'N', 'M', 'F'), //
MKFOURCC('I', 'C', 'C', 'P'), //
MKFOURCC('E', 'X', 'I', 'F'), //
MKFOURCC('X', 'M', 'P', ' '), //
};
// -----------------------------------------------------------------------------
@@ -200,9 +188,7 @@ static const uint8_t* GetBuffer(MemBuffer* const mem) {
return mem->buf + mem->start;
}
static void Skip(MemBuffer* const mem, size_t size) {
mem->start += size;
}
static void Skip(MemBuffer* const mem, size_t size) { mem->start += size; }
static uint32_t ReadMemBufLE32(MemBuffer* const mem) {
const uint8_t* const data = mem->buf + mem->start;
@@ -266,9 +252,10 @@ static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
int update_map, update_data;
GET_BITS(update_map, 1);
GET_BITS(update_data, 1);
printf(" Update map: %d\n"
" Update data: %d\n",
update_map, update_data);
printf(
" Update map: %d\n"
" Update data: %d\n",
update_map, update_data);
if (update_data) {
int i, a_delta;
int quantizer[4] = {0, 0, 0, 0};
@@ -298,8 +285,8 @@ static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
GET_BITS(bit, 1);
if (bit) GET_BITS(prob_segment[i], 8);
}
printf(" Prob segment: %d %d %d\n",
prob_segment[0], prob_segment[1], prob_segment[2]);
printf(" Prob segment: %d %d %d\n", prob_segment[0], prob_segment[1],
prob_segment[2]);
}
}
return WEBP_INFO_OK;
@@ -371,12 +358,13 @@ static WebPInfoStatus ParseLossyHeader(const ChunkData* const chunk_data,
LOG_ERROR("Invalid lossy bitstream signature.");
return WEBP_INFO_BITSTREAM_ERROR;
}
printf(" Width: %d\n"
" X scale: %d\n"
" Height: %d\n"
" Y scale: %d\n",
((data[4] << 8) | data[3]) & 0x3fff, data[4] >> 6,
((data[6] << 8) | data[5]) & 0x3fff, data[6] >> 6);
printf(
" Width: %d\n"
" X scale: %d\n"
" Height: %d\n"
" Y scale: %d\n",
((data[4] << 8) | data[3]) & 0x3fff, data[4] >> 6,
((data[6] << 8) | data[5]) & 0x3fff, data[6] >> 6);
data += 7;
data_size -= 7;
} else {
@@ -479,7 +467,7 @@ static int LLGetBits(const uint8_t* const data, size_t data_size, size_t nb,
static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
const uint8_t* const data,
size_t data_size,
uint64_t* const bit_pos) {
uint64_t* const bit_pos) {
int use_transform, block_size, n_colors;
LL_GET_BITS(use_transform, 1);
printf(" Use transform: %s\n", use_transform ? "Yes" : "No");
@@ -499,7 +487,8 @@ static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
n_colors += 1;
printf(" No. of colors: %d\n", n_colors);
break;
default: break;
default:
break;
}
}
return WEBP_INFO_OK;
@@ -556,8 +545,8 @@ static WebPInfoStatus ParseAlphaHeader(const ChunkData* const chunk_data,
const int pre_processing = (data[0] >> 4) & 0x03;
const int reserved_bits = (data[0] >> 6) & 0x03;
printf(" Compression: %d\n", compression_method);
printf(" Filter: %s (%d)\n",
kAlphaFilterMethods[filter], filter);
printf(" Filter: %s (%d)\n", kAlphaFilterMethods[filter],
filter);
printf(" Pre-processing: %d\n", pre_processing);
if (compression_method > ALPHA_LOSSLESS_COMPRESSION) {
LOG_ERROR("Invalid Alpha compression method.");
@@ -642,7 +631,7 @@ static WebPInfoStatus ParseChunk(const WebPInfo* const webp_info,
LOG_ERROR("Size of chunk payload is over limit.");
return WEBP_INFO_INVALID_PARAM;
}
if (payload_size_padded > MemDataSize(mem)){
if (payload_size_padded > MemDataSize(mem)) {
LOG_ERROR("Truncated data detected when parsing chunk payload.");
return WEBP_INFO_TRUNCATED_DATA;
}
@@ -695,8 +684,8 @@ static WebPInfoStatus ProcessVP8XChunk(const ChunkData* const chunk_data,
(webp_info->feature_flags & EXIF_FLAG) != 0,
(webp_info->feature_flags & XMP_FLAG) != 0,
(webp_info->feature_flags & ANIMATION_FLAG) != 0);
printf(" Canvas size %d x %d\n",
webp_info->canvas_width, webp_info->canvas_height);
printf(" Canvas size %d x %d\n", webp_info->canvas_width,
webp_info->canvas_height);
}
if (webp_info->canvas_width > MAX_CANVAS_SIZE) {
LOG_WARN("Canvas width is out of range in VP8X chunk.");
@@ -727,10 +716,8 @@ static WebPInfoStatus ProcessANIMChunk(const ChunkData* const chunk_data,
++webp_info->chunk_counts[CHUNK_ANIM];
if (!webp_info->quiet) {
printf(" Background color:(ARGB) %02x %02x %02x %02x\n",
(webp_info->bgcolor >> 24) & 0xff,
(webp_info->bgcolor >> 16) & 0xff,
(webp_info->bgcolor >> 8) & 0xff,
webp_info->bgcolor & 0xff);
(webp_info->bgcolor >> 24) & 0xff, (webp_info->bgcolor >> 16) & 0xff,
(webp_info->bgcolor >> 8) & 0xff, webp_info->bgcolor & 0xff);
printf(" Loop count : %d\n", webp_info->loop_count);
}
if (webp_info->loop_count > MAX_LOOP_COUNT) {
@@ -765,9 +752,10 @@ static WebPInfoStatus ProcessANMFChunk(const ChunkData* const chunk_data,
blend = (temp >> 1) & 1;
++webp_info->chunk_counts[CHUNK_ANMF];
if (!webp_info->quiet) {
printf(" Offset_X: %d\n Offset_Y: %d\n Width: %d\n Height: %d\n"
" Duration: %d\n Dispose: %d\n Blend: %d\n",
offset_x, offset_y, width, height, duration, dispose, blend);
printf(
" Offset_X: %d\n Offset_Y: %d\n Width: %d\n Height: %d\n"
" Duration: %d\n Dispose: %d\n Blend: %d\n",
offset_x, offset_y, width, height, duration, dispose, blend);
}
if (duration > MAX_DURATION) {
LOG_ERROR("Invalid duration parameter in ANMF chunk.");
@@ -804,10 +792,11 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
}
if (!webp_info->quiet) {
assert(features.format >= 0 && features.format <= 2);
printf(" Width: %d\n Height: %d\n Alpha: %d\n Animation: %d\n"
" Format: %s (%d)\n",
features.width, features.height, features.has_alpha,
features.has_animation, kFormats[features.format], features.format);
printf(
" Width: %d\n Height: %d\n Alpha: %d\n Animation: %d\n"
" Format: %s (%d)\n",
features.width, features.height, features.has_alpha,
features.has_animation, kFormats[features.format], features.format);
}
if (webp_info->is_processing_anim_frame) {
++webp_info->anmf_subchunk_counts[chunk_data->id == CHUNK_VP8 ? 0 : 1];
@@ -831,8 +820,7 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
LOG_ERROR("Multiple VP8/VP8L chunks detected.");
return WEBP_INFO_PARSE_ERROR;
}
if (chunk_data->id == CHUNK_VP8L &&
webp_info->chunk_counts[CHUNK_ALPHA]) {
if (chunk_data->id == CHUNK_VP8L && webp_info->chunk_counts[CHUNK_ALPHA]) {
LOG_WARN("Both VP8L and ALPH chunks are detected.");
}
if (webp_info->chunk_counts[CHUNK_ANIM] ||
@@ -882,8 +870,9 @@ static WebPInfoStatus ProcessALPHChunk(const ChunkData* const chunk_data,
webp_info->seen_alpha_subchunk = 1;
if (webp_info->seen_image_subchunk) {
LOG_ERROR("ALPHA sub-chunk detected after VP8 sub-chunk "
"in an ANMF chunk.");
LOG_ERROR(
"ALPHA sub-chunk detected after VP8 sub-chunk "
"in an ANMF chunk.");
return WEBP_INFO_PARSE_ERROR;
}
} else {
@@ -938,7 +927,7 @@ static WebPInfoStatus ProcessChunk(const ChunkData* const chunk_data,
if (chunk_data->id == CHUNK_UNKNOWN) {
char error_message[50];
snprintf(error_message, 50, "Unknown chunk at offset %6d, length %6d",
(int)chunk_data->offset, (int)chunk_data->size);
(int)chunk_data->offset, (int)chunk_data->size);
LOG_WARN(error_message);
} else {
if (!webp_info->quiet) {
@@ -949,9 +938,8 @@ static WebPInfoStatus ProcessChunk(const ChunkData* const chunk_data,
((fourcc << 8) & 0xff0000) | (fourcc << 24);
#endif
memcpy(tag, &fourcc, sizeof(tag));
printf("Chunk %c%c%c%c at offset %6d, length %6d\n",
tag[0], tag[1], tag[2], tag[3], (int)chunk_data->offset,
(int)chunk_data->size);
printf("Chunk %c%c%c%c at offset %6d, length %6d\n", tag[0], tag[1],
tag[2], tag[3], (int)chunk_data->offset, (int)chunk_data->size);
}
}
switch (id) {
@@ -1048,7 +1036,7 @@ static WebPInfoStatus Validate(WebPInfo* const webp_info) {
return WEBP_INFO_PARSE_ERROR;
}
if (animation && (!webp_info->chunk_counts[CHUNK_ANIM] ||
!webp_info->chunk_counts[CHUNK_ANMF])) {
!webp_info->chunk_counts[CHUNK_ANMF])) {
LOG_ERROR("No ANIM/ANMF chunk detected in animation file.");
return WEBP_INFO_PARSE_ERROR;
}
@@ -1060,14 +1048,14 @@ static void ShowSummary(const WebPInfo* const webp_info) {
int i;
printf("Summary:\n");
printf("Number of frames: %d\n", webp_info->num_frames);
printf("Chunk type : VP8 VP8L VP8X ALPH ANIM ANMF(VP8 /VP8L/ALPH) ICCP "
printf(
"Chunk type : VP8 VP8L VP8X ALPH ANIM ANMF(VP8 /VP8L/ALPH) ICCP "
"EXIF XMP\n");
printf("Chunk counts: ");
for (i = 0; i < CHUNK_TYPES; ++i) {
printf("%4d ", webp_info->chunk_counts[i]);
if (i == CHUNK_ANMF) {
printf("%4d %4d %4d ",
webp_info->anmf_subchunk_counts[0],
printf("%4d %4d %4d ", webp_info->anmf_subchunk_counts[0],
webp_info->anmf_subchunk_counts[1],
webp_info->anmf_subchunk_counts[2]);
}
@@ -1097,7 +1085,7 @@ static WebPInfoStatus AnalyzeWebP(WebPInfo* const webp_info,
// Final check.
webp_info_status = Validate(webp_info);
Error:
Error:
if (!webp_info->quiet) {
if (webp_info_status == WEBP_INFO_OK) {
printf("No error detected.\n");
@@ -1112,15 +1100,16 @@ static WebPInfoStatus AnalyzeWebP(WebPInfo* const webp_info,
}
static void Help(void) {
printf("Usage: webpinfo [options] in_files\n"
"Note: there could be multiple input files;\n"
" options must come before input files.\n"
"Options:\n"
" -version ........... Print version number and exit.\n"
" -quiet ............. Do not show chunk parsing information.\n"
" -diag .............. Show parsing error diagnosis.\n"
" -summary ........... Show chunk stats summary.\n"
" -bitstream_info .... Parse bitstream header.\n");
printf(
"Usage: webpinfo [options] in_files\n"
"Note: there could be multiple input files;\n"
" options must come before input files.\n"
"Options:\n"
" -version ........... Print version number and exit.\n"
" -quiet ............. Do not show chunk parsing information.\n"
" -diag .............. Show parsing error diagnosis.\n"
" -summary ........... Show chunk stats summary.\n"
" -bitstream_info .... Parse bitstream header.\n");
}
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
@@ -1153,8 +1142,8 @@ int main(int argc, const char* argv[]) {
parse_bitstream = 1;
} else if (!strcmp(argv[c], "-version")) {
const int version = WebPGetDecoderVersion();
printf("WebP Decoder version: %d.%d.%d\n",
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
printf("WebP Decoder version: %d.%d.%d\n", (version >> 16) & 0xff,
(version >> 8) & 0xff, version & 0xff);
FREE_WARGV_AND_RETURN(EXIT_SUCCESS);
} else { // Assume the remaining are all input files.
break;

View File

@@ -106,14 +106,11 @@ typedef enum {
LAST_FEATURE
} FeatureType;
static const char* const kFourccList[LAST_FEATURE] = {
NULL, "EXIF", "XMP ", "ICCP", "ANMF"
};
static const char* const kFourccList[LAST_FEATURE] = {NULL, "EXIF", "XMP ",
"ICCP", "ANMF"};
static const char* const kDescriptions[LAST_FEATURE] = {
NULL, "EXIF metadata", "XMP metadata", "ICC profile",
"Animation frame"
};
NULL, "EXIF metadata", "XMP metadata", "ICC profile", "Animation frame"};
typedef struct {
CommandLineArguments cmd_args;
@@ -143,50 +140,49 @@ static int CountOccurrences(const CommandLineArguments* const args,
}
static const char* const kErrorMessages[-WEBP_MUX_NOT_ENOUGH_DATA + 1] = {
"WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
"WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"
};
"WEBP_MUX_NOT_FOUND", "WEBP_MUX_INVALID_ARGUMENT", "WEBP_MUX_BAD_DATA",
"WEBP_MUX_MEMORY_ERROR", "WEBP_MUX_NOT_ENOUGH_DATA"};
static const char* ErrorString(WebPMuxError err) {
assert(err <= WEBP_MUX_NOT_FOUND && err >= WEBP_MUX_NOT_ENOUGH_DATA);
return kErrorMessages[-err];
}
#define RETURN_IF_ERROR(ERR_MSG) \
do { \
if (err != WEBP_MUX_OK) { \
fprintf(stderr, ERR_MSG); \
return err; \
} \
#define RETURN_IF_ERROR(ERR_MSG) \
do { \
if (err != WEBP_MUX_OK) { \
fprintf(stderr, ERR_MSG); \
return err; \
} \
} while (0)
#define RETURN_IF_ERROR3(ERR_MSG, FORMAT_STR1, FORMAT_STR2) \
do { \
if (err != WEBP_MUX_OK) { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
return err; \
} \
#define RETURN_IF_ERROR3(ERR_MSG, FORMAT_STR1, FORMAT_STR2) \
do { \
if (err != WEBP_MUX_OK) { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
return err; \
} \
} while (0)
#define ERROR_GOTO1(ERR_MSG, LABEL) \
do { \
fprintf(stderr, ERR_MSG); \
ok = 0; \
goto LABEL; \
#define ERROR_GOTO1(ERR_MSG, LABEL) \
do { \
fprintf(stderr, ERR_MSG); \
ok = 0; \
goto LABEL; \
} while (0)
#define ERROR_GOTO2(ERR_MSG, FORMAT_STR, LABEL) \
do { \
fprintf(stderr, ERR_MSG, FORMAT_STR); \
ok = 0; \
goto LABEL; \
#define ERROR_GOTO2(ERR_MSG, FORMAT_STR, LABEL) \
do { \
fprintf(stderr, ERR_MSG, FORMAT_STR); \
ok = 0; \
goto LABEL; \
} while (0)
#define ERROR_GOTO3(ERR_MSG, FORMAT_STR1, FORMAT_STR2, LABEL) \
do { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
ok = 0; \
goto LABEL; \
#define ERROR_GOTO3(ERR_MSG, FORMAT_STR1, FORMAT_STR2, LABEL) \
do { \
fprintf(stderr, ERR_MSG, FORMAT_STR1, FORMAT_STR2); \
ok = 0; \
goto LABEL; \
} while (0)
static WebPMuxError DisplayInfo(const WebPMux* mux) {
@@ -208,10 +204,10 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
// Print the features present.
printf("Features present:");
if (flag & ANIMATION_FLAG) printf(" animation");
if (flag & ICCP_FLAG) printf(" ICC profile");
if (flag & EXIF_FLAG) printf(" EXIF metadata");
if (flag & XMP_FLAG) printf(" XMP metadata");
if (flag & ALPHA_FLAG) printf(" transparency");
if (flag & ICCP_FLAG) printf(" ICC profile");
if (flag & EXIF_FLAG) printf(" EXIF metadata");
if (flag & XMP_FLAG) printf(" XMP metadata");
if (flag & ALPHA_FLAG) printf(" transparency");
printf("\n");
if (flag & ANIMATION_FLAG) {
@@ -222,8 +218,8 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
WebPMuxAnimParams params;
err = WebPMuxGetAnimationParams(mux, &params);
assert(err == WEBP_MUX_OK);
printf("Background color : 0x%.8X Loop Count : %d\n",
params.bgcolor, params.loop_count);
printf("Background color : 0x%.8X Loop Count : %d\n", params.bgcolor,
params.loop_count);
err = WebPMuxNumChunks(mux, id, &nFrames);
assert(err == WEBP_MUX_OK);
@@ -255,9 +251,9 @@ static WebPMuxError DisplayInfo(const WebPMux* mux) {
printf("%8d %10s %5s ", frame.duration, dispose, blend);
}
printf("%10d %11s\n", (int)frame.bitstream.size,
(features.format == 1) ? "lossy" :
(features.format == 2) ? "lossless" :
"undefined");
(features.format == 1) ? "lossy"
: (features.format == 2) ? "lossless"
: "undefined");
}
WebPDataClear(&frame.bitstream);
RETURN_IF_ERROR3("Failed to retrieve %s#%d\n", type_str, i);
@@ -305,8 +301,9 @@ static void PrintHelp(void) {
printf(" webpmux -duration DURATION_OPTIONS [-duration ...]\n");
printf(" INPUT -o OUTPUT\n");
printf(" webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT\n");
printf(" webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
"\n");
printf(
" webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]"
"\n");
printf(" [-bgcolor BACKGROUND_COLOR] -o OUTPUT\n");
printf(" webpmux -info INPUT\n");
printf(" webpmux [-h|-help]\n");
@@ -360,8 +357,9 @@ static void PrintHelp(void) {
printf(" 'di' is the pause duration before next frame,\n");
printf(" 'xi','yi' specify the image offset for this frame,\n");
printf(" 'mi' is the dispose method for this frame (0 or 1),\n");
printf(" 'bi' is the blending method for this frame (+b or -b)"
"\n");
printf(
" 'bi' is the blending method for this frame (+b or -b)"
"\n");
printf("\n");
printf("LOOP_COUNT:\n");
@@ -372,27 +370,33 @@ static void PrintHelp(void) {
printf("BACKGROUND_COLOR:\n");
printf(" Background color of the canvas.\n");
printf(" A,R,G,B\n");
printf(" where: 'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
"specifying\n");
printf(" the Alpha, Red, Green and Blue component values "
"respectively\n");
printf(
" where: 'A', 'R', 'G' and 'B' are integers in the range 0 to 255 "
"specifying\n");
printf(
" the Alpha, Red, Green and Blue component values "
"respectively\n");
printf(" [Default: 255,255,255,255]\n");
printf("\nINPUT & OUTPUT are in WebP format.\n");
printf("\nNote: The nature of EXIF, XMP and ICC data is not checked");
printf(" and is assumed to be\nvalid.\n");
printf("\nNote: if a single file name is passed as the argument, the "
"arguments will be\n");
printf("tokenized from this file. The file name must not start with "
"the character '-'.\n");
printf(
"\nNote: if a single file name is passed as the argument, the "
"arguments will be\n");
printf(
"tokenized from this file. The file name must not start with "
"the character '-'.\n");
}
static void WarnAboutOddOffset(const WebPMuxFrameInfo* const info) {
if ((info->x_offset | info->y_offset) & 1) {
fprintf(stderr, "Warning: odd offsets will be snapped to even values"
" (%d, %d) -> (%d, %d)\n", info->x_offset, info->y_offset,
info->x_offset & ~1, info->y_offset & ~1);
fprintf(stderr,
"Warning: odd offsets will be snapped to even values"
" (%d, %d) -> (%d, %d)\n",
info->x_offset, info->y_offset, info->x_offset & ~1,
info->y_offset & ~1);
}
}
@@ -420,8 +424,8 @@ static int WriteData(const char* filename, const WebPData* const webpdata) {
if (fwrite(webpdata->bytes, webpdata->size, 1, fout) != 1) {
WFPRINTF(stderr, "Error writing file %s!\n", (const W_CHAR*)filename);
} else {
WFPRINTF(stderr, "Saved file %s (%d bytes)\n",
(const W_CHAR*)filename, (int)webpdata->size);
WFPRINTF(stderr, "Saved file %s (%d bytes)\n", (const W_CHAR*)filename,
(int)webpdata->size);
ok = 1;
}
if (fout != stdout) fclose(fout);
@@ -454,8 +458,8 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
if (err == WEBP_MUX_OK) {
err = WebPMuxSetAnimationParams(new_mux, &p);
if (err != WEBP_MUX_OK) {
ERROR_GOTO2("Error (%s) handling animation params.\n",
ErrorString(err), End);
ERROR_GOTO2("Error (%s) handling animation params.\n", ErrorString(err),
End);
}
} else {
/* it might not be an animation. Just keep moving. */
@@ -473,7 +477,7 @@ static WebPMux* DuplicateMuxHeader(const WebPMux* const mux) {
}
}
End:
End:
if (!ok) {
WebPMuxDelete(new_mux);
new_mux = NULL;
@@ -511,8 +515,7 @@ static int ParseFrameArgs(const char* args, WebPMuxFrameInfo* const info) {
if (blend_method != 'b') return 0;
if (plus_minus != '-' && plus_minus != '+') return 0;
info->blend_method =
(plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
info->blend_method = (plus_minus == '+') ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
return 1;
}
@@ -584,8 +587,10 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
}
if ((num_frame_args == 0) && (num_loop_args + num_bgcolor_args > 0)) {
ERROR_GOTO1("ERROR: Loop count and background color are relevant only in "
"case of animation.\n", ErrValidate);
ERROR_GOTO1(
"ERROR: Loop count and background color are relevant only in "
"case of animation.\n",
ErrValidate);
}
if (num_durations_args > 0 && num_frame_args != 0) {
ERROR_GOTO1("ERROR: Can not combine -duration and -frame commands.\n",
@@ -603,7 +608,7 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
*num_feature_args = num_frame_args + num_loop_args + num_bgcolor_args;
}
ErrValidate:
ErrValidate:
return ok;
}
@@ -611,12 +616,12 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
#define FEATURETYPE_IS_NIL (config->type == NIL_FEATURE)
#define CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL) \
do { \
if (argc < i + (NUM)) { \
fprintf(stderr, "ERROR: Too few arguments for '%s'.\n", argv[i]); \
goto LABEL; \
} \
#define CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL) \
do { \
if (argc < i + (NUM)) { \
fprintf(stderr, "ERROR: Too few arguments for '%s'.\n", argv[i]); \
goto LABEL; \
} \
} while (0)
#define CHECK_NUM_ARGS_AT_MOST(NUM, LABEL) \
@@ -627,10 +632,10 @@ static int ValidateCommandLine(const CommandLineArguments* const cmd_args,
} \
} while (0)
#define CHECK_NUM_ARGS_EXACTLY(NUM, LABEL) \
do { \
CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL); \
CHECK_NUM_ARGS_AT_MOST(NUM, LABEL); \
#define CHECK_NUM_ARGS_EXACTLY(NUM, LABEL) \
do { \
CHECK_NUM_ARGS_AT_LEAST(NUM, LABEL); \
CHECK_NUM_ARGS_AT_MOST(NUM, LABEL); \
} while (0)
// Parses command-line arguments to fill up config object. Also performs some
@@ -739,8 +744,8 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
exit(0);
} else if (!strcmp(argv[i], "-version")) {
const int version = WebPGetMuxVersion();
printf("%d.%d.%d\n",
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
printf("%d.%d.%d\n", (version >> 16) & 0xff, (version >> 8) & 0xff,
version & 0xff);
DeleteConfig(config);
LOCAL_FREE((W_CHAR**)unicode_argv);
exit(0);
@@ -771,8 +776,9 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
if (!strcmp(argv[i], "icc") || !strcmp(argv[i], "exif") ||
!strcmp(argv[i], "xmp")) {
if (FEATURETYPE_IS_NIL) {
config->type = (!strcmp(argv[i], "icc")) ? FEATURE_ICCP :
(!strcmp(argv[i], "exif")) ? FEATURE_EXIF : FEATURE_XMP;
config->type = (!strcmp(argv[i], "icc")) ? FEATURE_ICCP
: (!strcmp(argv[i], "exif")) ? FEATURE_EXIF
: FEATURE_XMP;
} else {
ERROR_GOTO1("ERROR: Multiple features specified.\n", ErrParse);
}
@@ -826,7 +832,7 @@ static int ParseCommandLine(Config* config, const W_CHAR** const unicode_argv) {
}
}
}
ErrParse:
ErrParse:
return ok;
}
@@ -858,7 +864,7 @@ static int ValidateConfig(Config* const config) {
ERROR_GOTO1("ERROR: No output file specified.\n", ErrValidate2);
}
ErrValidate2:
ErrValidate2:
return ok;
}
@@ -889,7 +895,7 @@ static int InitializeConfig(int argc, const char* argv[], Config* const config,
ERROR_GOTO1("Exiting due to command-line parsing error.\n", Err1);
}
Err1:
Err1:
return ok;
}
@@ -921,8 +927,8 @@ static int GetFrame(const WebPMux* mux, const Config* config) {
err = WebPMuxGetFrame(mux, num, &info);
if (err == WEBP_MUX_OK && info.id != id) err = WEBP_MUX_NOT_FOUND;
if (err != WEBP_MUX_OK) {
ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n",
ErrorString(err), num, ErrGet);
ERROR_GOTO3("ERROR (%s): Could not get frame %d.\n", ErrorString(err), num,
ErrGet);
}
mux_single = WebPMuxNew();
@@ -939,7 +945,7 @@ static int GetFrame(const WebPMux* mux, const Config* config) {
ok = WriteWebP(mux_single, config->output);
ErrGet:
ErrGet:
WebPDataClear(&info.bitstream);
WebPMuxDelete(mux_single);
return ok && !parse_error;
@@ -966,8 +972,8 @@ static int Process(const Config* config) {
case FEATURE_XMP:
err = WebPMuxGetChunk(mux, kFourccList[config->type], &chunk);
if (err != WEBP_MUX_OK) {
ERROR_GOTO3("ERROR (%s): Could not get the %s.\n",
ErrorString(err), kDescriptions[config->type], Err2);
ERROR_GOTO3("ERROR (%s): Could not get the %s.\n", ErrorString(err),
kDescriptions[config->type], Err2);
}
ok = WriteData(config->output, &chunk);
break;
@@ -982,7 +988,7 @@ static int Process(const Config* config) {
switch (config->type) {
case FEATURE_ANMF: {
int i;
WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
WebPMuxAnimParams params = {0xFFFFFFFF, 0};
mux = WebPMuxNew();
if (mux == NULL) {
ERROR_GOTO2("ERROR (%s): Could not allocate a mux object.\n",
@@ -1008,8 +1014,10 @@ static int Process(const Config* config) {
// Note: This is only a 'necessary' condition for loop_count
// to be valid. The 'sufficient' condition is checked in the
// WebPMuxSetAnimationParams() method called later.
ERROR_GOTO1("ERROR: Loop count must be in the range 0 to "
"65535.\n", Err2);
ERROR_GOTO1(
"ERROR: Loop count must be in the range 0 to "
"65535.\n",
Err2);
}
ok = !parse_error;
if (!ok) goto Err2;
@@ -1031,8 +1039,10 @@ static int Process(const Config* config) {
err = WebPMuxPushFrame(mux, &frame, 1);
WebPDataClear(&frame.bitstream);
if (err != WEBP_MUX_OK) {
ERROR_GOTO3("ERROR (%s): Could not add a frame at index %d."
"\n", ErrorString(err), i, Err2);
ERROR_GOTO3(
"ERROR (%s): Could not add a frame at index %d."
"\n",
ErrorString(err), i, Err2);
}
break;
}
@@ -1060,13 +1070,13 @@ static int Process(const Config* config) {
err = WebPMuxSetChunk(mux, kFourccList[config->type], &chunk, 1);
WebPDataClear(&chunk);
if (err != WEBP_MUX_OK) {
ERROR_GOTO3("ERROR (%s): Could not set the %s.\n",
ErrorString(err), kDescriptions[config->type], Err2);
ERROR_GOTO3("ERROR (%s): Could not set the %s.\n", ErrorString(err),
kDescriptions[config->type], Err2);
}
break;
}
case FEATURE_LOOP: {
WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
WebPMuxAnimParams params = {0xFFFFFFFF, 0};
int parse_error = 0;
const int loop_count =
ExUtilGetInt(config->args[0].params, 10, &parse_error);
@@ -1091,12 +1101,11 @@ static int Process(const Config* config) {
break;
}
case FEATURE_BGCOLOR: {
WebPMuxAnimParams params = { 0xFFFFFFFF, 0 };
WebPMuxAnimParams params = {0xFFFFFFFF, 0};
uint32_t bgcolor;
ok = ParseBgcolorArgs(config->args[0].params, &bgcolor);
if (!ok) {
ERROR_GOTO1("ERROR: Could not parse the background color.\n",
Err2);
ERROR_GOTO1("ERROR: Could not parse the background color.\n", Err2);
}
ok = CreateMux(config->input, &mux);
if (!ok) goto Err2;
@@ -1132,8 +1141,9 @@ static int Process(const Config* config) {
ERROR_GOTO1("ERROR: can not parse the number of frames.\n", Err2);
}
if (num_frames == 0) {
fprintf(stderr, "Doesn't look like the source is animated. "
"Skipping duration setting.\n");
fprintf(stderr,
"Doesn't look like the source is animated. "
"Skipping duration setting.\n");
ok = WriteWebP(mux, config->output);
if (!ok) goto Err2;
} else {
@@ -1150,8 +1160,8 @@ static int Process(const Config* config) {
int k;
int args[3];
int duration, start, end;
const int nb_args = ExUtilGetInts(config->args[i].params,
10, 3, args);
const int nb_args =
ExUtilGetInts(config->args[i].params, 10, 3, args);
ok = (nb_args >= 1);
if (!ok) goto Err3;
duration = args[0];
@@ -1159,7 +1169,7 @@ static int Process(const Config* config) {
ERROR_GOTO1("ERROR: duration must be strictly positive.\n", Err3);
}
if (nb_args == 1) { // only duration is present -> use full interval
if (nb_args == 1) { // only duration is present -> use full interval
start = 1;
end = num_frames;
} else {
@@ -1198,7 +1208,7 @@ static int Process(const Config* config) {
mux = new_mux; // transfer for the WebPMuxDelete() call
new_mux = NULL;
Err3:
Err3:
WebPFree(durations);
WebPMuxDelete(new_mux);
if (!ok) goto Err2;
@@ -1212,8 +1222,8 @@ static int Process(const Config* config) {
config->type == FEATURE_XMP) {
err = WebPMuxDeleteChunk(mux, kFourccList[config->type]);
if (err != WEBP_MUX_OK) {
ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n",
ErrorString(err), kDescriptions[config->type], Err2);
ERROR_GOTO3("ERROR (%s): Could not strip the %s.\n", ErrorString(err),
kDescriptions[config->type], Err2);
}
} else {
ERROR_GOTO1("ERROR: Invalid feature for action 'strip'.\n", Err2);
@@ -1234,7 +1244,7 @@ static int Process(const Config* config) {
}
}
Err2:
Err2:
WebPMuxDelete(mux);
return ok;
}

View File

@@ -45,7 +45,7 @@ int WebPImportGray(const uint8_t* gray_data, WebPPicture* pic) {
uv_width = (width + 1) >> 1;
for (y = 0; y < pic->height; ++y) {
memcpy(pic->y + y * pic->y_stride, gray_data, width);
gray_data += width; // <- we could use some 'data_stride' here if needed
gray_data += width; // <- we could use some 'data_stride' here if needed
if ((y & 1) == 0) {
memset(pic->u + (y >> 1) * pic->uv_stride, 128, uv_width);
memset(pic->v + (y >> 1) * pic->uv_stride, 128, uv_width);

View File

@@ -22,7 +22,7 @@ extern "C" {
#include "sharpyuv/sharpyuv.h"
#include "webp/encode.h"
#define WEBP_EXTRAS_ABI_VERSION 0x0003 // MAJOR(8b) + MINOR(8b)
#define WEBP_EXTRAS_ABI_VERSION 0x0003 // MAJOR(8b) + MINOR(8b)
//------------------------------------------------------------------------------
@@ -49,10 +49,10 @@ WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
// MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
// will be discarded. Returns 'false' in case of error, or if indexed[] contains
// invalid indices.
WEBP_EXTERN int
WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
const uint32_t palette[], int palette_size,
WebPPicture* pic);
WEBP_EXTERN int WebPImportColorMappedARGB(const uint8_t* indexed,
int indexed_stride,
const uint32_t palette[],
int palette_size, WebPPicture* pic);
// Convert the ARGB content of 'pic' from associated to unassociated.
// 'pic' can be for instance the result of calling of some WebPPictureImportXXX
@@ -98,15 +98,17 @@ WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
// currently supported.
// width, height: width and height of the image in pixels
// Returns 0 on failure.
WEBP_EXTERN int SharpYuvEstimate420Risk(
const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step,
int rgb_stride, int rgb_bit_depth, int width, int height,
const SharpYuvOptions* options, float* score);
WEBP_EXTERN int SharpYuvEstimate420Risk(const void* r_ptr, const void* g_ptr,
const void* b_ptr, int rgb_step,
int rgb_stride, int rgb_bit_depth,
int width, int height,
const SharpYuvOptions* options,
float* score);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_EXTRAS_EXTRAS_H_

View File

@@ -48,7 +48,7 @@ static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
reader = WebPGuessImageReader(data, data_size);
ok = reader(data, data_size, pic, keep_alpha, NULL);
End:
End:
if (!ok) {
WFPRINTF(stderr, "Error! Could not process file %s\n",
(const W_CHAR*)filename);
@@ -57,8 +57,8 @@ static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
return ok ? data_size : 0;
}
static void RescalePlane(uint8_t* plane, int width, int height,
int x_stride, int y_stride, int max) {
static void RescalePlane(uint8_t* plane, int width, int height, int x_stride,
int y_stride, int max) {
const uint32_t factor = (max > 0) ? (255u << 16) / max : 0;
int x, y;
for (y = 0; y < height; ++y) {
@@ -71,9 +71,9 @@ static void RescalePlane(uint8_t* plane, int width, int height,
}
// Return the max absolute difference.
static int DiffScaleChannel(uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int x_stride, int w, int h, int do_scaling) {
static int DiffScaleChannel(uint8_t* src1, int stride1, const uint8_t* src2,
int stride2, int x_stride, int w, int h,
int do_scaling) {
int x, y;
int max = 0;
for (y = 0; y < h; ++y) {
@@ -95,7 +95,7 @@ static int DiffScaleChannel(uint8_t* src1, int stride1,
// breaking the library's hidden visibility. This code duplication avoids the
// bigger annoyance of having to open up internal details of libdsp...
#define SSIM_KERNEL 3 // total size of the kernel: 2 * SSIM_KERNEL + 1
#define SSIM_KERNEL 3 // total size of the kernel: 2 * SSIM_KERNEL + 1
// struct for accumulating statistical moments
typedef struct {
@@ -105,19 +105,19 @@ typedef struct {
} DistoStats;
// hat-shaped filter. Sum of coefficients is equal to 16.
static const uint32_t kWeight[2 * SSIM_KERNEL + 1] = { 1, 2, 3, 4, 3, 2, 1 };
static const uint32_t kWeight[2 * SSIM_KERNEL + 1] = {1, 2, 3, 4, 3, 2, 1};
static WEBP_INLINE double SSIMCalculation(const DistoStats* const stats) {
const uint32_t N = stats->w;
const uint32_t w2 = N * N;
const uint32_t w2 = N * N;
const uint32_t C1 = 20 * w2;
const uint32_t C2 = 60 * w2;
const uint32_t C3 = 8 * 8 * w2; // 'dark' limit ~= 6
const uint32_t C3 = 8 * 8 * w2; // 'dark' limit ~= 6
const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
if (xmxm + ymym >= C3) {
const int64_t xmym = (int64_t)stats->xm * stats->ym;
const int64_t sxy = (int64_t)stats->xym * N - xmym; // can be negative
const int64_t sxy = (int64_t)stats->xym * N - xmym; // can be negative
const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
const uint64_t syy = (uint64_t)stats->yym * N - ymym;
// we descale by 8 to prevent overflow during the fnum/fden multiply.
@@ -129,13 +129,13 @@ static WEBP_INLINE double SSIMCalculation(const DistoStats* const stats) {
assert(r >= 0. && r <= 1.0);
return r;
}
return 1.; // area is too dark to contribute meaningfully
return 1.; // area is too dark to contribute meaningfully
}
static double SSIMGetClipped(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int xo, int yo, int W, int H) {
DistoStats stats = { 0, 0, 0, 0, 0, 0 };
const uint8_t* src2, int stride2, int xo, int yo,
int W, int H) {
DistoStats stats = {0, 0, 0, 0, 0, 0};
const int ymin = (yo - SSIM_KERNEL < 0) ? 0 : yo - SSIM_KERNEL;
const int ymax = (yo + SSIM_KERNEL > H - 1) ? H - 1 : yo + SSIM_KERNEL;
const int xmin = (xo - SSIM_KERNEL < 0) ? 0 : xo - SSIM_KERNEL;
@@ -145,13 +145,13 @@ static double SSIMGetClipped(const uint8_t* src1, int stride1,
src2 += ymin * stride2;
for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
for (x = xmin; x <= xmax; ++x) {
const uint32_t w = kWeight[SSIM_KERNEL + x - xo]
* kWeight[SSIM_KERNEL + y - yo];
const uint32_t w =
kWeight[SSIM_KERNEL + x - xo] * kWeight[SSIM_KERNEL + y - yo];
const uint32_t s1 = src1[x];
const uint32_t s2 = src2[x];
stats.w += w;
stats.xm += w * s1;
stats.ym += w * s2;
stats.w += w;
stats.xm += w * s1;
stats.ym += w * s2;
stats.xxm += w * s1 * s1;
stats.xym += w * s1 * s2;
stats.yym += w * s2 * s2;
@@ -161,9 +161,9 @@ static double SSIMGetClipped(const uint8_t* src1, int stride1,
}
// Compute SSIM-score map. Return -1 in case of error, max diff otherwise.
static int SSIMScaleChannel(uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int x_stride, int w, int h, int do_scaling) {
static int SSIMScaleChannel(uint8_t* src1, int stride1, const uint8_t* src2,
int stride2, int x_stride, int w, int h,
int do_scaling) {
int x, y;
int max = 0;
uint8_t* const plane1 = (uint8_t*)malloc(2 * w * h * sizeof(*plane1));
@@ -205,8 +205,8 @@ static void ConvertToGray(WebPPicture* const pic) {
for (x = 0; x < pic->width; ++x) {
const uint32_t argb = row[x];
const uint32_t r = (argb >> 16) & 0xff;
const uint32_t g = (argb >> 8) & 0xff;
const uint32_t b = (argb >> 0) & 0xff;
const uint32_t g = (argb >> 8) & 0xff;
const uint32_t b = (argb >> 0) & 0xff;
// We use BT.709 for converting to luminance.
const uint32_t Y = (uint32_t)(0.2126 * r + 0.7152 * g + 0.0722 * b + .5);
row[x] = (argb & 0xff000000u) | (Y * 0x010101u);
@@ -297,8 +297,7 @@ int main(int argc, const char* argv[]) {
fprintf(stderr, "Error while computing the distortion.\n");
goto End;
}
printf("%u %.2f %.2f %.2f %.2f %.2f [ %.2f bpp ]\n",
(unsigned int)size1,
printf("%u %.2f %.2f %.2f %.2f %.2f [ %.2f bpp ]\n", (unsigned int)size1,
disto[4], disto[0], disto[1], disto[2], disto[3],
8.f * size1 / pic1.width / pic1.height);
@@ -306,21 +305,25 @@ int main(int argc, const char* argv[]) {
uint8_t* data = NULL;
size_t data_size = 0;
if (pic1.use_argb != pic2.use_argb) {
fprintf(stderr, "Pictures are not in the same argb format. "
"Can't save the difference map.\n");
fprintf(stderr,
"Pictures are not in the same argb format. "
"Can't save the difference map.\n");
goto End;
}
if (pic1.use_argb) {
int n;
fprintf(stderr, "max differences per channel: ");
for (n = 0; n < 3; ++n) { // skip the alpha channel
const int range = (type == 1) ?
SSIMScaleChannel((uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
(const uint8_t*)pic2.argb + n, pic2.argb_stride * 4,
4, pic1.width, pic1.height, scale) :
DiffScaleChannel((uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
(const uint8_t*)pic2.argb + n, pic2.argb_stride * 4,
4, pic1.width, pic1.height, scale);
for (n = 0; n < 3; ++n) { // skip the alpha channel
const int range =
(type == 1)
? SSIMScaleChannel(
(uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
(const uint8_t*)pic2.argb + n, pic2.argb_stride * 4, 4,
pic1.width, pic1.height, scale)
: DiffScaleChannel(
(uint8_t*)pic1.argb + n, pic1.argb_stride * 4,
(const uint8_t*)pic2.argb + n, pic2.argb_stride * 4, 4,
pic1.width, pic1.height, scale);
if (range < 0) fprintf(stderr, "\nError computing diff map\n");
fprintf(stderr, "[%d]", range);
}
@@ -331,10 +334,9 @@ int main(int argc, const char* argv[]) {
goto End;
}
#if !defined(WEBP_REDUCE_CSP)
data_size = WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb,
pic1.width, pic1.height,
pic1.argb_stride * 4,
&data);
data_size =
WebPEncodeLosslessBGRA((const uint8_t*)pic1.argb, pic1.width,
pic1.height, pic1.argb_stride * 4, &data);
if (data_size == 0) {
fprintf(stderr, "Error during lossless encoding.\n");
goto End;
@@ -346,14 +348,15 @@ int main(int argc, const char* argv[]) {
#else
(void)data;
(void)data_size;
fprintf(stderr, "Cannot save the difference map. Please recompile "
"without the WEBP_REDUCE_CSP flag.\n");
fprintf(stderr,
"Cannot save the difference map. Please recompile "
"without the WEBP_REDUCE_CSP flag.\n");
goto End;
#endif // WEBP_REDUCE_CSP
}
ret = EXIT_SUCCESS;
End:
End:
WebPPictureFree(&pic1);
WebPPictureFree(&pic2);
FREE_WARGV_AND_RETURN(ret);

View File

@@ -54,10 +54,10 @@ int VP8EstimateQuality(const uint8_t* const data, size_t size) {
if (data == NULL) return -1;
if (WebPGetFeatures(data, size, &features) != VP8_STATUS_OK) {
return -1; // invalid file
return -1; // invalid file
}
if (features.format == 2) return 101; // lossless
if (features.format == 0 || features.has_animation) return -1; // mixed
if (features.format == 2) return 101; // lossless
if (features.format == 0 || features.has_animation) return -1; // mixed
while (pos < size) {
sig = (sig >> 8) | ((uint64_t)data[pos++] << 40);
@@ -78,29 +78,29 @@ int VP8EstimateQuality(const uint8_t* const data, size_t size) {
GET_BIT(2); // colorspace + clamp type
// Segment header
if (GET_BIT(1)) { // use_segment
if (GET_BIT(1)) { // use_segment
int s;
const int update_map = GET_BIT(1);
if (GET_BIT(1)) { // update data
if (GET_BIT(1)) { // update data
const int absolute_delta = GET_BIT(1);
int q[4] = { 0, 0, 0, 0 };
int q[4] = {0, 0, 0, 0};
for (s = 0; s < 4; ++s) {
if (GET_BIT(1)) {
q[s] = GET_BIT(7);
if (GET_BIT(1)) q[s] = -q[s]; // sign
if (GET_BIT(1)) q[s] = -q[s]; // sign
}
}
if (absolute_delta) Q = q[0]; // just use the first segment's quantizer
for (s = 0; s < 4; ++s) CONDITIONAL_SKIP(7); // filter strength
for (s = 0; s < 4; ++s) CONDITIONAL_SKIP(7); // filter strength
}
if (update_map) {
for (s = 0; s < 3; ++s) CONDITIONAL_SKIP(8);
}
}
// Filter header
GET_BIT(1 + 6 + 3); // simple + level + sharpness
if (GET_BIT(1)) { // use_lf_delta
if (GET_BIT(1)) { // update lf_delta?
GET_BIT(1 + 6 + 3); // simple + level + sharpness
if (GET_BIT(1)) { // use_lf_delta
if (GET_BIT(1)) { // update lf_delta?
int n;
for (n = 0; n < 4 + 4; ++n) CONDITIONAL_SKIP(6);
}

View File

@@ -23,10 +23,10 @@
#if defined(WEBP_HAVE_SDL)
#include "webp_to_sdl.h"
#include "webp/decode.h"
#include "imageio/imageio_util.h"
#include "../examples/unicode.h"
#include "imageio/imageio_util.h"
#include "webp/decode.h"
#include "webp_to_sdl.h"
#if defined(WEBP_HAVE_JUST_SDL_H)
#include <SDL.h>
@@ -41,11 +41,15 @@ static void ProcessEvents(void) {
switch (event.type) {
case SDL_KEYUP:
switch (event.key.keysym.sym) {
case SDLK_q: done = 1; break;
default: break;
case SDLK_q:
done = 1;
break;
default:
break;
}
break;
default: break;
default:
break;
}
}
}
@@ -93,7 +97,7 @@ int main(int argc, char* argv[]) {
}
ok = 1;
Error:
Error:
SDL_Quit();
FREE_WARGV_AND_RETURN(ok ? EXIT_SUCCESS : EXIT_FAILURE);
}

View File

@@ -12,9 +12,9 @@
#include <string.h>
#include "../examples/unicode.h"
#include "src/webp/types.h"
#include "extras/extras.h"
#include "imageio/imageio_util.h"
#include "src/webp/types.h"
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on failure.
int main(int argc, const char* argv[]) {
@@ -46,7 +46,7 @@ int main(int argc, const char* argv[]) {
if (!quiet) {
printf("Estimated quality factor: %d\n", q);
} else {
printf("%d\n", q); // just print the number
printf("%d\n", q); // just print the number
}
}
free((void*)data);

View File

@@ -17,11 +17,10 @@
#if defined(WEBP_HAVE_SDL)
#include "webp_to_sdl.h"
#include <stdio.h>
#include "src/webp/decode.h"
#include "webp_to_sdl.h"
#if defined(WEBP_HAVE_JUST_SDL_H)
#include <SDL.h>
@@ -67,11 +66,11 @@ int WebPToSDL(const char* data, unsigned int data_size) {
}
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
output = WebPDecodeBGRA((const uint8_t*)data, (size_t)data_size, &width,
&height);
output =
WebPDecodeBGRA((const uint8_t*)data, (size_t)data_size, &width, &height);
#else
output = WebPDecodeRGBA((const uint8_t*)data, (size_t)data_size, &width,
&height);
output =
WebPDecodeRGBA((const uint8_t*)data, (size_t)data_size, &width, &height);
#endif
if (output == NULL) {
fprintf(stderr, "Error decoding image (%d)\n", status);
@@ -84,7 +83,7 @@ int WebPToSDL(const char* data, unsigned int data_size) {
SDL_RenderPresent(renderer);
ok = 1;
Error:
Error:
// We should call SDL_DestroyWindow(window) but that makes .js fail.
SDL_DestroyRenderer(renderer);
SDL_DestroyTexture(texture);

View File

@@ -9,9 +9,10 @@
//
// Generic image-type guessing.
#include "./image_dec.h"
#include <stddef.h>
#include "./image_dec.h"
#include "./metadata.h"
#include "webp/encode.h"
#include "webp/types.h"
@@ -62,8 +63,8 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
}
static int FailReader(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata) {
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
(void)data;
(void)data_size;
(void)pic;
@@ -74,12 +75,18 @@ static int FailReader(const uint8_t* const data, size_t data_size,
WebPImageReader WebPGetImageReader(WebPInputFileFormat format) {
switch (format) {
case WEBP_PNG_FORMAT: return ReadPNG;
case WEBP_JPEG_FORMAT: return ReadJPEG;
case WEBP_TIFF_FORMAT: return ReadTIFF;
case WEBP_WEBP_FORMAT: return ReadWebP;
case WEBP_PNM_FORMAT: return ReadPNM;
default: return FailReader;
case WEBP_PNG_FORMAT:
return ReadPNG;
case WEBP_JPEG_FORMAT:
return ReadJPEG;
case WEBP_TIFF_FORMAT:
return ReadTIFF;
case WEBP_WEBP_FORMAT:
return ReadWebP;
case WEBP_PNM_FORMAT:
return ReadPNM;
default:
return FailReader;
}
}

View File

@@ -22,8 +22,8 @@
#include "webp/config.h"
#endif
#include "./metadata.h"
#include "./jpegdec.h"
#include "./metadata.h"
#include "./pngdec.h"
#include "./pnmdec.h"
#include "./tiffdec.h"
@@ -53,8 +53,8 @@ WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
// Signature for common image-reading functions (ReadPNG, ReadJPEG, ...)
typedef int (*WebPImageReader)(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata);
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
// Return the reader associated to a given file format.
WebPImageReader WebPGetImageReader(WebPInputFileFormat format);
@@ -66,7 +66,7 @@ WebPImageReader WebPGuessImageReader(const uint8_t* const data,
size_t data_size);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_IMAGE_DEC_H_

View File

@@ -17,7 +17,7 @@
#ifdef WEBP_HAVE_PNG
#include <png.h>
#include <setjmp.h> // note: this must be included *after* png.h
#include <setjmp.h> // note: this must be included *after* png.h
#endif
#ifdef HAVE_WINCODEC_H
@@ -26,13 +26,14 @@
#endif
#define CINTERFACE
#define COBJMACROS
#define _WIN32_IE 0x500 // Workaround bug in shlwapi.h when compiling C++
// code with COBJMACROS.
#define _WIN32_IE \
0x500 // Workaround bug in shlwapi.h when compiling C++
// code with COBJMACROS.
#include <ole2.h> // CreateStreamOnHGlobal()
#include <shlwapi.h>
#include <tchar.h>
#include <windows.h>
#include <wincodec.h>
#include <windows.h>
#endif
#include "../examples/unicode.h"
@@ -45,12 +46,12 @@
#ifdef HAVE_WINCODEC_H
#define IFS(fn) \
do { \
if (SUCCEEDED(hr)) { \
hr = (fn); \
if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
} \
#define IFS(fn) \
do { \
if (SUCCEEDED(hr)) { \
hr = (fn); \
if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
} \
} while (0)
#ifdef __cplusplus
@@ -59,8 +60,8 @@
#define MAKE_REFGUID(x) &(x)
#endif
static HRESULT CreateOutputStream(const char* out_file_name,
int write_to_mem, IStream** stream) {
static HRESULT CreateOutputStream(const char* out_file_name, int write_to_mem,
IStream** stream) {
HRESULT hr = S_OK;
if (write_to_mem) {
// Output to a memory buffer. This is freed when 'stream' is released.
@@ -77,24 +78,22 @@ static HRESULT CreateOutputStream(const char* out_file_name,
}
static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
REFGUID container_guid,
uint8_t* rgb, int stride,
REFGUID container_guid, uint8_t* rgb, int stride,
uint32_t width, uint32_t height, int has_alpha) {
HRESULT hr = S_OK;
IWICImagingFactory* factory = NULL;
IWICBitmapFrameEncode* frame = NULL;
IWICBitmapEncoder* encoder = NULL;
IStream* stream = NULL;
WICPixelFormatGUID pixel_format = has_alpha ? GUID_WICPixelFormat32bppBGRA
: GUID_WICPixelFormat24bppBGR;
WICPixelFormatGUID pixel_format =
has_alpha ? GUID_WICPixelFormat32bppBGRA : GUID_WICPixelFormat24bppBGR;
if (out_file_name == NULL || rgb == NULL) return E_INVALIDARG;
IFS(CoInitialize(NULL));
IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
CLSCTX_INPROC_SERVER,
MAKE_REFGUID(IID_IWICImagingFactory),
(LPVOID*)&factory));
IFS(CoCreateInstance(
MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
if (hr == REGDB_E_CLASSNOTREG) {
fprintf(stderr,
"Couldn't access Windows Imaging Component (are you running "
@@ -104,14 +103,13 @@ static HRESULT WriteUsingWIC(const char* out_file_name, int use_stdout,
IFS(CreateOutputStream(out_file_name, use_stdout, &stream));
IFS(IWICImagingFactory_CreateEncoder(factory, container_guid, NULL,
&encoder));
IFS(IWICBitmapEncoder_Initialize(encoder, stream,
WICBitmapEncoderNoCache));
IFS(IWICBitmapEncoder_Initialize(encoder, stream, WICBitmapEncoderNoCache));
IFS(IWICBitmapEncoder_CreateNewFrame(encoder, &frame, NULL));
IFS(IWICBitmapFrameEncode_Initialize(frame, NULL));
IFS(IWICBitmapFrameEncode_SetSize(frame, width, height));
IFS(IWICBitmapFrameEncode_SetPixelFormat(frame, &pixel_format));
IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride,
height * stride, rgb));
IFS(IWICBitmapFrameEncode_WritePixels(frame, height, stride, height * stride,
rgb));
IFS(IWICBitmapFrameEncode_Commit(frame));
IFS(IWICBitmapEncoder_Commit(encoder));
@@ -153,11 +151,11 @@ int WebPWritePNG(const char* out_file_name, int use_stdout,
const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
return SUCCEEDED(WriteUsingWIC(out_file_name, use_stdout,
MAKE_REFGUID(GUID_ContainerFormatPng),
rgb, stride, width, height, has_alpha));
MAKE_REFGUID(GUID_ContainerFormatPng), rgb,
stride, width, height, has_alpha));
}
#elif defined(WEBP_HAVE_PNG) // !HAVE_WINCODEC_H
#elif defined(WEBP_HAVE_PNG) // !HAVE_WINCODEC_H
static void PNGAPI PNGErrorFunction(png_structp png, png_const_charp unused) {
(void)unused; // remove variable-unused warning
longjmp(png_jmpbuf(png), 1);
@@ -169,8 +167,8 @@ int WebPWritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
if (out_file == NULL || buffer == NULL) return 0;
png = png_create_write_struct(PNG_LIBPNG_VER_STRING,
NULL, PNGErrorFunction, NULL);
png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, PNGErrorFunction,
NULL);
if (png == NULL) {
return 0;
}
@@ -206,11 +204,12 @@ int WebPWritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
return 1;
}
#else // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
#else // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
int WebPWritePNG(FILE* fout, const WebPDecBuffer* const buffer) {
if (fout == NULL || buffer == NULL) return 0;
fprintf(stderr, "PNG support not compiled. Please install the libpng "
fprintf(stderr,
"PNG support not compiled. Please install the libpng "
"development package before building.\n");
fprintf(stderr, "You can run with -ppm flag to decode in PPM format.\n");
return 0;
@@ -235,8 +234,10 @@ static int WritePPMPAM(FILE* fout, const WebPDecBuffer* const buffer,
if (row == NULL) return 0;
if (alpha) {
fprintf(fout, "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
"TUPLTYPE RGB_ALPHA\nENDHDR\n", width, height);
fprintf(fout,
"P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
"TUPLTYPE RGB_ALPHA\nENDHDR\n",
width, height);
} else {
fprintf(fout, "P6\n%u %u\n255\n", width, height);
}
@@ -297,7 +298,7 @@ static void PutLE16(uint8_t* const dst, uint32_t value) {
}
static void PutLE32(uint8_t* const dst, uint32_t value) {
PutLE16(dst + 0, (value >> 0) & 0xffff);
PutLE16(dst + 0, (value >> 0) & 0xffff);
PutLE16(dst + 2, (value >> 16) & 0xffff);
}
@@ -310,7 +311,7 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
int stride;
uint32_t y;
uint32_t bytes_per_px, line_size, image_size, bmp_stride, total_size;
uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = { 0 };
uint8_t bmp_header[BMP_HEADER_SIZE + BMP_HEADER_ALPHA_EXTRA_SIZE] = {0};
if (fout == NULL || buffer == NULL) return 0;
@@ -329,27 +330,27 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
if (rgba == NULL) return 0;
// bitmap file header
PutLE16(bmp_header + 0, 0x4d42); // signature 'BM'
PutLE32(bmp_header + 2, total_size); // size including header
PutLE32(bmp_header + 6, 0); // reserved
PutLE32(bmp_header + 10, header_size); // offset to pixel array
PutLE16(bmp_header + 0, 0x4d42); // signature 'BM'
PutLE32(bmp_header + 2, total_size); // size including header
PutLE32(bmp_header + 6, 0); // reserved
PutLE32(bmp_header + 10, header_size); // offset to pixel array
// bitmap info header
PutLE32(bmp_header + 14, header_size - 14); // DIB header size
PutLE32(bmp_header + 18, width); // dimensions
PutLE32(bmp_header + 22, height); // no vertical flip
PutLE16(bmp_header + 26, 1); // number of planes
PutLE16(bmp_header + 28, bytes_per_px * 8); // bits per pixel
PutLE32(bmp_header + 30, has_alpha ? 3 : 0); // BI_BITFIELDS or BI_RGB
PutLE32(bmp_header + 14, header_size - 14); // DIB header size
PutLE32(bmp_header + 18, width); // dimensions
PutLE32(bmp_header + 22, height); // no vertical flip
PutLE16(bmp_header + 26, 1); // number of planes
PutLE16(bmp_header + 28, bytes_per_px * 8); // bits per pixel
PutLE32(bmp_header + 30, has_alpha ? 3 : 0); // BI_BITFIELDS or BI_RGB
PutLE32(bmp_header + 34, image_size);
PutLE32(bmp_header + 38, 2400); // x pixels/meter
PutLE32(bmp_header + 42, 2400); // y pixels/meter
PutLE32(bmp_header + 46, 0); // number of palette colors
PutLE32(bmp_header + 50, 0); // important color count
if (has_alpha) { // BITMAPV3INFOHEADER complement
PutLE32(bmp_header + 54, 0x00ff0000); // red mask
PutLE32(bmp_header + 58, 0x0000ff00); // green mask
PutLE32(bmp_header + 62, 0x000000ff); // blue mask
PutLE32(bmp_header + 66, 0xff000000); // alpha mask
PutLE32(bmp_header + 38, 2400); // x pixels/meter
PutLE32(bmp_header + 42, 2400); // y pixels/meter
PutLE32(bmp_header + 46, 0); // number of palette colors
PutLE32(bmp_header + 50, 0); // important color count
if (has_alpha) { // BITMAPV3INFOHEADER complement
PutLE32(bmp_header + 54, 0x00ff0000); // red mask
PutLE32(bmp_header + 58, 0x0000ff00); // green mask
PutLE32(bmp_header + 62, 0x000000ff); // blue mask
PutLE32(bmp_header + 66, 0xff000000); // alpha mask
}
// TODO(skal): color profile
@@ -367,7 +368,7 @@ int WebPWriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
}
// write padding zeroes
if (bmp_stride != line_size) {
const uint8_t zeroes[3] = { 0 };
const uint8_t zeroes[3] = {0};
if (fwrite(zeroes, bmp_stride - line_size, 1, fout) != 1) {
return 0;
}
@@ -397,35 +398,35 @@ int WebPWriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
// For non-alpha case, we omit tag 0x152 (ExtraSamples).
const uint8_t num_ifd_entries = 0;
uint8_t tiff_header[TIFF_HEADER_SIZE] = {
0x49, 0x49, 0x2a, 0x00, // little endian signature
8, 0, 0, 0, // offset to the unique IFD that follows
// IFD (offset = 8). Entries must be written in increasing tag order.
num_ifd_entries, 0, // Number of entries in the IFD (12 bytes each).
0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 10: Width (TBD)
0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 22: Height (TBD)
0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0, // 34: BitsPerSample: 8888
EXTRA_DATA_OFFSET + 0, 0, 0, 0,
0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 46: Compression: none
0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, // 58: Photometric: RGB
0x11, 0x01, 4, 0, 1, 0, 0, 0, // 70: Strips offset:
TIFF_HEADER_SIZE, 0, 0, 0, // data follows header
0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 82: Orientation: topleft
0x15, 0x01, 3, 0, 1, 0, 0, 0, // 94: SamplesPerPixels
bytes_per_px, 0, 0, 0,
0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 106: Rows per strip (TBD)
0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 118: StripByteCount (TBD)
0x1a, 0x01, 5, 0, 1, 0, 0, 0, // 130: X-resolution
EXTRA_DATA_OFFSET + 8, 0, 0, 0,
0x1b, 0x01, 5, 0, 1, 0, 0, 0, // 142: Y-resolution
EXTRA_DATA_OFFSET + 8, 0, 0, 0,
0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 154: PlanarConfiguration
0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, // 166: ResolutionUnit (inch)
0x52, 0x01, 3, 0, 1, 0, 0, 0,
assoc_alpha, 0, 0, 0, // 178: ExtraSamples: rgbA/RGBA
0, 0, 0, 0, // 190: IFD terminator
// EXTRA_DATA_OFFSET:
8, 0, 8, 0, 8, 0, 8, 0, // BitsPerSample
72, 0, 0, 0, 1, 0, 0, 0 // 72 pixels/inch, for X/Y-resolution
0x49, 0x49, 0x2a, 0x00, // little endian signature
8, 0, 0, 0, // offset to the unique IFD that follows
// IFD (offset = 8). Entries must be written in increasing tag order.
num_ifd_entries, 0, // Number of entries in the IFD (12 bytes each).
0x00, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 10: Width (TBD)
0x01, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 22: Height (TBD)
0x02, 0x01, 3, 0, bytes_per_px, 0, 0, 0, // 34: BitsPerSample: 8888
EXTRA_DATA_OFFSET + 0, 0, 0, 0, 0x03, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0,
0, // 46: Compression: none
0x06, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, // 58: Photometric: RGB
0x11, 0x01, 4, 0, 1, 0, 0, 0, // 70: Strips offset:
TIFF_HEADER_SIZE, 0, 0, 0, // data follows header
0x12, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0, 0, // 82: Orientation: topleft
0x15, 0x01, 3, 0, 1, 0, 0, 0, // 94: SamplesPerPixels
bytes_per_px, 0, 0, 0, 0x16, 0x01, 3, 0, 1, 0, 0, 0, 0, 0, 0,
0, // 106: Rows per strip (TBD)
0x17, 0x01, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 118: StripByteCount (TBD)
0x1a, 0x01, 5, 0, 1, 0, 0, 0, // 130: X-resolution
EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1b, 0x01, 5, 0, 1, 0, 0,
0, // 142: Y-resolution
EXTRA_DATA_OFFSET + 8, 0, 0, 0, 0x1c, 0x01, 3, 0, 1, 0, 0, 0, 1, 0, 0,
0, // 154: PlanarConfiguration
0x28, 0x01, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, // 166: ResolutionUnit (inch)
0x52, 0x01, 3, 0, 1, 0, 0, 0, assoc_alpha, 0, 0,
0, // 178: ExtraSamples: rgbA/RGBA
0, 0, 0, 0, // 190: IFD terminator
// EXTRA_DATA_OFFSET:
8, 0, 8, 0, 8, 0, 8, 0, // BitsPerSample
72, 0, 0, 0, 1, 0, 0, 0 // 72 pixels/inch, for X/Y-resolution
};
uint32_t y;
@@ -517,11 +518,11 @@ int WebPWritePGM(FILE* fout, const WebPDecBuffer* const buffer) {
if (src_y == NULL || src_u == NULL || src_v == NULL) return 0;
fprintf(fout, "P5\n%d %d\n255\n",
(width + 1) & ~1, height + uv_height + a_height);
fprintf(fout, "P5\n%d %d\n255\n", (width + 1) & ~1,
height + uv_height + a_height);
for (y = 0; ok && y < height; ++y) {
ok &= (fwrite(src_y, width, 1, fout) == 1);
if (width & 1) fputc(0, fout); // padding byte
if (width & 1) fputc(0, fout); // padding byte
src_y += yuv->y_stride;
}
for (y = 0; ok && y < uv_height; ++y) {
@@ -532,7 +533,7 @@ int WebPWritePGM(FILE* fout, const WebPDecBuffer* const buffer) {
}
for (y = 0; ok && y < a_height; ++y) {
ok &= (fwrite(src_a, width, 1, fout) == 1);
if (width & 1) fputc(0, fout); // padding byte
if (width & 1) fputc(0, fout); // padding byte
src_a += yuv->a_stride;
}
return ok;
@@ -609,8 +610,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
}
}
if (format == PNG ||
format == RGBA || format == BGRA || format == ARGB ||
if (format == PNG || format == RGBA || format == BGRA || format == ARGB ||
format == rgbA || format == bgrA || format == Argb) {
#ifdef HAVE_WINCODEC_H
ok &= WebPWritePNG(out_file_name, use_stdout, buffer);

View File

@@ -20,8 +20,8 @@
#include "webp/config.h"
#endif
#include "webp/types.h"
#include "webp/decode.h"
#include "webp/types.h"
#ifdef __cplusplus
extern "C" {
@@ -38,10 +38,19 @@ typedef enum {
RAW_YUV,
ALPHA_PLANE_ONLY, // this is for experimenting only
// forced colorspace output (for testing, mostly)
RGB, RGBA, BGR, BGRA, ARGB,
RGBA_4444, RGB_565,
rgbA, bgrA, Argb, rgbA_4444,
YUV, YUVA
RGB,
RGBA,
BGR,
BGRA,
ARGB,
RGBA_4444,
RGB_565,
rgbA,
bgrA,
Argb,
rgbA_4444,
YUV,
YUVA
} WebPOutputFileFormat;
// General all-purpose call.
@@ -90,7 +99,7 @@ int WebPWriteYUV(FILE* fout, const struct WebPDecBuffer* const buffer);
int WebPWrite16bAsPGM(FILE* fout, const struct WebPDecBuffer* const buffer);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_IMAGE_ENC_H_

View File

@@ -13,15 +13,15 @@
#include "./imageio_util.h"
#if defined(_WIN32)
#include <fcntl.h> // for _O_BINARY
#include <io.h> // for _setmode()
#include <fcntl.h> // for _O_BINARY
#include <io.h> // for _setmode()
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "webp/types.h"
#include "../examples/unicode.h"
#include "webp/types.h"
// -----------------------------------------------------------------------------
// File I/O
@@ -65,14 +65,14 @@ int ImgIoUtilReadFromStdin(const uint8_t** data, size_t* data_size) {
*data_size = size;
return 1;
Error:
Error:
free(input);
fprintf(stderr, "Could not read from stdin\n");
return 0;
}
int ImgIoUtilReadFile(const char* const file_name,
const uint8_t** data, size_t* data_size) {
int ImgIoUtilReadFile(const char* const file_name, const uint8_t** data,
size_t* data_size) {
int ok;
uint8_t* file_data;
size_t file_size;
@@ -123,8 +123,8 @@ int ImgIoUtilReadFile(const char* const file_name,
// -----------------------------------------------------------------------------
int ImgIoUtilWriteFile(const char* const file_name,
const uint8_t* data, size_t data_size) {
int ImgIoUtilWriteFile(const char* const file_name, const uint8_t* data,
size_t data_size) {
int ok;
FILE* out;
const int to_stdout = (file_name == NULL) || !WSTRCMP(file_name, "-");
@@ -145,8 +145,8 @@ int ImgIoUtilWriteFile(const char* const file_name,
// -----------------------------------------------------------------------------
void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride, int width, int height) {
void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width, int height) {
while (height-- > 0) {
memcpy(dst, src, width * sizeof(*dst));
src += src_stride;

View File

@@ -36,22 +36,22 @@ FILE* ImgIoUtilSetBinaryMode(FILE* file);
// to be used as a C-string.
// If 'file_name' is NULL or equal to "-", input is read from stdin by calling
// the function ImgIoUtilReadFromStdin().
int ImgIoUtilReadFile(const char* const file_name,
const uint8_t** data, size_t* data_size);
int ImgIoUtilReadFile(const char* const file_name, const uint8_t** data,
size_t* data_size);
// Same as ImgIoUtilReadFile(), but reads until EOF from stdin instead.
int ImgIoUtilReadFromStdin(const uint8_t** data, size_t* data_size);
// Write a data segment into a file named 'file_name'. Returns true if ok.
// If 'file_name' is NULL or equal to "-", output is written to stdout.
int ImgIoUtilWriteFile(const char* const file_name,
const uint8_t* data, size_t data_size);
int ImgIoUtilWriteFile(const char* const file_name, const uint8_t* data,
size_t data_size);
//------------------------------------------------------------------------------
// Copy width x height pixels from 'src' to 'dst' honoring the strides.
void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride, int width, int height);
void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width, int height);
//------------------------------------------------------------------------------
@@ -59,7 +59,7 @@ void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t stride, size_t height);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_IMAGEIO_UTIL_H_

View File

@@ -18,8 +18,8 @@
#include <stdio.h>
#ifdef WEBP_HAVE_JPEG
#include <jpeglib.h>
#include <jerror.h>
#include <jpeglib.h>
#include <setjmp.h>
#include <stdlib.h>
#include <string.h>
@@ -33,10 +33,10 @@
// Metadata processing
#ifndef JPEG_APP1
# define JPEG_APP1 (JPEG_APP0 + 1)
#define JPEG_APP1 (JPEG_APP0 + 1)
#endif
#ifndef JPEG_APP2
# define JPEG_APP2 (JPEG_APP0 + 2)
#define JPEG_APP2 (JPEG_APP0 + 2)
#endif
typedef struct {
@@ -64,7 +64,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
// ICC.1:2010-12 (4.3.0.0) Annex B.4 Embedding ICC Profiles in JPEG files
static const char kICCPSignature[] = "ICC_PROFILE";
static const size_t kICCPSignatureLength = 12; // signature includes '\0'
static const size_t kICCPSkipLength = 14; // signature + seq & count
static const size_t kICCPSkipLength = 14; // signature + seq & count
int expected_count = 0;
int actual_count = 0;
int seq_max = 0;
@@ -74,8 +74,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
memset(iccp_segments, 0, sizeof(iccp_segments));
for (marker = dinfo->marker_list; marker != NULL; marker = marker->next) {
if (marker->marker == JPEG_APP2 &&
marker->data_length > kICCPSkipLength &&
if (marker->marker == JPEG_APP2 && marker->data_length > kICCPSkipLength &&
!memcmp(marker->data, kICCPSignature, kICCPSignatureLength)) {
// ICC_PROFILE\0<seq><count>; 'seq' starts at 1.
const int seq = marker->data[kICCPSignatureLength];
@@ -84,8 +83,9 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
ICCPSegment* segment;
if (segment_size == 0 || count == 0 || seq == 0) {
fprintf(stderr, "[ICCP] size (%d) / count (%d) / sequence number (%d)"
" cannot be 0!\n",
fprintf(stderr,
"[ICCP] size (%d) / count (%d) / sequence number (%d)"
" cannot be 0!\n",
(int)segment_size, seq, count);
return 0;
}
@@ -100,7 +100,7 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
segment = iccp_segments + seq - 1;
if (segment->data_length != 0) {
fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n" , seq);
fprintf(stderr, "[ICCP] Duplicate segment number (%d)!\n", seq);
return 0;
}
@@ -138,8 +138,8 @@ static int StoreICCP(j_decompress_ptr dinfo, MetadataPayload* const iccp) {
int i;
size_t offset = 0;
for (i = 0; i < seq_max; ++i) {
memcpy(iccp->bytes + offset,
iccp_segments[i].data, iccp_segments[i].data_length);
memcpy(iccp->bytes + offset, iccp_segments[i].data,
iccp_segments[i].data_length);
offset += iccp_segments[i].data_length;
}
}
@@ -156,12 +156,12 @@ static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
size_t signature_length;
size_t storage_offset;
} kJPEGMetadataMap[] = {
// Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
{ JPEG_APP1, "Exif\0", 6, METADATA_OFFSET(exif) },
// XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
// TODO(jzern) Add support for 'ExtendedXMP'
{ JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp) },
{ 0, NULL, 0, 0 },
// Exif 2.2 Section 4.7.2 Interoperability Structure of APP1 ...
{JPEG_APP1, "Exif\0", 6, METADATA_OFFSET(exif)},
// XMP Specification Part 3 Section 3 Embedding XMP Metadata ... #JPEG
// TODO(jzern) Add support for 'ExtendedXMP'
{JPEG_APP1, "http://ns.adobe.com/xap/1.0/", 29, METADATA_OFFSET(xmp)},
{0, NULL, 0, 0},
};
jpeg_saved_marker_ptr marker;
// Treat ICC profiles separately as they may be segmented and out of order.
@@ -179,8 +179,8 @@ static int ExtractMetadataFromJPEG(j_decompress_ptr dinfo,
kJPEGMetadataMap[i].storage_offset);
if (payload->bytes == NULL) {
const char* marker_data = (const char*)marker->data +
kJPEGMetadataMap[i].signature_length;
const char* marker_data =
(const char*)marker->data + kJPEGMetadataMap[i].signature_length;
const size_t marker_data_length =
marker->data_length - kJPEGMetadataMap[i].signature_length;
if (!MetadataCopy(marker_data, marker_data_length, payload)) return 0;
@@ -250,9 +250,7 @@ static void ContextSkip(j_decompress_ptr cinfo, long jump_size) {
ctx->pub.next_input_byte += jump;
}
static void ContextTerm(j_decompress_ptr cinfo) {
(void)cinfo;
}
static void ContextTerm(j_decompress_ptr cinfo) { (void)cinfo; }
static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
JPEGReadContext* const ctx) {
@@ -267,8 +265,7 @@ static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
}
int ReadJPEG(const uint8_t* const data, size_t data_size,
WebPPicture* const pic, int keep_alpha,
Metadata* const metadata) {
WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
volatile int ok = 0;
int width, height;
int64_t stride;
@@ -285,12 +282,12 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
ctx.data = data;
ctx.data_size = data_size;
memset((j_decompress_ptr)&dinfo, 0, sizeof(dinfo)); // for setjmp safety
memset((j_decompress_ptr)&dinfo, 0, sizeof(dinfo)); // for setjmp safety
dinfo.err = jpeg_std_error(&jerr.pub);
jerr.pub.error_exit = my_error_exit;
if (setjmp(jerr.setjmp_buffer)) {
Error:
Error:
MetadataFree(metadata);
jpeg_destroy_decompress((j_decompress_ptr)&dinfo);
goto End;
@@ -353,11 +350,11 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
MetadataFree(metadata); // In case the caller forgets to free it on error.
}
End:
End:
free(rgb);
return ok;
}
#else // !WEBP_HAVE_JPEG
#else // !WEBP_HAVE_JPEG
int ReadJPEG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
@@ -366,7 +363,8 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
(void)pic;
(void)keep_alpha;
(void)metadata;
fprintf(stderr, "JPEG support not compiled. Please install the libjpeg "
fprintf(stderr,
"JPEG support not compiled. Please install the libjpeg "
"development package before building.\n");
return 0;
}

View File

@@ -33,7 +33,7 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_JPEGDEC_H_

View File

@@ -43,7 +43,7 @@ int MetadataCopy(const char* metadata, size_t metadata_len,
MetadataPayload* const payload);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_METADATA_H_

View File

@@ -22,8 +22,7 @@
#define PNG_USER_MEM_SUPPORTED // for png_create_read_struct_2
#endif
#include <png.h>
#include <setjmp.h> // note: this must be included *after* png.h
#include <setjmp.h> // note: this must be included *after* png.h
#include <stdlib.h>
#include <string.h>
@@ -33,15 +32,14 @@
#include "webp/types.h"
#define LOCAL_PNG_VERSION ((PNG_LIBPNG_VER_MAJOR << 8) | PNG_LIBPNG_VER_MINOR)
#define LOCAL_PNG_PREREQ(maj, min) \
(LOCAL_PNG_VERSION >= (((maj) << 8) | (min)))
#define LOCAL_PNG_PREREQ(maj, min) (LOCAL_PNG_VERSION >= (((maj) << 8) | (min)))
static void PNGAPI error_function(png_structp png, png_const_charp error) {
if (error != NULL) fprintf(stderr, "libpng error: %s\n", error);
longjmp(png_jmpbuf(png), 1);
}
#if LOCAL_PNG_PREREQ(1,4)
#if LOCAL_PNG_PREREQ(1, 4)
typedef png_alloc_size_t LocalPngAllocSize;
#else
typedef png_size_t LocalPngAllocSize;
@@ -113,7 +111,8 @@ static int ProcessRawProfile(const char* profile, size_t profile_len,
}
++src;
// skip the profile name and extract the length.
while (*src != '\0' && *src++ != '\n') {}
while (*src != '\0' && *src++ != '\n') {
}
expected_length = (int)strtol(src, &end, 10);
if (*end != '\n') {
fprintf(stderr, "Malformed raw profile, expected '\\n' got '\\x%.2X'\n",
@@ -135,30 +134,29 @@ static const struct {
MetadataPayload* const payload);
size_t storage_offset;
} kPNGMetadataMap[] = {
// https://exiftool.org/TagNames/PNG.html#TextualData
// See also: ExifTool on CPAN.
{ "Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif) },
{ "Raw profile type xmp", ProcessRawProfile, METADATA_OFFSET(xmp) },
// Exiftool puts exif data in APP1 chunk, too.
{ "Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif) },
// ImageMagick uses lowercase app1.
{ "Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif) },
// XMP Specification Part 3, Section 3 #PNG
{ "XML:com.adobe.xmp", MetadataCopy, METADATA_OFFSET(xmp) },
{ NULL, NULL, 0 },
// https://exiftool.org/TagNames/PNG.html#TextualData
// See also: ExifTool on CPAN.
{"Raw profile type exif", ProcessRawProfile, METADATA_OFFSET(exif)},
{"Raw profile type xmp", ProcessRawProfile, METADATA_OFFSET(xmp)},
// Exiftool puts exif data in APP1 chunk, too.
{"Raw profile type APP1", ProcessRawProfile, METADATA_OFFSET(exif)},
// ImageMagick uses lowercase app1.
{"Raw profile type app1", ProcessRawProfile, METADATA_OFFSET(exif)},
// XMP Specification Part 3, Section 3 #PNG
{"XML:com.adobe.xmp", MetadataCopy, METADATA_OFFSET(xmp)},
{NULL, NULL, 0},
};
// Looks for metadata at both the beginning and end of the PNG file, giving
// preference to the head.
// Returns true on success. The caller must use MetadataFree() on 'metadata' in
// all cases.
static int ExtractMetadataFromPNG(png_structp png,
png_infop const head_info,
static int ExtractMetadataFromPNG(png_structp png, png_infop const head_info,
png_infop const end_info,
Metadata* const metadata) {
int p;
for (p = 0; p < 2; ++p) {
for (p = 0; p < 2; ++p) {
png_infop const info = (p == 0) ? head_info : end_info;
png_textp text = NULL;
const png_uint_32 num = png_get_text(png, info, &text, NULL);
@@ -215,15 +213,15 @@ static int ExtractMetadataFromPNG(png_structp png,
{
png_charp name;
int comp_type;
#if LOCAL_PNG_PREREQ(1,5)
#if LOCAL_PNG_PREREQ(1, 5)
png_bytep profile;
#else
png_charp profile;
#endif
png_uint_32 len;
if (png_get_iCCP(png, info,
&name, &comp_type, &profile, &len) == PNG_INFO_iCCP) {
if (png_get_iCCP(png, info, &name, &comp_type, &profile, &len) ==
PNG_INFO_iCCP) {
if (!MetadataCopy((const char*)profile, len, &metadata->iccp)) return 0;
}
}
@@ -248,12 +246,12 @@ static void ReadFunc(png_structp png_ptr, png_bytep data, png_size_t length) {
}
int ReadPNG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata) {
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
volatile png_structp png = NULL;
volatile png_infop info = NULL;
volatile png_infop end_info = NULL;
PNGReadContext context = { NULL, 0, 0 };
PNGReadContext context = {NULL, 0, 0};
int color_type, bit_depth, interlaced;
int num_channels;
int num_passes;
@@ -268,19 +266,19 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
context.data = data;
context.data_size = data_size;
png = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL,
NULL, MallocFunc, FreeFunc);
png = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL, NULL,
MallocFunc, FreeFunc);
if (png == NULL) goto End;
png_set_error_fn(png, 0, error_function, NULL);
if (setjmp(png_jmpbuf(png))) {
Error:
Error:
MetadataFree(metadata);
goto End;
}
#if LOCAL_PNG_PREREQ(1,5) || \
(LOCAL_PNG_PREREQ(1,4) && PNG_LIBPNG_VER_RELEASE >= 1)
#if LOCAL_PNG_PREREQ(1, 5) || \
(LOCAL_PNG_PREREQ(1, 4) && PNG_LIBPNG_VER_RELEASE >= 1)
// If it looks like the bitstream is going to need more memory than libpng's
// internal limit (default: 8M), try to (reasonably) raise it.
if (data_size > png_get_chunk_malloc_max(png) && data_size < (1u << 24)) {
@@ -295,9 +293,9 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
png_set_read_fn(png, &context, ReadFunc);
png_read_info(png, info);
if (!png_get_IHDR(png, info,
&width, &height, &bit_depth, &color_type, &interlaced,
NULL, NULL)) goto Error;
if (!png_get_IHDR(png, info, &width, &height, &bit_depth, &color_type,
&interlaced, NULL, NULL))
goto Error;
png_set_strip_16(png);
png_set_packing(png);
@@ -368,24 +366,25 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
goto Error;
}
End:
End:
if (png != NULL) {
png_destroy_read_struct((png_structpp)&png,
(png_infopp)&info, (png_infopp)&end_info);
png_destroy_read_struct((png_structpp)&png, (png_infopp)&info,
(png_infopp)&end_info);
}
free(rgb);
return ok;
}
#else // !WEBP_HAVE_PNG
#else // !WEBP_HAVE_PNG
int ReadPNG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata) {
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
(void)data;
(void)data_size;
(void)pic;
(void)keep_alpha;
(void)metadata;
fprintf(stderr, "PNG support not compiled. Please install the libpng "
fprintf(stderr,
"PNG support not compiled. Please install the libpng "
"development package before building.\n");
return 0;
}

View File

@@ -29,11 +29,11 @@ struct WebPPicture;
// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
// Returns true on success.
int ReadPNG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata);
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_PNGDEC_H_

View File

@@ -26,11 +26,11 @@
#endif
typedef enum {
WIDTH_FLAG = 1 << 0,
HEIGHT_FLAG = 1 << 1,
DEPTH_FLAG = 1 << 2,
MAXVAL_FLAG = 1 << 3,
TUPLE_FLAG = 1 << 4,
WIDTH_FLAG = 1 << 0,
HEIGHT_FLAG = 1 << 1,
DEPTH_FLAG = 1 << 2,
MAXVAL_FLAG = 1 << 3,
TUPLE_FLAG = 1 << 4,
ALL_NEEDED_FLAGS = WIDTH_FLAG | HEIGHT_FLAG | DEPTH_FLAG | MAXVAL_FLAG
} PNMFlags;
@@ -39,9 +39,9 @@ typedef struct {
size_t data_size;
int width, height;
int bytes_per_px;
int depth; // 1 (grayscale), 2 (grayscale + alpha), 3 (rgb), 4 (rgba)
int depth; // 1 (grayscale), 2 (grayscale + alpha), 3 (rgb), 4 (rgba)
int max_value;
int type; // 5, 6 or 7
int type; // 5, 6 or 7
int seen_flags;
} PNMInfo;
@@ -55,7 +55,7 @@ static size_t ReadLine(const uint8_t* const data, size_t off, size_t data_size,
char out[MAX_LINE_SIZE + 1], size_t* const out_size) {
size_t i = 0;
*out_size = 0;
redo:
redo:
for (i = 0; i < MAX_LINE_SIZE && off < data_size; ++i) {
out[i] = data[off++];
if (out[i] == '\n') break;
@@ -64,7 +64,7 @@ static size_t ReadLine(const uint8_t* const data, size_t off, size_t data_size,
if (i == 0) goto redo; // empty line
if (out[0] == '#') goto redo; // skip comment
}
out[i] = 0; // safety sentinel
out[i] = 0; // safety sentinel
*out_size = i;
return off;
}
@@ -173,9 +173,8 @@ static size_t ReadHeader(PNMInfo* const info) {
info->depth = (info->type == 5) ? 1 : 3;
}
// perform some basic numerical validation
if (info->width <= 0 || info->height <= 0 ||
info->type <= 0 || info->type >= 9 ||
info->depth <= 0 || info->depth > 4 ||
if (info->width <= 0 || info->height <= 0 || info->type <= 0 ||
info->type >= 9 || info->depth <= 0 || info->depth > 4 ||
info->max_value <= 0 || info->max_value >= 65536) {
return 0;
}
@@ -183,13 +182,12 @@ static size_t ReadHeader(PNMInfo* const info) {
return off;
}
int ReadPNM(const uint8_t* const data, size_t data_size,
WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
int ReadPNM(const uint8_t* const data, size_t data_size, WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata) {
int ok = 0;
int i, j;
uint64_t stride, pixel_bytes, sample_size, depth;
uint8_t* rgb = NULL, *tmp_rgb;
uint8_t *rgb = NULL, *tmp_rgb;
size_t offset;
PNMInfo info;
@@ -209,8 +207,8 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
// Some basic validations.
if (pic == NULL) goto End;
if (info.width > WEBP_MAX_DIMENSION || info.height > WEBP_MAX_DIMENSION) {
fprintf(stderr, "Invalid %dx%d dimension for PNM\n",
info.width, info.height);
fprintf(stderr, "Invalid %dx%d dimension for PNM\n", info.width,
info.height);
goto End;
}
@@ -258,8 +256,8 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
const uint32_t round = info.max_value / 2;
int k = 0;
for (i = 0; i < info.width * info.depth; ++i) {
uint32_t v = (sample_size == 2) ? 256u * in[2 * i + 0] + in[2 * i + 1]
: in[i];
uint32_t v =
(sample_size == 2) ? 256u * in[2 * i + 0] + in[2 * i + 1] : in[i];
if (info.max_value != 255) v = (v * 255u + round) / info.max_value;
if (v > 255u) v = 255u;
if (info.depth > 2) {
@@ -291,7 +289,7 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
if (!ok) goto End;
ok = 1;
End:
End:
free((void*)rgb);
(void)metadata;

View File

@@ -33,7 +33,7 @@ int ReadPNM(const uint8_t* const data, size_t data_size,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_PNMDEC_H_

View File

@@ -31,9 +31,9 @@ static const struct {
ttag_t tag;
size_t storage_offset;
} kTIFFMetadataMap[] = {
{ TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp) },
{ TIFFTAG_XMLPACKET, METADATA_OFFSET(xmp) },
{ 0, 0 },
{TIFFTAG_ICCPROFILE, METADATA_OFFSET(iccp)},
{TIFFTAG_XMLPACKET, METADATA_OFFSET(xmp)},
{0, 0},
};
// Returns true on success. The caller must use MetadataFree() on 'metadata' in
@@ -86,9 +86,9 @@ static toff_t MySize(thandle_t opaque) {
static toff_t MySeek(thandle_t opaque, toff_t offset, int whence) {
MyData* const my_data = (MyData*)opaque;
offset += (whence == SEEK_CUR) ? my_data->pos
: (whence == SEEK_SET) ? 0
: my_data->size;
offset += (whence == SEEK_CUR) ? my_data->pos
: (whence == SEEK_SET) ? 0
: my_data->size;
if (offset > my_data->size) return (toff_t)-1;
my_data->pos = offset;
return offset;
@@ -120,7 +120,7 @@ static tsize_t MyRead(thandle_t opaque, void* dst, tsize_t size) {
// Unmultiply Argb data. Taken from dsp/alpha_processing
// (we don't want to force a dependency to a libdspdec library).
#define MFIX 24 // 24bit fixed-point arithmetic
#define MFIX 24 // 24bit fixed-point arithmetic
#define HALF ((1u << MFIX) >> 1)
static uint32_t Unmult(uint8_t x, uint32_t mult) {
@@ -128,9 +128,7 @@ static uint32_t Unmult(uint8_t x, uint32_t mult) {
return (v > 255u) ? 255u : v;
}
static WEBP_INLINE uint32_t GetScale(uint32_t a) {
return (255u << MFIX) / a;
}
static WEBP_INLINE uint32_t GetScale(uint32_t a) { return (255u << MFIX) / a; }
#undef MFIX
#undef HALF
@@ -140,7 +138,7 @@ static void MultARGBRow(uint8_t* ptr, int width) {
for (x = 0; x < width; ++x, ptr += 4) {
const uint32_t alpha = ptr[3];
if (alpha < 255) {
if (alpha == 0) { // alpha == 0
if (alpha == 0) { // alpha == 0
ptr[0] = ptr[1] = ptr[2] = 0;
} else {
const uint32_t scale = GetScale(alpha);
@@ -153,9 +151,8 @@ static void MultARGBRow(uint8_t* ptr, int width) {
}
int ReadTIFF(const uint8_t* const data, size_t data_size,
WebPPicture* const pic, int keep_alpha,
Metadata* const metadata) {
MyData my_data = { data, (toff_t)data_size, 0 };
WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
MyData my_data = {data, (toff_t)data_size, 0};
TIFF* tif;
uint32_t image_width, image_height, tile_width, tile_height;
uint64_t stride;
@@ -171,8 +168,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
return 0;
}
tif = TIFFClientOpen("Memory", "r", &my_data,
MyRead, MyRead, MySeek, MyClose,
tif = TIFFClientOpen("Memory", "r", &my_data, MyRead, MyRead, MySeek, MyClose,
MySize, MyMapFile, MyUnmapFile);
if (tif == NULL) {
fprintf(stderr, "Error! Cannot parse TIFF file\n");
@@ -181,9 +177,10 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
dircount = TIFFNumberOfDirectories(tif);
if (dircount > 1) {
fprintf(stderr, "Warning: multi-directory TIFF files are not supported.\n"
"Only the first will be used, %d will be ignored.\n",
dircount - 1);
fprintf(stderr,
"Warning: multi-directory TIFF files are not supported.\n"
"Only the first will be used, %d will be ignored.\n",
dircount - 1);
}
if (!TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &samples_per_px)) {
fprintf(stderr, "Error! Cannot retrieve TIFF samples-per-pixel info.\n");
@@ -253,9 +250,10 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
tmp += stride;
}
}
ok = keep_alpha
? WebPPictureImportRGBA(pic, (const uint8_t*)raster, (int)stride)
: WebPPictureImportRGBX(pic, (const uint8_t*)raster, (int)stride);
ok =
keep_alpha
? WebPPictureImportRGBA(pic, (const uint8_t*)raster, (int)stride)
: WebPPictureImportRGBX(pic, (const uint8_t*)raster, (int)stride);
}
_TIFFfree(raster);
} else {
@@ -272,11 +270,11 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
}
}
}
End:
End:
TIFFClose(tif);
return ok;
}
#else // !WEBP_HAVE_TIFF
#else // !WEBP_HAVE_TIFF
int ReadTIFF(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
@@ -285,7 +283,8 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
(void)pic;
(void)keep_alpha;
(void)metadata;
fprintf(stderr, "TIFF support not compiled. Please install the libtiff "
fprintf(stderr,
"TIFF support not compiled. Please install the libtiff "
"development package before building.\n");
return 0;
}

View File

@@ -33,7 +33,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_TIFFDEC_H_

View File

@@ -31,9 +31,14 @@
// WebP decoding
static const char* const kStatusMessages[VP8_STATUS_NOT_ENOUGH_DATA + 1] = {
"OK", "OUT_OF_MEMORY", "INVALID_PARAM", "BITSTREAM_ERROR",
"UNSUPPORTED_FEATURE", "SUSPENDED", "USER_ABORT", "NOT_ENOUGH_DATA"
};
"OK",
"OUT_OF_MEMORY",
"INVALID_PARAM",
"BITSTREAM_ERROR",
"UNSUPPORTED_FEATURE",
"SUSPENDED",
"USER_ABORT",
"NOT_ENOUGH_DATA"};
static void PrintAnimationWarning(const WebPDecoderConfig* const config) {
if (config->input.has_animation) {
@@ -53,8 +58,7 @@ void PrintWebPError(const char* const in_file, int status) {
fprintf(stderr, "\n");
}
int LoadWebP(const char* const in_file,
const uint8_t** data, size_t* data_size,
int LoadWebP(const char* const in_file, const uint8_t** data, size_t* data_size,
WebPBitstreamFeatures* bitstream) {
VP8StatusCode status;
WebPBitstreamFeatures local_features;
@@ -84,9 +88,8 @@ VP8StatusCode DecodeWebP(const uint8_t* const data, size_t data_size,
return WebPDecode(data, data_size, config);
}
VP8StatusCode DecodeWebPIncremental(
const uint8_t* const data, size_t data_size,
WebPDecoderConfig* const config) {
VP8StatusCode DecodeWebPIncremental(const uint8_t* const data, size_t data_size,
WebPDecoderConfig* const config) {
VP8StatusCode status = VP8_STATUS_OK;
if (config == NULL) return VP8_STATUS_INVALID_PARAM;
@@ -111,7 +114,7 @@ VP8StatusCode DecodeWebPIncremental(
static int ExtractMetadata(const uint8_t* const data, size_t data_size,
Metadata* const metadata) {
WebPData webp_data = { data, data_size };
WebPData webp_data = {data, data_size};
WebPDemuxer* const demux = WebPDemux(&webp_data);
WebPChunkIterator chunk_iter;
uint32_t flags;
@@ -143,8 +146,7 @@ static int ExtractMetadata(const uint8_t* const data, size_t data_size,
// -----------------------------------------------------------------------------
int ReadWebP(const uint8_t* const data, size_t data_size,
WebPPicture* const pic,
int keep_alpha, Metadata* const metadata) {
WebPPicture* const pic, int keep_alpha, Metadata* const metadata) {
int ok = 0;
VP8StatusCode status = VP8_STATUS_OK;
WebPDecoderConfig config;
@@ -223,7 +225,7 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
argb += pic->argb_stride;
}
}
} while (0); // <- so we can 'break' out of the loop
} while (0); // <- so we can 'break' out of the loop
if (status != VP8_STATUS_OK) {
PrintWebPError("input data", status);

View File

@@ -35,8 +35,7 @@ void PrintWebPError(const char* const in_file, int status);
// Reads a WebP from 'in_file', returning the contents and size in 'data' and
// 'data_size'. If not NULL, 'bitstream' is populated using WebPGetFeatures().
// Returns true on success.
int LoadWebP(const char* const in_file,
const uint8_t** data, size_t* data_size,
int LoadWebP(const char* const in_file, const uint8_t** data, size_t* data_size,
WebPBitstreamFeatures* bitstream);
// Decodes the WebP contained in 'data'.
@@ -48,9 +47,8 @@ VP8StatusCode DecodeWebP(const uint8_t* const data, size_t data_size,
WebPDecoderConfig* const config);
// Same as DecodeWebP(), but using the incremental decoder.
VP8StatusCode DecodeWebPIncremental(
const uint8_t* const data, size_t data_size,
WebPDecoderConfig* const config);
VP8StatusCode DecodeWebPIncremental(const uint8_t* const data, size_t data_size,
WebPDecoderConfig* const config);
//------------------------------------------------------------------------------
@@ -60,11 +58,11 @@ VP8StatusCode DecodeWebPIncremental(
// or YUVA. Otherwise, alpha channel is dropped and output is RGB or YUV.
// Returns true on success.
int ReadWebP(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata);
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_WEBPDEC_H_

View File

@@ -25,31 +25,31 @@
#endif
#define CINTERFACE
#define COBJMACROS
#define _WIN32_IE 0x500 // Workaround bug in shlwapi.h when compiling C++
// code with COBJMACROS.
#define _WIN32_IE \
0x500 // Workaround bug in shlwapi.h when compiling C++
// code with COBJMACROS.
#include <ole2.h> // CreateStreamOnHGlobal()
#include <shlwapi.h>
#include <tchar.h>
#include <windows.h>
#include <wincodec.h>
#include <windows.h>
#include "../examples/unicode.h"
#include "./imageio_util.h"
#include "./metadata.h"
#include "webp/encode.h"
#define IFS(fn) \
do { \
if (SUCCEEDED(hr)) { \
hr = (fn); \
if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
} \
#define IFS(fn) \
do { \
if (SUCCEEDED(hr)) { \
hr = (fn); \
if (FAILED(hr)) fprintf(stderr, #fn " failed %08lx\n", hr); \
} \
} while (0)
// modified version of DEFINE_GUID from guiddef.h.
#define WEBP_DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
static const GUID name = \
{ l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 } }
static const GUID name = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
#ifdef __cplusplus
#define MAKE_REFGUID(x) (x)
@@ -66,23 +66,17 @@ typedef struct WICFormatImporter {
// From Microsoft SDK 7.0a -- wincodec.h
// Create local copies for compatibility when building against earlier
// versions of the SDK.
WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppBGR_,
0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppRGB_,
0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_,
0x6fddc324, 0x4e03, 0x4bfe,
WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppBGR_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1,
0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0c);
WEBP_DEFINE_GUID(GUID_WICPixelFormat24bppRGB_, 0x6fddc324, 0x4e03, 0x4bfe, 0xb1,
0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0d);
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_, 0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x0f);
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_,
0xf5c7ad2d, 0x6a8d, 0x43dd,
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_, 0xf5c7ad2d, 0x6a8d, 0x43dd,
0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_,
0x1562ff7c, 0xd352, 0x46f9,
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_, 0x1562ff7c, 0xd352, 0x46f9,
0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_,
0x6fddc324, 0x4e03, 0x4bfe,
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_, 0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);
static HRESULT OpenInputStream(const char* filename, IStream** stream) {
@@ -147,8 +141,7 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
if (SUCCEEDED(hr)) {
UINT num_color_contexts;
IFS(IWICBitmapFrameDecode_GetColorContexts(frame,
count, color_contexts,
IFS(IWICBitmapFrameDecode_GetColorContexts(frame, count, color_contexts,
&num_color_contexts));
assert(FAILED(hr) || num_color_contexts <= count);
for (i = 0; SUCCEEDED(hr) && i < num_color_contexts; ++i) {
@@ -156,8 +149,8 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
IFS(IWICColorContext_GetType(color_contexts[i], &type));
if (SUCCEEDED(hr) && type == WICColorContextProfile) {
UINT size;
IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
0, NULL, &size));
IFS(IWICColorContext_GetProfileBytes(color_contexts[i], 0, NULL,
&size));
if (SUCCEEDED(hr) && size > 0) {
iccp->bytes = (uint8_t*)malloc(size);
if (iccp->bytes == NULL) {
@@ -165,9 +158,8 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
break;
}
iccp->size = size;
IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
(UINT)iccp->size, iccp->bytes,
&size));
IFS(IWICColorContext_GetProfileBytes(
color_contexts[i], (UINT)iccp->size, iccp->bytes, &size));
if (SUCCEEDED(hr) && size != iccp->size) {
fprintf(stderr, "Warning! ICC profile size (%u) != expected (%u)\n",
size, (uint32_t)iccp->size);
@@ -209,8 +201,7 @@ static int HasPalette(GUID pixel_format) {
static int HasAlpha(IWICImagingFactory* const factory,
IWICBitmapDecoder* const decoder,
IWICBitmapFrameDecode* const frame,
GUID pixel_format) {
IWICBitmapFrameDecode* const frame, GUID pixel_format) {
int has_alpha;
if (HasPalette(pixel_format)) {
IWICPalette* frame_palette = NULL;
@@ -245,21 +236,20 @@ static int HasAlpha(IWICImagingFactory* const factory,
return has_alpha;
}
int ReadPictureWithWIC(const char* const filename,
WebPPicture* const pic, int keep_alpha,
Metadata* const metadata) {
int ReadPictureWithWIC(const char* const filename, WebPPicture* const pic,
int keep_alpha, Metadata* const metadata) {
// From Microsoft SDK 6.0a -- ks.h
// Define a local copy to avoid link errors under mingw.
WEBP_DEFINE_GUID(GUID_NULL_, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
static const WICFormatImporter kAlphaFormatImporters[] = {
{ &GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA },
{ &GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA },
{ NULL, 0, NULL },
{&GUID_WICPixelFormat32bppBGRA_, 4, WebPPictureImportBGRA},
{&GUID_WICPixelFormat32bppRGBA_, 4, WebPPictureImportRGBA},
{NULL, 0, NULL},
};
static const WICFormatImporter kNonAlphaFormatImporters[] = {
{ &GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR },
{ &GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB },
{ NULL, 0, NULL },
{&GUID_WICPixelFormat24bppBGR_, 3, WebPPictureImportBGR},
{&GUID_WICPixelFormat24bppRGB_, 3, WebPPictureImportRGB},
{NULL, 0, NULL},
};
HRESULT hr = S_OK;
IWICBitmapFrameDecode* frame = NULL;
@@ -274,26 +264,20 @@ int ReadPictureWithWIC(const char* const filename,
const WICFormatImporter* importer = NULL;
GUID src_container_format = GUID_NULL_;
// From Windows Kits\10\Include\10.0.19041.0\um\wincodec.h
WEBP_DEFINE_GUID(GUID_ContainerFormatWebp_,
0xe094b0e2, 0x67f2, 0x45b3,
0xb0, 0xea, 0x11, 0x53, 0x37, 0xca, 0x7c, 0xf3);
WEBP_DEFINE_GUID(GUID_ContainerFormatWebp_, 0xe094b0e2, 0x67f2, 0x45b3, 0xb0,
0xea, 0x11, 0x53, 0x37, 0xca, 0x7c, 0xf3);
static const GUID* kAlphaContainers[] = {
&GUID_ContainerFormatBmp,
&GUID_ContainerFormatPng,
&GUID_ContainerFormatTiff,
&GUID_ContainerFormatWebp_,
NULL
};
&GUID_ContainerFormatBmp, &GUID_ContainerFormatPng,
&GUID_ContainerFormatTiff, &GUID_ContainerFormatWebp_, NULL};
int has_alpha = 0;
int64_t stride;
if (filename == NULL || pic == NULL) return 0;
IFS(CoInitialize(NULL));
IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
CLSCTX_INPROC_SERVER,
MAKE_REFGUID(IID_IWICImagingFactory),
(LPVOID*)&factory));
IFS(CoCreateInstance(
MAKE_REFGUID(CLSID_WICImagingFactory), NULL, CLSCTX_INPROC_SERVER,
MAKE_REFGUID(IID_IWICImagingFactory), (LPVOID*)&factory));
if (hr == REGDB_E_CLASSNOTREG) {
fprintf(stderr,
"Couldn't access Windows Imaging Component (are you running "
@@ -303,8 +287,7 @@ int ReadPictureWithWIC(const char* const filename,
// Prepare for image decoding.
IFS(OpenInputStream(filename, &stream));
IFS(IWICImagingFactory_CreateDecoderFromStream(
factory, stream, NULL,
WICDecodeMetadataCacheOnDemand, &decoder));
factory, stream, NULL, WICDecodeMetadataCacheOnDemand, &decoder));
IFS(IWICBitmapDecoder_GetFrameCount(decoder, &frame_count));
if (SUCCEEDED(hr)) {
if (frame_count == 0) {
@@ -338,18 +321,15 @@ int ReadPictureWithWIC(const char* const filename,
hr == S_OK && importer->import != NULL; ++importer) {
BOOL can_convert;
const HRESULT cchr = IWICFormatConverter_CanConvert(
converter,
MAKE_REFGUID(src_pixel_format),
MAKE_REFGUID(*importer->pixel_format),
&can_convert);
converter, MAKE_REFGUID(src_pixel_format),
MAKE_REFGUID(*importer->pixel_format), &can_convert);
if (SUCCEEDED(cchr) && can_convert) break;
}
if (importer->import == NULL) hr = E_FAIL;
IFS(IWICFormatConverter_Initialize(converter, (IWICBitmapSource*)frame,
importer->pixel_format,
WICBitmapDitherTypeNone,
NULL, 0.0, WICBitmapPaletteTypeCustom));
IFS(IWICFormatConverter_Initialize(
converter, (IWICBitmapSource*)frame, importer->pixel_format,
WICBitmapDitherTypeNone, NULL, 0.0, WICBitmapPaletteTypeCustom));
// Decode.
IFS(IWICFormatConverter_GetSize(converter, &width, &height));
@@ -361,18 +341,17 @@ int ReadPictureWithWIC(const char* const filename,
if (SUCCEEDED(hr)) {
rgb = (BYTE*)malloc((size_t)stride * height);
if (rgb == NULL)
hr = E_OUTOFMEMORY;
if (rgb == NULL) hr = E_OUTOFMEMORY;
}
IFS(IWICFormatConverter_CopyPixels(converter, NULL,
(UINT)stride, (UINT)stride * height, rgb));
IFS(IWICFormatConverter_CopyPixels(converter, NULL, (UINT)stride,
(UINT)stride * height, rgb));
// WebP conversion.
if (SUCCEEDED(hr)) {
int ok;
pic->width = width;
pic->height = height;
pic->use_argb = 1; // For WIC, we always force to argb
pic->use_argb = 1; // For WIC, we always force to argb
ok = importer->import(pic, rgb, (int)stride);
if (!ok) hr = E_FAIL;
}
@@ -394,7 +373,7 @@ int ReadPictureWithWIC(const char* const filename,
free(rgb);
return SUCCEEDED(hr);
}
#else // !HAVE_WINCODEC_H
#else // !HAVE_WINCODEC_H
int ReadPictureWithWIC(const char* const filename,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
@@ -402,10 +381,11 @@ int ReadPictureWithWIC(const char* const filename,
(void)pic;
(void)keep_alpha;
(void)metadata;
fprintf(stderr, "Windows Imaging Component (WIC) support not compiled. "
"Visual Studio and mingw-w64 builds support WIC. Make sure "
"wincodec.h detection is working correctly if using autoconf "
"and HAVE_WINCODEC_H is defined before building.\n");
fprintf(stderr,
"Windows Imaging Component (WIC) support not compiled. "
"Visual Studio and mingw-w64 builds support WIC. Make sure "
"wincodec.h detection is working correctly if using autoconf "
"and HAVE_WINCODEC_H is defined before building.\n");
return 0;
}
#endif // HAVE_WINCODEC_H

View File

@@ -28,7 +28,7 @@ int ReadPictureWithWIC(const char* const filename,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_IMAGEIO_WICDEC_H_

View File

@@ -26,9 +26,7 @@
//------------------------------------------------------------------------------
int SharpYuvGetVersion(void) {
return SHARPYUV_VERSION;
}
int SharpYuvGetVersion(void) { return SHARPYUV_VERSION; }
//------------------------------------------------------------------------------
// Sharp RGB->YUV conversion
@@ -49,8 +47,8 @@ static int GetPrecisionShift(int rgb_bit_depth) {
: (kMaxBitDepth - rgb_bit_depth);
}
typedef int16_t fixed_t; // signed type with extra precision for UV
typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
typedef int16_t fixed_t; // signed type with extra precision for UV
typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
//------------------------------------------------------------------------------
@@ -121,7 +119,7 @@ static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
dst[0 * uv_w] = (fixed_t)(r - W);
dst[1 * uv_w] = (fixed_t)(g - W);
dst[2 * uv_w] = (fixed_t)(b - W);
dst += 1;
dst += 1;
src1 += 2;
src2 += 2;
} while (++i < uv_w);
@@ -148,12 +146,9 @@ static WEBP_INLINE int Shift(int v, int shift) {
return (shift >= 0) ? (v << shift) : (v >> -shift);
}
static void ImportOneRow(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr,
int rgb_step,
int rgb_bit_depth,
int pic_width,
static void ImportOneRow(const uint8_t* const r_ptr, const uint8_t* const g_ptr,
const uint8_t* const b_ptr, int rgb_step,
int rgb_bit_depth, int pic_width,
fixed_y_t* const dst) {
// Convert the rgb_step from a number of bytes to a number of uint8_t or
// uint16_t values depending the bit depth.
@@ -181,18 +176,14 @@ static void ImportOneRow(const uint8_t* const r_ptr,
}
static void InterpolateTwoRows(const fixed_y_t* const best_y,
const fixed_t* prev_uv,
const fixed_t* cur_uv,
const fixed_t* next_uv,
int w,
fixed_y_t* out1,
fixed_y_t* out2,
int rgb_bit_depth) {
const fixed_t* prev_uv, const fixed_t* cur_uv,
const fixed_t* next_uv, int w, fixed_y_t* out1,
fixed_y_t* out2, int rgb_bit_depth) {
const int uv_w = w >> 1;
const int len = (w - 1) >> 1; // length to filter
const int len = (w - 1) >> 1; // length to filter
int k = 3;
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
while (k-- > 0) { // process each R/G/B segments in turn
while (k-- > 0) { // process each R/G/B segments in turn
// special boundary case for i==0
out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
@@ -212,7 +203,7 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
out1 += w;
out2 += w;
prev_uv += uv_w;
cur_uv += uv_w;
cur_uv += uv_w;
next_uv += uv_w;
}
}
@@ -220,16 +211,16 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
const int coeffs[4], int sfix) {
const int srounder = 1 << (YUV_FIX + sfix - 1);
const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
coeffs[3] + srounder;
const int luma =
coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + coeffs[3] + srounder;
return (luma >> (YUV_FIX + sfix));
}
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
int u_stride, uint8_t* v_ptr, int v_stride,
int rgb_bit_depth,
int yuv_bit_depth, int width, int height,
int rgb_bit_depth, int yuv_bit_depth, int width,
int height,
const SharpYuvConversionMatrix* yuv_matrix) {
int i, j;
const fixed_t* const best_uv_base = best_uv;
@@ -319,7 +310,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
// TODO(skal): allocate one big memory chunk. But for now, it's easier
// for valgrind debugging to have several chunks.
fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
@@ -335,9 +326,8 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
assert(w > 0);
assert(h > 0);
if (best_y_base == NULL || best_uv_base == NULL ||
target_y_base == NULL || target_uv_base == NULL ||
best_rgb_y == NULL || best_rgb_uv == NULL ||
if (best_y_base == NULL || best_uv_base == NULL || target_y_base == NULL ||
target_uv_base == NULL || best_rgb_y == NULL || best_rgb_uv == NULL ||
tmp_buffer == NULL) {
ok = 0;
goto End;
@@ -350,8 +340,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
fixed_y_t* const src2 = tmp_buffer + 3 * w;
// prepare two rows of input
ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
src1);
ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, src1);
if (!is_last_row) {
ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
rgb_step, rgb_bit_depth, width, src2);
@@ -390,8 +379,8 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
fixed_y_t* const src2 = tmp_buffer + 3 * w;
{
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
src1, src2, rgb_bit_depth);
InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2,
rgb_bit_depth);
prev_uv = cur_uv;
cur_uv = next_uv;
}
@@ -424,7 +413,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
width, height, yuv_matrix);
End:
End:
free(best_y_base);
free(best_uv_base);
free(target_y_base);
@@ -440,16 +429,18 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h> // NOLINT
#define LOCK_ACCESS \
static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN \
do { \
(void)pthread_mutex_unlock(&sharpyuv_lock); \
return; \
} while (0)
#define LOCK_ACCESS \
static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN \
do { \
(void)pthread_mutex_unlock(&sharpyuv_lock); \
return; \
} while (0)
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
#define LOCK_ACCESS do {} while (0)
#define LOCK_ACCESS \
do { \
} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)

View File

@@ -67,33 +67,33 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
static const SharpYuvConversionMatrix kWebpMatrix = {
{16839, 33059, 6420, 16 << 16},
{-9719, -19081, 28800, 128 << 16},
{28800, -24116, -4684, 128 << 16},
{16839, 33059, 6420, 16 << 16},
{-9719, -19081, 28800, 128 << 16},
{28800, -24116, -4684, 128 << 16},
};
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
{16829, 33039, 6416, 16 << 16},
{-9714, -19071, 28784, 128 << 16},
{28784, -24103, -4681, 128 << 16},
{16829, 33039, 6416, 16 << 16},
{-9714, -19071, 28784, 128 << 16},
{28784, -24103, -4681, 128 << 16},
};
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
static const SharpYuvConversionMatrix kRec601FullMatrix = {
{19595, 38470, 7471, 0},
{-11058, -21710, 32768, 128 << 16},
{32768, -27439, -5329, 128 << 16},
{19595, 38470, 7471, 0},
{-11058, -21710, 32768, 128 << 16},
{32768, -27439, -5329, 128 << 16},
};
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
{11966, 40254, 4064, 16 << 16},
{-6596, -22189, 28784, 128 << 16},
{28784, -26145, -2639, 128 << 16},
{11966, 40254, 4064, 16 << 16},
{-6596, -22189, 28784, 128 << 16},
{28784, -26145, -2639, 128 << 16},
};
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
static const SharpYuvConversionMatrix kRec709FullMatrix = {
{13933, 46871, 4732, 0},
{-7509, -25259, 32768, 128 << 16},
{32768, -29763, -3005, 128 << 16},
{13933, 46871, 4732, 0},
{-7509, -25259, 32768, 128 << 16},
{32768, -29763, -3005, 128 << 16},
};
const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(

View File

@@ -20,8 +20,8 @@ extern "C" {
// Range of YUV values.
typedef enum {
kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit)
kSharpYuvRangeLimited // Y in [16;235], YUV in [16;240] (for 8 bit)
kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit)
kSharpYuvRangeLimited // Y in [16;235], YUV in [16;240] (for 8 bit)
} SharpYuvRange;
// Constants that define a YUV color space.

View File

@@ -67,8 +67,7 @@ void SharpYuvInitGammaTables(void) {
} else {
value = (1. + a) * pow(g, 1. / kGammaF) - a;
}
kLinearToGammaTabS[v] =
(uint32_t)(final_scale * value + 0.5);
kLinearToGammaTabS[v] = (uint32_t)(final_scale * value + 0.5);
}
// to prevent small rounding errors to cause read-overflow:
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
@@ -198,7 +197,7 @@ static float ToLinearLog100(float gamma) {
// The function is non-bijective so choose the middle of [0, 0.01].
const float mid_interval = 0.01f / 2.f;
return (gamma <= 0.0f) ? mid_interval
: Powf(10.0f, 2.f * (MIN(gamma, 1.f) - 1.0f));
: Powf(10.0f, 2.f * (MIN(gamma, 1.f) - 1.0f));
}
static float FromLinearLog100(float linear) {
@@ -209,12 +208,12 @@ static float ToLinearLog100Sqrt10(float gamma) {
// The function is non-bijective so choose the middle of [0, 0.00316227766f[.
const float mid_interval = 0.00316227766f / 2.f;
return (gamma <= 0.0f) ? mid_interval
: Powf(10.0f, 2.5f * (MIN(gamma, 1.f) - 1.0f));
: Powf(10.0f, 2.5f * (MIN(gamma, 1.f) - 1.0f));
}
static float FromLinearLog100Sqrt10(float linear) {
return (linear < 0.00316227766f) ? 0.0f
: 1.0f + Log10f(MIN(linear, 1.f)) / 2.5f;
: 1.0f + Log10f(MIN(linear, 1.f)) / 2.5f;
}
static float ToLinearIec61966(float gamma) {

View File

@@ -14,9 +14,9 @@
#include "sharpyuv/sharpyuv_dsp.h"
#if defined(WEBP_USE_NEON)
#include <arm_neon.h>
#include <assert.h>
#include <stdlib.h>
#include <arm_neon.h>
static uint16_t clip_NEON(int v, int max) {
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
@@ -35,11 +35,11 @@ static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
const int16x8_t D = vsubq_s16(A, B); // diff_y
const int16x8_t F = vaddq_s16(C, D); // new_y
const int16x8_t D = vsubq_s16(A, B); // diff_y
const int16x8_t F = vaddq_s16(C, D); // new_y
const uint16x8_t H =
vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
vst1q_u16(dst + i, H);
sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
}
@@ -60,8 +60,8 @@ static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
const int16x8_t A = vld1q_s16(ref + i);
const int16x8_t B = vld1q_s16(src + i);
const int16x8_t C = vld1q_s16(dst + i);
const int16x8_t D = vsubq_s16(A, B); // diff_uv
const int16x8_t E = vaddq_s16(C, D); // new_uv
const int16x8_t D = vsubq_s16(A, B); // diff_uv
const int16x8_t E = vaddq_s16(C, D); // new_uv
vst1q_s16(dst + i, E);
}
for (; i < len; ++i) {

View File

@@ -15,7 +15,6 @@
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include <stdlib.h>
#include "src/dsp/cpu.h"
@@ -45,7 +44,7 @@ static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
const __m128i F = _mm_add_epi16(C, D); // new_y
const __m128i G = _mm_or_si128(E, one); // -1 or 1
const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
_mm_storeu_si128((__m128i*)(dst + i), H);
sum = _mm_add_epi32(sum, I);
}
@@ -67,8 +66,8 @@ static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
const __m128i D = _mm_sub_epi16(A, B); // diff_uv
const __m128i E = _mm_add_epi16(C, D); // new_uv
const __m128i D = _mm_sub_epi16(A, B); // diff_uv
const __m128i E = _mm_add_epi16(C, D); // new_uv
_mm_storeu_si128((__m128i*)(dst + i), E);
}
for (; i < len; ++i) {
@@ -94,8 +93,8 @@ static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
const __m128i a1b0 = _mm_add_epi16(a1, b0);
const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1
const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
const __m128i d0 = _mm_add_epi16(c1, a0);

View File

@@ -79,8 +79,7 @@ WEBP_NODISCARD static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
if (dec->method < ALPHA_NO_COMPRESSION ||
dec->method > ALPHA_LOSSLESS_COMPRESSION ||
dec->filter >= WEBP_FILTER_LAST ||
dec->pre_processing > ALPHA_PREPROCESSED_LEVELS ||
rsrv != 0) {
dec->pre_processing > ALPHA_PREPROCESSED_LEVELS || rsrv != 0) {
return 0;
}
@@ -189,7 +188,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
}
if (!dec->is_alpha_decoded) {
if (dec->alph_dec == NULL) { // Initialize decoder.
if (dec->alph_dec == NULL) { // Initialize decoder.
dec->alph_dec = ALPHNew();
if (dec->alph_dec == NULL) {
VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
@@ -197,20 +196,20 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
return NULL;
}
if (!AllocateAlphaPlane(dec, io)) goto Error;
if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size,
io, dec->alpha_plane)) {
if (!ALPHInit(dec->alph_dec, dec->alpha_data, dec->alpha_data_size, io,
dec->alpha_plane)) {
VP8LDecoder* const vp8l_dec = dec->alph_dec->vp8l_dec;
VP8SetError(dec,
(vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
: vp8l_dec->status,
"Alpha decoder initialization failed.");
VP8SetError(
dec,
(vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY : vp8l_dec->status,
"Alpha decoder initialization failed.");
goto Error;
}
// if we allowed use of alpha dithering, check whether it's needed at all
if (dec->alph_dec->pre_processing != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering = 0; // disable dithering
dec->alpha_dithering = 0; // disable dithering
} else {
num_rows = height - row; // decode everything in one pass
num_rows = height - row; // decode everything in one pass
}
}
@@ -218,16 +217,15 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
assert(row + num_rows <= height);
if (!ALPHDecode(dec, row, num_rows)) goto Error;
if (dec->is_alpha_decoded) { // finished?
if (dec->is_alpha_decoded) { // finished?
ALPHDelete(dec->alph_dec);
dec->alph_dec = NULL;
if (dec->alpha_dithering > 0) {
uint8_t* const alpha = dec->alpha_plane + io->crop_top * width
+ io->crop_left;
if (!WebPDequantizeLevels(alpha,
io->crop_right - io->crop_left,
io->crop_bottom - io->crop_top,
width, dec->alpha_dithering)) {
uint8_t* const alpha =
dec->alpha_plane + io->crop_top * width + io->crop_left;
if (!WebPDequantizeLevels(alpha, io->crop_right - io->crop_left,
io->crop_bottom - io->crop_top, width,
dec->alpha_dithering)) {
goto Error;
}
}
@@ -237,7 +235,7 @@ WEBP_NODISCARD const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
// Return a pointer to the current decoded row.
return dec->alpha_plane + row * width;
Error:
Error:
WebPDeallocateAlphaMemory(dec);
return NULL;
}

View File

@@ -15,10 +15,10 @@
#define WEBP_DEC_ALPHAI_DEC_H_
#include "src/dec/vp8_dec.h"
#include "src/webp/types.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/filters_utils.h"
#include "src/webp/types.h"
#ifdef __cplusplus
extern "C" {
@@ -35,11 +35,11 @@ struct ALPHDecoder {
int pre_processing;
struct VP8LDecoder* vp8l_dec;
VP8Io io;
int use_8b_decode; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
int use_8b_decode; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
uint8_t* output;
const uint8_t* prev_line; // last output row (or NULL)
const uint8_t* prev_line; // last output row (or NULL)
};
//------------------------------------------------------------------------------
@@ -51,7 +51,7 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DEC_ALPHAI_DEC_H_

View File

@@ -26,10 +26,9 @@
// WebPDecBuffer
// Number of bytes per pixel for the different color-spaces.
static const uint8_t kModeBpp[MODE_LAST] = {
3, 4, 3, 4, 4, 2, 2,
4, 4, 4, 2, // pre-multiplied modes
1, 1 };
static const uint8_t kModeBpp[MODE_LAST] = {3, 4, 3, 4, 4, 2, 2, //
4, 4, 4, 2, // pre-multiplied modes
1, 1};
// Convert to an integer to handle both the unsigned/signed enum cases
// without the need for casting to remove type limit warnings.
@@ -39,8 +38,8 @@ int IsValidColorspace(int webp_csp_mode) {
// strictly speaking, the very last (or first, if flipped) row
// doesn't require padding.
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
@@ -49,9 +48,9 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
const int height = buffer->height;
if (!IsValidColorspace(mode)) {
ok = 0;
} else if (!WebPIsRGBMode(mode)) { // YUV checks
} else if (!WebPIsRGBMode(mode)) { // YUV checks
const WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int uv_width = (width + 1) / 2;
const int uv_width = (width + 1) / 2;
const int uv_height = (height + 1) / 2;
const int y_stride = abs(buf->y_stride);
const int u_stride = abs(buf->u_stride);
@@ -75,7 +74,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
ok &= (a_size <= buf->a_size);
ok &= (buf->a != NULL);
}
} else { // RGB checks
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
const int stride = abs(buf->stride);
const uint64_t size =
@@ -127,7 +126,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
}
buffer->private_memory = output;
if (!WebPIsRGBMode(mode)) { // YUVA initialization
if (!WebPIsRGBMode(mode)) { // YUVA initialization
WebPYUVABuffer* const buf = &buffer->u.YUVA;
buf->y = output;
buf->y_stride = stride;
@@ -185,14 +184,14 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
if (buffer == NULL || width <= 0 || height <= 0) {
return VP8_STATUS_INVALID_PARAM;
}
if (options != NULL) { // First, apply options if there is any.
if (options != NULL) { // First, apply options if there is any.
if (options->use_cropping) {
const int cw = options->crop_width;
const int ch = options->crop_height;
const int x = options->crop_left & ~1;
const int y = options->crop_top & ~1;
if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) {
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
}
width = cw;
height = ch;
@@ -202,14 +201,14 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
#if !defined(WEBP_REDUCE_SIZE)
int scaled_width = options->scaled_width;
int scaled_height = options->scaled_height;
if (!WebPRescalerGetScaledDimensions(
width, height, &scaled_width, &scaled_height)) {
if (!WebPRescalerGetScaledDimensions(width, height, &scaled_width,
&scaled_height)) {
return VP8_STATUS_INVALID_PARAM;
}
width = scaled_width;
height = scaled_height;
#else
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
#endif
}
}
@@ -253,7 +252,7 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src,
if (src != NULL && dst != NULL) {
*dst = *src;
if (src->private_memory != NULL) {
dst->is_external_memory = 1; // dst buffer doesn't own the memory.
dst->is_external_memory = 1; // dst buffer doesn't own the memory.
dst->private_memory = NULL;
}
}
@@ -264,7 +263,7 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
if (src != NULL && dst != NULL) {
*dst = *src;
if (src->private_memory != NULL) {
src->is_external_memory = 1; // src relinquishes ownership
src->is_external_memory = 1; // src relinquishes ownership
src->private_memory = NULL;
}
}
@@ -289,8 +288,8 @@ VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src_buf,
} else {
const WebPYUVABuffer* const src = &src_buf->u.YUVA;
const WebPYUVABuffer* const dst = &dst_buf->u.YUVA;
WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride,
src_buf->width, src_buf->height);
WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride, src_buf->width,
src_buf->height);
WebPCopyPlane(src->u, src->u_stride, dst->u, dst->u_stride,
(src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
WebPCopyPlane(src->v, src->v_stride, dst->v, dst->v_stride,

View File

@@ -15,41 +15,46 @@
#define WEBP_DEC_COMMON_DEC_H_
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
B_TM_PRED = 1,
B_VE_PRED = 2,
B_HE_PRED = 3,
B_RD_PRED = 4,
B_VR_PRED = 5,
B_LD_PRED = 6,
B_VL_PRED = 7,
B_HD_PRED = 8,
B_HU_PRED = 9,
NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
enum {
B_DC_PRED = 0, // 4x4 modes
B_TM_PRED = 1,
B_VE_PRED = 2,
B_HE_PRED = 3,
B_RD_PRED = 4,
B_VR_PRED = 5,
B_LD_PRED = 6,
B_VL_PRED = 7,
B_HD_PRED = 8,
B_HU_PRED = 9,
NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
B_PRED = NUM_BMODES, // refined I4x4 mode
NUM_PRED_MODES = 4,
// Luma16 or UV modes
DC_PRED = B_DC_PRED,
V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED,
TM_PRED = B_TM_PRED,
B_PRED = NUM_BMODES, // refined I4x4 mode
NUM_PRED_MODES = 4,
// special modes
B_DC_PRED_NOTOP = 4,
B_DC_PRED_NOLEFT = 5,
B_DC_PRED_NOTOPLEFT = 6,
NUM_B_DC_MODES = 7 };
// special modes
B_DC_PRED_NOTOP = 4,
B_DC_PRED_NOLEFT = 5,
B_DC_PRED_NOTOPLEFT = 6,
NUM_B_DC_MODES = 7
};
enum { MB_FEATURE_TREE_PROBS = 3,
NUM_MB_SEGMENTS = 4,
NUM_REF_LF_DELTAS = 4,
NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT
MAX_NUM_PARTITIONS = 8,
// Probabilities
NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11
};
enum {
MB_FEATURE_TREE_PROBS = 3,
NUM_MB_SEGMENTS = 4,
NUM_REF_LF_DELTAS = 4,
NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT
MAX_NUM_PARTITIONS = 8,
// Probabilities
NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11
};
// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
int IsValidColorspace(int webp_csp_mode);

View File

@@ -30,11 +30,10 @@
// Main reconstruction function.
static const uint16_t kScan[16] = {
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
};
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS};
static int CheckMode(int mb_x, int mb_y, int mode) {
if (mode == B_DC_PRED) {
@@ -70,9 +69,9 @@ static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
static void DoUVTransform(uint32_t bits, const int16_t* const src,
uint8_t* const dst) {
if (bits & 0xff) { // any non-zero coeff at all?
if (bits & 0xaa) { // any non-zero AC coefficient?
VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V
if (bits & 0xff) { // any non-zero coeff at all?
if (bits & 0xaa) { // any non-zero AC coefficient?
VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V
} else {
VP8TransformDCUV(src, dst);
}
@@ -138,11 +137,11 @@ static void ReconstructRow(const VP8Decoder* const dec,
}
// predict and add residuals
if (block->is_i4x4) { // 4x4
if (block->is_i4x4) { // 4x4
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
if (mb_y > 0) {
if (mb_x >= dec->mb_w - 1) { // on rightmost border
if (mb_x >= dec->mb_w - 1) { // on rightmost border
memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
} else {
memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
@@ -157,7 +156,7 @@ static void ReconstructRow(const VP8Decoder* const dec,
VP8PredLuma4[block->imodes[n]](dst);
DoTransform(bits, coeffs + n * 16, dst);
}
} else { // 16x16
} else { // 16x16
const int pred_func = CheckMode(mb_x, mb_y, block->imodes[0]);
VP8PredLuma16[pred_func](y_dst);
if (bits != 0) {
@@ -179,8 +178,8 @@ static void ReconstructRow(const VP8Decoder* const dec,
// stash away top samples for next block
if (mb_y < dec->mb_h - 1) {
memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
}
}
// Transfer reconstructed samples from yuv_b cache to final destination.
@@ -209,7 +208,7 @@ static void ReconstructRow(const VP8Decoder* const dec,
// Simple filter: up to 2 luma samples are read and 1 is written.
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
// U/V, so it's 8 samples total (because of the 2x upsampling).
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
static const uint8_t kFilterExtraRows[3] = {0, 2, 8};
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
const VP8ThreadContext* const ctx = &dec->thread_ctx;
@@ -223,7 +222,7 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
return;
}
assert(limit >= 3);
if (dec->filter_type == 1) { // simple
if (dec->filter_type == 1) { // simple
if (mb_x > 0) {
VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
}
@@ -236,7 +235,7 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
if (f_info->f_inner) {
VP8SimpleVFilter16i(y_dst, y_bps, limit);
}
} else { // complex
} else { // complex
const int uv_bps = dec->cache_uv_stride;
uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
@@ -332,9 +331,8 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
#define DITHER_AMP_TAB_SIZE 12
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
// roughly, it's dqm->uv_mat[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1};
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec) {
@@ -407,7 +405,7 @@ static void DitherRow(VP8Decoder* const dec) {
// * we must clip the remaining pixels against the cropping area. The VP8Io
// struct must have the following fields set correctly before calling put():
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
// Finalize and transmit a complete row. Return false in case of user-abort.
static int FinishRow(void* arg1, void* arg2) {
@@ -458,7 +456,7 @@ static int FinishRow(void* arg1, void* arg2) {
y_end -= extra_y_rows;
}
if (y_end > io->crop_bottom) {
y_end = io->crop_bottom; // make sure we don't overflow on last row.
y_end = io->crop_bottom; // make sure we don't overflow on last row.
}
// If dec->alpha_data is not NULL, we have some alpha plane present.
io->a = NULL;
@@ -512,9 +510,9 @@ static int FinishRow(void* arg1, void* arg2) {
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
VP8ThreadContext* const ctx = &dec->thread_ctx;
const int filter_row =
(dec->filter_type > 0) &&
(dec->mb_y >= dec->tl_mb_y) && (dec->mb_y <= dec->br_mb_y);
const int filter_row = (dec->filter_type > 0) &&
(dec->mb_y >= dec->tl_mb_y) &&
(dec->mb_y <= dec->br_mb_y);
if (dec->mt_method == 0) {
// ctx->id and ctx->f_info are already set
ctx->mb_y = dec->mb_y;
@@ -526,7 +524,7 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
// Finish previous job *before* updating context
ok &= WebPGetWorkerInterface()->Sync(worker);
assert(worker->status == OK);
if (ok) { // spawn a new deblocking/output job
if (ok) { // spawn a new deblocking/output job
ctx->io = *io;
ctx->id = dec->cache_id;
ctx->mb_y = dec->mb_y;
@@ -539,7 +537,7 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
// perform reconstruction directly in main thread
ReconstructRow(dec, ctx);
}
if (filter_row) { // swap filter info
if (filter_row) { // swap filter info
VP8FInfo* const tmp = ctx->f_info;
ctx->f_info = dec->f_info;
dec->f_info = tmp;
@@ -646,7 +644,7 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
// io->put: [ 0..15][16..31][ 0..15][...
#define MT_CACHE_LINES 3
#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
// Initialize multi/single-thread worker
static int InitThreadContext(VP8Decoder* const dec) {
@@ -669,8 +667,8 @@ static int InitThreadContext(VP8Decoder* const dec) {
}
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height) {
const WebPHeaderStructure* const headers, int width,
int height) {
if (options == NULL || options->use_threads == 0) {
return 0;
}
@@ -698,22 +696,23 @@ static int AllocateMemory(VP8Decoder* const dec) {
const size_t top_size = sizeof(VP8TopSamples) * mb_w;
const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
const size_t f_info_size =
(dec->filter_type > 0) ?
mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
: 0;
(dec->filter_type > 0)
? mb_w * (dec->mt_method > 0 ? 2 : 1) * sizeof(VP8FInfo)
: 0;
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);
const size_t mb_data_size =
(dec->mt_method == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data);
const size_t cache_height = (16 * num_caches
+ kFilterExtraRows[dec->filter_type]) * 3 / 2;
const size_t cache_height =
(16 * num_caches + kFilterExtraRows[dec->filter_type]) * 3 / 2;
const size_t cache_size = top_size * cache_height;
// alpha_size is the only one that scales as width x height.
const uint64_t alpha_size = (dec->alpha_data != NULL) ?
(uint64_t)dec->pic_hdr.width * dec->pic_hdr.height : 0ULL;
const uint64_t needed = (uint64_t)intra_pred_mode_size
+ top_size + mb_info_size + f_info_size
+ yuv_size + mb_data_size
+ cache_size + alpha_size + WEBP_ALIGN_CST;
const uint64_t alpha_size =
(dec->alpha_data != NULL)
? (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height
: 0ULL;
const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size +
mb_info_size + f_info_size + yuv_size + mb_data_size +
cache_size + alpha_size + WEBP_ALIGN_CST;
uint8_t* mem;
if (!CheckSizeOverflow(needed)) return 0; // check for overflow
@@ -769,10 +768,10 @@ static int AllocateMemory(VP8Decoder* const dec) {
const int extra_y = extra_rows * dec->cache_y_stride;
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;
dec->cache_y = mem + extra_y;
dec->cache_u = dec->cache_y
+ 16 * num_caches * dec->cache_y_stride + extra_uv;
dec->cache_v = dec->cache_u
+ 8 * num_caches * dec->cache_uv_stride + extra_uv;
dec->cache_u =
dec->cache_y + 16 * num_caches * dec->cache_y_stride + extra_uv;
dec->cache_v =
dec->cache_u + 8 * num_caches * dec->cache_uv_stride + extra_uv;
dec->cache_id = 0;
}
mem += cache_size;
@@ -784,7 +783,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
// note: left/top-info is initialized once for all.
memset(dec->mb_info - 1, 0, mb_info_size);
VP8InitScanline(dec); // initialize left too.
VP8InitScanline(dec); // initialize left too.
// initialize top
memset(dec->intra_t, B_DC_PRED, intra_pred_mode_size);

View File

@@ -51,11 +51,7 @@ typedef enum {
} DecState;
// Operating state for the MemBuffer
typedef enum {
MEM_MODE_NONE = 0,
MEM_MODE_APPEND,
MEM_MODE_MAP
} MemBufferMode;
typedef enum { MEM_MODE_NONE = 0, MEM_MODE_APPEND, MEM_MODE_MAP } MemBufferMode;
// storage for partition #0 and partial data (in a rolling fashion)
typedef struct {
@@ -70,19 +66,19 @@ typedef struct {
} MemBuffer;
struct WebPIDecoder {
DecState state; // current decoding state
WebPDecParams params; // Params to store output info
int is_lossless; // for down-casting 'dec'.
void* dec; // either a VP8Decoder or a VP8LDecoder instance
DecState state; // current decoding state
WebPDecParams params; // Params to store output info
int is_lossless; // for down-casting 'dec'.
void* dec; // either a VP8Decoder or a VP8LDecoder instance
VP8Io io;
MemBuffer mem; // input memory buffer.
WebPDecBuffer output; // output buffer (when no external one is supplied,
// or if the external one has slow-memory)
MemBuffer mem; // input memory buffer.
WebPDecBuffer output; // output buffer (when no external one is supplied,
// or if the external one has slow-memory)
WebPDecBuffer* final_output; // Slow-memory output to copy to eventually.
size_t chunk_size; // Compressed VP8/VP8L size extracted from Header.
size_t chunk_size; // Compressed VP8/VP8L size extracted from Header.
int last_mb_y; // last row reached for intra-mode decoding
int last_mb_y; // last row reached for intra-mode decoding
};
// MB context to restore in case VP8DecodeMB() fails
@@ -165,7 +161,7 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
}
}
}
} else { // Resize lossless bitreader
} else { // Resize lossless bitreader
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec;
VP8LBitReaderSetBuffer(&dec->br, new_base, MemDataSize(mem));
}
@@ -237,10 +233,10 @@ WEBP_NODISCARD static int RemapMemBuffer(WebPIDecoder* const idec,
}
static void InitMemBuffer(MemBuffer* const mem) {
mem->mode = MEM_MODE_NONE;
mem->buf = NULL;
mem->buf_size = 0;
mem->part0_buf = NULL;
mem->mode = MEM_MODE_NONE;
mem->buf = NULL;
mem->buf_size = 0;
mem->part0_buf = NULL;
mem->part0_size = 0;
}
@@ -255,11 +251,11 @@ static void ClearMemBuffer(MemBuffer* const mem) {
WEBP_NODISCARD static int CheckMemBufferMode(MemBuffer* const mem,
MemBufferMode expected) {
if (mem->mode == MEM_MODE_NONE) {
mem->mode = expected; // switch to the expected mode
mem->mode = expected; // switch to the expected mode
} else if (mem->mode != expected) {
return 0; // we mixed the modes => error
return 0; // we mixed the modes => error
}
assert(mem->mode == expected); // mode is ok
assert(mem->mode == expected); // mode is ok
return 1;
}
@@ -396,7 +392,7 @@ static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
assert(mem->part0_buf == NULL);
// the following is a format limitation, no need for runtime check:
assert(part_size <= mem->part0_size);
if (part_size == 0) { // can't have zero-size partition #0
if (part_size == 0) { // can't have zero-size partition #0
return VP8_STATUS_BITSTREAM_ERROR;
}
if (mem->mode == MEM_MODE_APPEND) {
@@ -437,14 +433,14 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
}
// Allocate/Verify output buffer now
dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
dec->status =
WebPAllocateDecBuffer(io->width, io->height, params->options, output);
if (dec->status != VP8_STATUS_OK) {
return IDecError(idec, dec->status);
}
// This change must be done before calling VP8InitFrame()
dec->mt_method = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
dec->mt_method =
VP8GetThreadMethod(params->options, NULL, io->width, io->height);
VP8InitDithering(params->options, dec);
dec->status = CopyParts0Data(idec);
@@ -512,7 +508,7 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
assert(idec->mem.start <= idec->mem.end);
}
}
VP8InitScanline(dec); // Prepare for next scanline
VP8InitScanline(dec); // Prepare for next scanline
// Reconstruct, filter and emit the row.
if (!VP8ProcessRow(dec, io)) {
@@ -558,8 +554,8 @@ static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
return ErrorStatusLossless(idec, dec->status);
}
// Allocate/verify output buffer now.
dec->status = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
dec->status =
WebPAllocateDecBuffer(io->width, io->height, params->options, output);
if (dec->status != VP8_STATUS_OK) {
return IDecError(idec, dec->status);
}
@@ -584,7 +580,7 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
: FinishDecoding(idec);
}
// Main decoding loop
// Main decoding loop
static VP8StatusCode IDecode(WebPIDecoder* idec) {
VP8StatusCode status = VP8_STATUS_SUSPENDED;
@@ -592,7 +588,7 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
status = DecodeWebPHeaders(idec);
} else {
if (idec->dec == NULL) {
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
}
}
if (idec->state == STATE_VP8_HEADER) {
@@ -718,12 +714,12 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
WebPIDecoder* idec;
if (csp >= MODE_YUV) return NULL;
if (is_external_memory == 0) { // Overwrite parameters to sane values.
if (is_external_memory == 0) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
} else { // A buffer was passed. Validate the other params.
if (output_stride == 0 || output_buffer_size == 0) {
return NULL; // invalid parameter.
return NULL; // invalid parameter.
}
}
idec = WebPINewDecoder(NULL);
@@ -737,14 +733,14 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
}
WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride,
uint8_t* a, size_t a_size, int a_stride) {
uint8_t* u, size_t u_size, int u_stride, uint8_t* v,
size_t v_size, int v_stride, uint8_t* a,
size_t a_size, int a_stride) {
const int is_external_memory = (luma != NULL) ? 1 : 0;
WebPIDecoder* idec;
WEBP_CSP_MODE colorspace;
if (is_external_memory == 0) { // Overwrite parameters to sane values.
if (is_external_memory == 0) { // Overwrite parameters to sane values.
luma_size = u_size = v_size = a_size = 0;
luma_stride = u_stride = v_stride = a_stride = 0;
u = v = a = NULL;
@@ -780,12 +776,10 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
}
WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride) {
return WebPINewYUVA(luma, luma_size, luma_stride,
u, u_size, u_stride,
v, v_size, v_stride,
NULL, 0, 0);
uint8_t* u, size_t u_size, int u_stride, uint8_t* v,
size_t v_size, int v_stride) {
return WebPINewYUVA(luma, luma_size, luma_stride, u, u_size, u_stride, v,
v_size, v_stride, NULL, 0, 0);
}
//------------------------------------------------------------------------------
@@ -801,8 +795,8 @@ static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
return VP8_STATUS_SUSPENDED;
}
VP8StatusCode WebPIAppend(WebPIDecoder* idec,
const uint8_t* data, size_t data_size) {
VP8StatusCode WebPIAppend(WebPIDecoder* idec, const uint8_t* data,
size_t data_size) {
VP8StatusCode status;
if (idec == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
@@ -822,8 +816,8 @@ VP8StatusCode WebPIAppend(WebPIDecoder* idec,
return IDecode(idec);
}
VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
const uint8_t* data, size_t data_size) {
VP8StatusCode WebPIUpdate(WebPIDecoder* idec, const uint8_t* data,
size_t data_size) {
VP8StatusCode status;
if (idec == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
@@ -853,14 +847,13 @@ static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
return NULL;
}
if (idec->final_output != NULL) {
return NULL; // not yet slow-copied
return NULL; // not yet slow-copied
}
return idec->params.output;
}
const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
int* left, int* top,
int* width, int* height) {
const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec, int* left,
int* top, int* width, int* height) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
if (left != NULL) *left = 0;
if (top != NULL) *top = 0;
@@ -913,10 +906,8 @@ WEBP_NODISCARD uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
return src->u.YUVA.y;
}
int WebPISetIOHooks(WebPIDecoder* const idec,
VP8IoPutHook put,
VP8IoSetupHook setup,
VP8IoTeardownHook teardown,
int WebPISetIOHooks(WebPIDecoder* const idec, VP8IoPutHook put,
VP8IoSetupHook setup, VP8IoTeardownHook teardown,
void* user_data) {
if (idec == NULL || idec->state > STATE_WEBP_HEADER) {
return 0;

View File

@@ -17,7 +17,6 @@
#include <string.h>
#include "src/dec/vp8_dec.h"
#include "src/webp/types.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/cpu.h"
@@ -26,6 +25,7 @@
#include "src/utils/rescaler_utils.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Main YUV<->RGB conversion functions
@@ -51,9 +51,8 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* const output = p->output;
WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* const dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
WebPSamplerProcessPlane(io->y, io->y_stride,
io->u, io->v, io->uv_stride,
dst, buf->stride, io->mb_w, io->mb_h,
WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, dst,
buf->stride, io->mb_w, io->mb_h,
WebPSamplers[output->colorspace]);
return io->mb_h;
}
@@ -63,7 +62,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
#ifdef FANCY_UPSAMPLING
static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
int num_lines_out = io->mb_h; // a priori guess
int num_lines_out = io->mb_h; // a priori guess
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + (ptrdiff_t)io->mb_y * buf->stride;
WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
@@ -82,8 +81,8 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
} else {
// We can finish the left-over line from previous call.
upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
dst - buf->stride, dst, mb_w);
upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride,
dst, mb_w);
++num_lines_out;
}
// Loop over each output pairs of row.
@@ -94,8 +93,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
cur_v += io->uv_stride;
dst += 2 * buf->stride;
cur_y += 2 * io->y_stride;
upsample(cur_y - io->y_stride, cur_y,
top_u, top_v, cur_u, cur_v,
upsample(cur_y - io->y_stride, cur_y, top_u, top_v, cur_u, cur_v,
dst - buf->stride, dst, mb_w);
}
// move to last row
@@ -111,14 +109,14 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
} else {
// Process the very last row of even-sized picture
if (!(y_end & 1)) {
upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
dst + buf->stride, NULL, mb_w);
upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst + buf->stride, NULL,
mb_w);
}
}
return num_lines_out;
}
#endif /* FANCY_UPSAMPLING */
#endif /* FANCY_UPSAMPLING */
//------------------------------------------------------------------------------
@@ -153,8 +151,8 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
return 0;
}
static int GetAlphaSourceRow(const VP8Io* const io,
const uint8_t** alpha, int* const num_rows) {
static int GetAlphaSourceRow(const VP8Io* const io, const uint8_t** alpha,
int* const num_rows) {
int start_y = io->mb_y;
*num_rows = io->mb_h;
@@ -192,14 +190,14 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)start_y * buf->stride;
uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
num_rows, dst, buf->stride);
const int has_alpha =
WebPDispatchAlpha(alpha, io->width, mb_w, num_rows, dst, buf->stride);
(void)expected_num_lines_out;
assert(expected_num_lines_out == num_rows);
// has_alpha is true if there's non-trivial alpha to premultiply with.
if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
WebPApplyAlphaMultiply(base_rgba, alpha_first,
mb_w, num_rows, buf->stride);
WebPApplyAlphaMultiply(base_rgba, alpha_first, mb_w, num_rows,
buf->stride);
}
}
return 0;
@@ -245,14 +243,14 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
// YUV rescaling (no final RGB conversion needed)
#if !defined(WEBP_REDUCE_SIZE)
static int Rescale(const uint8_t* src, int src_stride,
int new_lines, WebPRescaler* const wrk) {
static int Rescale(const uint8_t* src, int src_stride, int new_lines,
WebPRescaler* const wrk) {
int num_lines_out = 0;
while (new_lines > 0) { // import new contributions of source rows.
while (new_lines > 0) { // import new contributions of source rows.
const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
src += lines_in * src_stride;
new_lines -= lines_in;
num_lines_out += WebPRescalerExport(wrk); // emit output row(s)
num_lines_out += WebPRescalerExport(wrk); // emit output row(s)
}
return num_lines_out;
}
@@ -267,8 +265,8 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
// internal buffer. This is OK since these samples are not used for
// intra-prediction (the top samples are saved in cache_y/u/v).
// But we need to cast the const away, though.
WebPMultRows((uint8_t*)io->y, io->y_stride,
io->a, io->width, io->mb_w, mb_h, 0);
WebPMultRows((uint8_t*)io->y, io->y_stride, io->a, io->width, io->mb_w,
mb_h, 0);
}
num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
Rescale(io->u, io->uv_stride, uv_mb_h, p->scaler_u);
@@ -284,7 +282,7 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
uint8_t* const dst_y = buf->y + (ptrdiff_t)p->last_y * buf->y_stride;
const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
assert(expected_num_lines_out == num_lines_out);
if (num_lines_out > 0) { // unmultiply the Y
if (num_lines_out > 0) { // unmultiply the Y
WebPMultRows(dst_y, buf->y_stride, dst_a, buf->a_stride,
p->scaler_a->dst_width, num_lines_out, 1);
}
@@ -300,11 +298,11 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int out_width = io->scaled_width;
const int out_width = io->scaled_width;
const int out_height = io->scaled_height;
const int uv_out_width = (out_width + 1) >> 1;
const int uv_out_width = (out_width + 1) >> 1;
const int uv_out_height = (out_height + 1) >> 1;
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
// scratch memory for luma rescaler
const size_t work_size = 2 * (size_t)out_width;
@@ -327,33 +325,32 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
if (p->memory == NULL) {
return 0; // memory error
return 0; // memory error
}
work = (rescaler_t*)p->memory;
scalers = (WebPRescaler*)WEBP_ALIGN(
(const uint8_t*)work + total_size - rescaler_size);
scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size -
rescaler_size);
p->scaler_y = &scalers[0];
p->scaler_u = &scalers[1];
p->scaler_v = &scalers[2];
p->scaler_a = has_alpha ? &scalers[3] : NULL;
if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
buf->y, out_width, out_height, buf->y_stride, 1,
work) ||
!WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, buf->y, out_width,
out_height, buf->y_stride, 1, work) ||
!WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, buf->u,
uv_out_width, uv_out_height, buf->u_stride, 1,
work + work_size) ||
!WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
!WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, buf->v,
uv_out_width, uv_out_height, buf->v_stride, 1,
work + work_size + uv_work_size)) {
return 0;
}
p->emit = EmitRescaledYUV;
if (has_alpha) {
if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
buf->a, out_width, out_height, buf->a_stride, 1,
if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, buf->a, out_width,
out_height, buf->a_stride, 1,
work + work_size + 2 * uv_work_size)) {
return 0;
}
@@ -381,8 +378,8 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
WebPRescalerExportRow(p->scaler_y);
WebPRescalerExportRow(p->scaler_u);
WebPRescalerExportRow(p->scaler_v);
convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst,
dst, p->scaler_y->dst_width);
convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst, dst,
p->scaler_y->dst_width);
dst += buf->stride;
++num_lines_out;
}
@@ -406,7 +403,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
const int v_lines_in = WebPRescalerImport(
p->scaler_v, uv_mb_h - uv_j, io->v + (ptrdiff_t)uv_j * io->uv_stride,
io->uv_stride);
(void)v_lines_in; // remove a gcc warning
(void)v_lines_in; // remove a gcc warning
assert(u_lines_in == v_lines_in);
uv_j += u_lines_in;
}
@@ -419,8 +416,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + (ptrdiff_t)y_pos * buf->stride;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int alpha_first =
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb);
uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
int num_lines_out = 0;
const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
@@ -436,8 +432,8 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
++num_lines_out;
}
if (is_premult_alpha && non_opaque) {
WebPApplyAlphaMultiply(base_rgba, alpha_first,
width, num_lines_out, buf->stride);
WebPApplyAlphaMultiply(base_rgba, alpha_first, width, num_lines_out,
buf->stride);
}
return num_lines_out;
}
@@ -495,14 +491,14 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
const int out_width = io->scaled_width;
const int out_width = io->scaled_width;
const int out_height = io->scaled_height;
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
// scratch memory for one rescaler
const size_t work_size = 2 * (size_t)out_width;
rescaler_t* work; // rescalers work area
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
uint64_t tmp_size1, tmp_size2, total_size;
size_t rescaler_size;
WebPRescaler* scalers;
@@ -519,21 +515,20 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
if (p->memory == NULL) {
return 0; // memory error
return 0; // memory error
}
work = (rescaler_t*)p->memory;
tmp = (uint8_t*)(work + tmp_size1);
scalers = (WebPRescaler*)WEBP_ALIGN(
(const uint8_t*)work + total_size - rescaler_size);
scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size -
rescaler_size);
p->scaler_y = &scalers[0];
p->scaler_u = &scalers[1];
p->scaler_v = &scalers[2];
p->scaler_a = has_alpha ? &scalers[3] : NULL;
if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
tmp + 0 * out_width, out_width, out_height, 0, 1,
work + 0 * work_size) ||
if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, tmp + 0 * out_width,
out_width, out_height, 0, 1, work + 0 * work_size) ||
!WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
tmp + 1 * out_width, out_width, out_height, 0, 1,
work + 1 * work_size) ||
@@ -546,9 +541,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
WebPInitYUV444Converters();
if (has_alpha) {
if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
tmp + 3 * out_width, out_width, out_height, 0, 1,
work + 3 * work_size)) {
if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, tmp + 3 * out_width,
out_width, out_height, 0, 1, work + 3 * work_size)) {
return 0;
}
p->emit_alpha = EmitRescaledAlphaRGB;
@@ -588,21 +582,21 @@ static int CustomSetup(VP8Io* io) {
#if !defined(WEBP_REDUCE_SIZE)
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
if (!ok) {
return 0; // memory error
return 0; // memory error
}
#else
return 0; // rescaling support not compiled
return 0; // rescaling support not compiled
#endif
} else {
if (is_rgb) {
WebPInitSamplers();
p->emit = EmitSampledRGB; // default
p->emit = EmitSampledRGB; // default
if (io->fancy_upsampling) {
#ifdef FANCY_UPSAMPLING
const int uv_width = (io->mb_w + 1) >> 1;
p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width));
if (p->memory == NULL) {
return 0; // memory error.
return 0; // memory error.
}
p->tmp_y = (uint8_t*)p->memory;
p->tmp_u = p->tmp_y + io->mb_w;
@@ -616,10 +610,10 @@ static int CustomSetup(VP8Io* io) {
}
if (is_alpha) { // need transparency output
p->emit_alpha =
(colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
EmitAlphaRGBA4444
(colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444)
? EmitAlphaRGBA4444
: is_rgb ? EmitAlphaRGB
: EmitAlphaYUV;
: EmitAlphaYUV;
if (is_rgb) {
WebPInitAlphaProcessing();
}
@@ -661,10 +655,10 @@ static void CustomTeardown(const VP8Io* io) {
// Main entry point
// Wires the custom I/O callbacks defined in this file into 'io':
// CustomPut / CustomSetup / CustomTeardown become the put / setup / teardown
// hooks, and 'params' is stored as the opaque pointer of 'io'.
void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
  io->put = CustomPut;
  io->setup = CustomSetup;
  io->teardown = CustomTeardown;
  io->opaque = params;
}
//------------------------------------------------------------------------------

View File

@@ -17,48 +17,30 @@
#include "src/utils/bit_reader_utils.h"
#include "src/webp/types.h"
// Clamps 'v' to the closed range [0, M]: negative values map to 0 and values
// above 'M' map to 'M'.
static WEBP_INLINE int clip(int v, int M) { return v < 0 ? 0 : v > M ? M : v; }
// Paragraph 14.1
// DC quantizer lookup: 128 entries, indexed by a quantizer index that callers
// first clip to [0, 127] (see VP8ParseQuant). Values are non-decreasing and
// all fit in 8 bits.
static const uint8_t kDcTable[128] = {
    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,
    17,  18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,
    27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,
    41,  42,  43,  44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,
    55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
    70,  71,  72,  73,  74,  75,  76,  76,  77,  78,  79,  80,  81,  82,  83,
    84,  85,  86,  87,  88,  89,  91,  93,  95,  96,  98,  100, 101, 102, 104,
    106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136,
    138, 140, 143, 145, 148, 151, 154, 157};
// AC quantizer lookup: 128 entries, indexed by a quantizer index that callers
// first clip to [0, 127] (see VP8ParseQuant). Values are strictly increasing
// and reach 284, hence the 16-bit element type.
static const uint16_t kAcTable[128] = {
    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,
    19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,
    34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
    49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,
    70,  72,  74,  76,  78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,
    100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134,
    137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181,
    185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245,
    249, 254, 259, 264, 269, 274, 279, 284};
//------------------------------------------------------------------------------
// Paragraph 9.6
@@ -66,16 +48,21 @@ static const uint16_t kAcTable[128] = {
void VP8ParseQuant(VP8Decoder* const dec) {
VP8BitReader* const br = &dec->br;
const int base_q0 = VP8GetValue(br, 7, "global-header");
const int dqy1_dc = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dqy2_dc = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dqy2_ac = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dquv_dc = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dquv_ac = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dqy1_dc = VP8Get(br, "global-header")
? VP8GetSignedValue(br, 4, "global-header")
: 0;
const int dqy2_dc = VP8Get(br, "global-header")
? VP8GetSignedValue(br, 4, "global-header")
: 0;
const int dqy2_ac = VP8Get(br, "global-header")
? VP8GetSignedValue(br, 4, "global-header")
: 0;
const int dquv_dc = VP8Get(br, "global-header")
? VP8GetSignedValue(br, 4, "global-header")
: 0;
const int dquv_ac = VP8Get(br, "global-header")
? VP8GetSignedValue(br, 4, "global-header")
: 0;
const VP8SegmentHeader* const hdr = &dec->segment_hdr;
int i;
@@ -98,7 +85,7 @@ void VP8ParseQuant(VP8Decoder* const dec) {
{
VP8QuantMatrix* const m = &dec->dqm[i];
m->y1_mat[0] = kDcTable[clip(q + dqy1_dc, 127)];
m->y1_mat[1] = kAcTable[clip(q + 0, 127)];
m->y1_mat[1] = kAcTable[clip(q + 0, 127)];
m->y2_mat[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
// For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
@@ -110,7 +97,7 @@ void VP8ParseQuant(VP8Decoder* const dec) {
m->uv_mat[0] = kDcTable[clip(q + dquv_dc, 117)];
m->uv_mat[1] = kAcTable[clip(q + dquv_ac, 127)];
m->uv_quant = q + dquv_ac; // for dithering strength evaluation
m->uv_quant = q + dquv_ac; // for dithering strength evaluation
}
}
}

View File

@@ -14,18 +14,17 @@
#include <string.h>
#include "src/dec/common_dec.h"
#include "src/webp/types.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/cpu.h"
#include "src/utils/bit_reader_inl_utils.h"
#include "src/utils/bit_reader_utils.h"
#include "src/webp/types.h"
#if !defined(USE_GENERIC_TREE)
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
!defined(__wasm__)
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && !defined(__wasm__)
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
#else
#define USE_GENERIC_TREE 0
#endif
@@ -33,269 +32,227 @@
#if (USE_GENERIC_TREE == 1)
// Coded tree for the intra-4x4 prediction modes, stored as 9 pairs.
// NOTE(review): negated B_*_PRED entries look like leaves (the generic parser
// ends with 'ymode = -i') and positive entries look like the index of the next
// pair to visit -- confirm against the tree walk in ParseIntraMode.
static const int8_t kYModesIntra4[18] = {
    -B_DC_PRED, 1,           //
    -B_TM_PRED, 2,           //
    -B_VE_PRED, 3,           //
    4,          6,           //
    -B_HE_PRED, 5,           //
    -B_RD_PRED, -B_VR_PRED,  //
    -B_LD_PRED, 7,           //
    -B_VL_PRED, 8,           //
    -B_HD_PRED, -B_HU_PRED,
};
#endif
//------------------------------------------------------------------------------
// Default probabilities
// Paragraph 13.5
// Default coefficient probabilities.
// Dimensions follow the declaration: [type][band][context][probability], i.e.
// NUM_TYPES groups of NUM_BANDS bands, each band holding NUM_CTX rows of
// NUM_PROBAS byte-sized probabilities.
static const uint8_t CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
    // type 0
    {{{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}},
     {{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
      {189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
      {106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128}},
     {{1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
      {181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
      {78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128}},
     {{1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
      {184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
      {77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128}},
     {{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
      {170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
      {37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128}},
     {{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
      {207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
      {102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128}},
     {{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
      {177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
      {80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128}},
     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
      {246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
      {255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
    // type 1
    {{{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
      {131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
      {68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128}},
     {{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
      {184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
      {81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128}},
     {{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
      {99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
      {23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128}},
     {{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
      {109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
      {44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128}},
     {{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
      {94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
      {22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128}},
     {{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
      {124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
      {35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128}},
     {{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
      {121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
      {45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128}},
     {{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
      {203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
      {137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128}}},
    // type 2
    {{{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
      {175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
      {73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128}},
     {{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
      {239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
      {155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128}},
     {{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
      {201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
      {69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128}},
     {{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
      {223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
      {141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128}},
     {{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
      {190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
      {149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
     {{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
      {247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
      {240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
     {{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
      {213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
      {55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128}},
     {{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
      {128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}}},
    // type 3
    {{{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
      {126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
      {61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128}},
     {{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
      {166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
      {39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128}},
     {{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
      {124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
      {24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128}},
     {{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
      {149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
      {28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128}},
     {{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
      {123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
      {20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128}},
     {{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
      {168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
      {47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128}},
     {{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
      {141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
      {42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128}},
     {{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
      {244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
      {238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128}}}};
// Paragraph 11.5
// Probabilities used when parsing intra-4x4 sub-block modes: NUM_BMODES x
// NUM_BMODES rows of (NUM_BMODES - 1) byte-sized probabilities each.
// NOTE(review): the two outer indices are presumably the modes of the
// neighbouring blocks; the indexing expression is not visible here -- confirm
// in ParseIntraMode.
static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
    {{231, 120, 48, 89, 115, 113, 120, 152, 112},
     {152, 179, 64, 126, 170, 118, 46, 70, 95},
     {175, 69, 143, 80, 85, 82, 72, 155, 103},
     {56, 58, 10, 171, 218, 189, 17, 13, 152},
     {114, 26, 17, 163, 44, 195, 21, 10, 173},
     {121, 24, 80, 195, 26, 62, 44, 64, 85},
     {144, 71, 10, 38, 171, 213, 144, 34, 26},
     {170, 46, 55, 19, 136, 160, 33, 206, 71},
     {63, 20, 8, 114, 114, 208, 12, 9, 226},
     {81, 40, 11, 96, 182, 84, 29, 16, 36}},
    {{134, 183, 89, 137, 98, 101, 106, 165, 148},
     {72, 187, 100, 130, 157, 111, 32, 75, 80},
     {66, 102, 167, 99, 74, 62, 40, 234, 128},
     {41, 53, 9, 178, 241, 141, 26, 8, 107},
     {74, 43, 26, 146, 73, 166, 49, 23, 157},
     {65, 38, 105, 160, 51, 52, 31, 115, 128},
     {104, 79, 12, 27, 217, 255, 87, 17, 7},
     {87, 68, 71, 44, 114, 51, 15, 186, 23},
     {47, 41, 14, 110, 182, 183, 21, 17, 194},
     {66, 45, 25, 102, 197, 189, 23, 18, 22}},
    {{88, 88, 147, 150, 42, 46, 45, 196, 205},
     {43, 97, 183, 117, 85, 38, 35, 179, 61},
     {39, 53, 200, 87, 26, 21, 43, 232, 171},
     {56, 34, 51, 104, 114, 102, 29, 93, 77},
     {39, 28, 85, 171, 58, 165, 90, 98, 64},
     {34, 22, 116, 206, 23, 34, 43, 166, 73},
     {107, 54, 32, 26, 51, 1, 81, 43, 31},
     {68, 25, 106, 22, 64, 171, 36, 225, 114},
     {34, 19, 21, 102, 132, 188, 16, 76, 124},
     {62, 18, 78, 95, 85, 57, 50, 48, 51}},
    {{193, 101, 35, 159, 215, 111, 89, 46, 111},
     {60, 148, 31, 172, 219, 228, 21, 18, 111},
     {112, 113, 77, 85, 179, 255, 38, 120, 114},
     {40, 42, 1, 196, 245, 209, 10, 25, 109},
     {88, 43, 29, 140, 166, 213, 37, 43, 154},
     {61, 63, 30, 155, 67, 45, 68, 1, 209},
     {100, 80, 8, 43, 154, 1, 51, 26, 71},
     {142, 78, 78, 16, 255, 128, 34, 197, 171},
     {41, 40, 5, 102, 211, 183, 4, 1, 221},
     {51, 50, 17, 168, 209, 192, 23, 25, 82}},
    {{138, 31, 36, 171, 27, 166, 38, 44, 229},
     {67, 87, 58, 169, 82, 115, 26, 59, 179},
     {63, 59, 90, 180, 59, 166, 93, 73, 154},
     {40, 40, 21, 116, 143, 209, 34, 39, 175},
     {47, 15, 16, 183, 34, 223, 49, 45, 183},
     {46, 17, 33, 183, 6, 98, 15, 32, 183},
     {57, 46, 22, 24, 128, 1, 54, 17, 37},
     {65, 32, 73, 115, 28, 128, 23, 128, 205},
     {40, 3, 9, 115, 51, 192, 18, 6, 223},
     {87, 37, 9, 115, 59, 77, 64, 21, 47}},
    {{104, 55, 44, 218, 9, 54, 53, 130, 226},
     {64, 90, 70, 205, 40, 41, 23, 26, 57},
     {54, 57, 112, 184, 5, 41, 38, 166, 213},
     {30, 34, 26, 133, 152, 116, 10, 32, 134},
     {39, 19, 53, 221, 26, 114, 32, 73, 255},
     {31, 9, 65, 234, 2, 15, 1, 118, 73},
     {75, 32, 12, 51, 192, 255, 160, 43, 51},
     {88, 31, 35, 67, 102, 85, 55, 186, 85},
     {56, 21, 23, 111, 59, 205, 45, 37, 192},
     {55, 38, 70, 124, 73, 102, 1, 34, 98}},
    {{125, 98, 42, 88, 104, 85, 117, 175, 82},
     {95, 84, 53, 89, 128, 100, 113, 101, 45},
     {75, 79, 123, 47, 51, 128, 81, 171, 1},
     {57, 17, 5, 71, 102, 57, 53, 41, 49},
     {38, 33, 13, 121, 57, 73, 26, 1, 85},
     {41, 10, 67, 138, 77, 110, 90, 47, 114},
     {115, 21, 2, 10, 102, 255, 166, 23, 6},
     {101, 29, 16, 10, 85, 128, 101, 196, 26},
     {57, 18, 10, 102, 102, 213, 34, 20, 43},
     {117, 20, 15, 36, 163, 128, 68, 1, 26}},
    {{102, 61, 71, 37, 34, 53, 31, 243, 192},
     {69, 60, 71, 38, 73, 119, 28, 222, 37},
     {68, 45, 128, 34, 1, 47, 11, 245, 171},
     {62, 17, 19, 70, 146, 85, 55, 62, 70},
     {37, 43, 37, 154, 100, 163, 85, 160, 1},
     {63, 9, 92, 136, 28, 64, 32, 201, 85},
     {75, 15, 9, 9, 64, 255, 184, 119, 16},
     {86, 6, 28, 5, 64, 255, 25, 248, 1},
     {56, 8, 17, 132, 137, 255, 55, 116, 128},
     {58, 15, 20, 82, 135, 57, 26, 121, 40}},
    {{164, 50, 31, 137, 154, 133, 25, 35, 218},
     {51, 103, 44, 131, 131, 123, 31, 6, 158},
     {86, 40, 64, 135, 148, 224, 45, 183, 128},
     {22, 26, 17, 131, 240, 154, 14, 1, 209},
     {45, 16, 21, 91, 64, 222, 7, 1, 197},
     {56, 21, 39, 155, 60, 138, 23, 102, 213},
     {83, 12, 13, 54, 192, 255, 68, 47, 28},
     {85, 26, 85, 85, 128, 128, 32, 146, 171},
     {18, 11, 7, 63, 144, 171, 4, 4, 246},
     {35, 27, 10, 146, 174, 171, 12, 26, 128}},
    {{190, 80, 35, 99, 180, 80, 126, 54, 45},
     {85, 126, 47, 87, 176, 51, 41, 20, 32},
     {101, 75, 128, 139, 118, 146, 116, 128, 85},
     {56, 41, 15, 176, 236, 85, 37, 9, 62},
     {71, 30, 17, 119, 118, 255, 17, 18, 138},
     {101, 38, 60, 138, 55, 70, 43, 26, 142},
     {146, 36, 19, 30, 171, 255, 97, 27, 20},
     {138, 45, 61, 62, 219, 1, 81, 188, 64},
     {32, 41, 20, 117, 151, 142, 20, 21, 163},
     {112, 19, 12, 61, 195, 128, 48, 4, 24}}};
// Resets the segment probabilities held in 'proba': every byte of
// proba->segments is set to 255. The coefficient band probabilities are not
// touched by this function.
void VP8ResetProba(VP8Proba* const proba) {
memset(proba->segments, 255u, sizeof(proba->segments));
// proba->bands[][] is initialized later
}
static void ParseIntraMode(VP8BitReader* const br,
VP8Decoder* const dec, int mb_x) {
static void ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec,
int mb_x) {
uint8_t* const top = dec->intra_t + 4 * mb_x;
uint8_t* const left = dec->intra_l;
VP8MBData* const block = dec->mb_data + mb_x;
@@ -304,9 +261,10 @@ static void ParseIntraMode(VP8BitReader* const br,
// to decode more than 1 keyframe.
if (dec->segment_hdr.update_map) {
// Hardcoded tree parsing
block->segment = !VP8GetBit(br, dec->proba.segments[0], "segments")
? VP8GetBit(br, dec->proba.segments[1], "segments")
: VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
block->segment =
!VP8GetBit(br, dec->proba.segments[0], "segments")
? VP8GetBit(br, dec->proba.segments[1], "segments")
: VP8GetBit(br, dec->proba.segments[2], "segments") + 2;
} else {
block->segment = 0; // default for intra
}
@@ -316,9 +274,9 @@ static void ParseIntraMode(VP8BitReader* const br,
if (!block->is_i4x4) {
// Hardcoded 16x16 intra-mode decision tree.
const int ymode =
VP8GetBit(br, 156, "pred-modes") ?
(VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED) :
(VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
VP8GetBit(br, 156, "pred-modes")
? (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED)
: (VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
block->imodes[0] = ymode;
memset(top, ymode, 4 * sizeof(*top));
memset(left, ymode, 4 * sizeof(*left));
@@ -339,18 +297,22 @@ static void ParseIntraMode(VP8BitReader* const br,
ymode = -i;
#else
// Hardcoded tree parsing
ymode = !VP8GetBit(br, prob[0], "pred-modes") ? B_DC_PRED :
!VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED :
!VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED :
!VP8GetBit(br, prob[3], "pred-modes") ?
(!VP8GetBit(br, prob[4], "pred-modes") ? B_HE_PRED :
(!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
: B_VR_PRED)) :
(!VP8GetBit(br, prob[6], "pred-modes") ? B_LD_PRED :
(!VP8GetBit(br, prob[7], "pred-modes") ? B_VL_PRED :
(!VP8GetBit(br, prob[8], "pred-modes") ? B_HD_PRED
: B_HU_PRED))
);
ymode =
!VP8GetBit(br, prob[0], "pred-modes") ? B_DC_PRED
: !VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED
: !VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED
: !VP8GetBit(br, prob[3], "pred-modes")
? (!VP8GetBit(br, prob[4], "pred-modes")
? B_HE_PRED
: (!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
: B_VR_PRED))
: (!VP8GetBit(br, prob[6], "pred-modes")
? B_LD_PRED
: (!VP8GetBit(br, prob[7], "pred-modes")
? B_VL_PRED
: (!VP8GetBit(br, prob[8], "pred-modes")
? B_HD_PRED
: B_HU_PRED)));
#endif // USE_GENERIC_TREE
top[x] = ymode;
}
@@ -360,9 +322,10 @@ static void ParseIntraMode(VP8BitReader* const br,
}
}
// Hardcoded UVMode decision tree
block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
: !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
: VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
block->uvmode = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
: !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
: VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED
: H_PRED;
}
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
@@ -378,145 +341,108 @@ int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
static const uint8_t
CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
{ { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
},
{ { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
{ 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
},
{ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
},
{ { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
},
{ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
},
{ { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
{ 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
}
};
{{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
{{{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255}},
{{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
{{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
{{{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255}},
{{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255}},
{{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}},
{{{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255}},
{{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255}},
{{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255}},
{{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}},
{{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}}};
// Paragraph 9.9
static const uint8_t kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
};
void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
@@ -527,9 +453,9 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) {
const int v =
VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header") ?
VP8GetValue(br, 8, "global-header") :
CoeffsProba0[t][b][c][p];
VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header")
? VP8GetValue(br, 8, "global-header")
: CoeffsProba0[t][b][c][p];
proba->bands[t][b].probas[c][p] = v;
}
}

View File

@@ -11,13 +11,14 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dec/vp8_dec.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
@@ -41,8 +42,8 @@ int WebPGetDecoderVersion(void) {
// Signature and pointer-to-function for GetCoeffs() variants below.
typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out);
const VP8BandProbas* const prob[], int ctx,
const quant_t dq, int n, int16_t* out);
static volatile GetCoeffsFunc GetCoeffs = NULL;
static void InitGetCoeffs(void);
@@ -95,8 +96,8 @@ void VP8Delete(VP8Decoder* const dec) {
}
}
int VP8SetError(VP8Decoder* const dec,
VP8StatusCode error, const char* const msg) {
int VP8SetError(VP8Decoder* const dec, VP8StatusCode error,
const char* const msg) {
// VP8_STATUS_SUSPENDED is only meaningful in incremental decoding.
assert(dec->incremental || error != VP8_STATUS_SUSPENDED);
// The oldest error reported takes precedence over the new one.
@@ -111,39 +112,39 @@ int VP8SetError(VP8Decoder* const dec,
//------------------------------------------------------------------------------
int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
return (data_size >= 3 &&
data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a);
return (data_size >= 3 && data[0] == 0x9d && data[1] == 0x01 &&
data[2] == 0x2a);
}
int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
int* const width, int* const height) {
if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
return 0; // not enough data
return 0; // not enough data
}
// check signature
if (!VP8CheckSignature(data + 3, data_size - 3)) {
return 0; // Wrong signature.
return 0; // Wrong signature.
} else {
const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16);
const int key_frame = !(bits & 1);
const int w = ((data[7] << 8) | data[6]) & 0x3fff;
const int h = ((data[9] << 8) | data[8]) & 0x3fff;
if (!key_frame) { // Not a keyframe.
if (!key_frame) { // Not a keyframe.
return 0;
}
if (((bits >> 1) & 7) > 3) {
return 0; // unknown profile
return 0; // unknown profile
}
if (!((bits >> 4) & 1)) {
return 0; // first frame is invisible!
return 0; // first frame is invisible!
}
if (((bits >> 5)) >= chunk_size) { // partition_length
return 0; // inconsistent size information.
return 0; // inconsistent size information.
}
if (w == 0 || h == 0) {
return 0; // We don't support both width and height to be zero.
return 0; // We don't support both width and height to be zero.
}
if (width) {
@@ -170,30 +171,34 @@ static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
}
// Paragraph 9.3
static int ParseSegmentHeader(VP8BitReader* br,
VP8SegmentHeader* hdr, VP8Proba* proba) {
static int ParseSegmentHeader(VP8BitReader* br, VP8SegmentHeader* hdr,
VP8Proba* proba) {
assert(br != NULL);
assert(hdr != NULL);
hdr->use_segment = VP8Get(br, "global-header");
if (hdr->use_segment) {
hdr->update_map = VP8Get(br, "global-header");
if (VP8Get(br, "global-header")) { // update data
if (VP8Get(br, "global-header")) { // update data
int s;
hdr->absolute_delta = VP8Get(br, "global-header");
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->quantizer[s] = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 7, "global-header") : 0;
hdr->quantizer[s] = VP8Get(br, "global-header")
? VP8GetSignedValue(br, 7, "global-header")
: 0;
}
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->filter_strength[s] = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 6, "global-header") : 0;
hdr->filter_strength[s] =
VP8Get(br, "global-header")
? VP8GetSignedValue(br, 6, "global-header")
: 0;
}
}
if (hdr->update_map) {
int s;
for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
proba->segments[s] = VP8Get(br, "global-header") ?
VP8GetValue(br, 8, "global-header") : 255u;
proba->segments[s] = VP8Get(br, "global-header")
? VP8GetValue(br, 8, "global-header")
: 255u;
}
}
} else {
@@ -211,8 +216,8 @@ static int ParseSegmentHeader(VP8BitReader* br,
// If we don't even have the partitions' sizes, then VP8_STATUS_NOT_ENOUGH_DATA
// is returned, and this is an unrecoverable error.
// If the partitions were positioned ok, VP8_STATUS_OK is returned.
static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
const uint8_t* buf, size_t size) {
static VP8StatusCode ParsePartitions(VP8Decoder* const dec, const uint8_t* buf,
size_t size) {
VP8BitReader* const br = &dec->br;
const uint8_t* sz = buf;
const uint8_t* buf_end = buf + size;
@@ -247,12 +252,12 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
// Paragraph 9.4
static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
VP8FilterHeader* const hdr = &dec->filter_hdr;
hdr->simple = VP8Get(br, "global-header");
hdr->level = VP8GetValue(br, 6, "global-header");
hdr->simple = VP8Get(br, "global-header");
hdr->level = VP8GetValue(br, 6, "global-header");
hdr->sharpness = VP8GetValue(br, 3, "global-header");
hdr->use_lf_delta = VP8Get(br, "global-header");
if (hdr->use_lf_delta) {
if (VP8Get(br, "global-header")) { // update lf-delta?
if (VP8Get(br, "global-header")) { // update lf-delta?
int i;
for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
if (VP8Get(br, "global-header")) {
@@ -290,8 +295,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
buf = io->data;
buf_size = io->data_size;
if (buf_size < 4) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Truncated header.");
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Truncated header.");
}
// Paragraph 9.1
@@ -322,11 +326,10 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
"cannot parse picture header");
}
if (!VP8CheckSignature(buf, buf_size)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Bad code word");
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, "Bad code word");
}
pic_hdr->width = ((buf[4] << 8) | buf[3]) & 0x3fff;
pic_hdr->xscale = buf[4] >> 6; // ratio: 1, 5/4 5/3 or 2
pic_hdr->xscale = buf[4] >> 6; // ratio: 1, 5/4 5/3 or 2
pic_hdr->height = ((buf[6] << 8) | buf[5]) & 0x3fff;
pic_hdr->yscale = buf[6] >> 6;
buf += 7;
@@ -342,11 +345,11 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
// So they can be used interchangeably without always testing for
// 'use_cropping'.
io->use_cropping = 0;
io->crop_top = 0;
io->crop_top = 0;
io->crop_left = 0;
io->crop_right = io->width;
io->crop_right = io->width;
io->crop_bottom = io->height;
io->use_scaling = 0;
io->use_scaling = 0;
io->scaled_width = io->width;
io->scaled_height = io->height;
@@ -360,8 +363,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
// Check if we have all the partition #0 available, and initialize dec->br
// to read this partition (and this partition only).
if (frm_hdr->partition_length > buf_size) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"bad partition length");
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "bad partition length");
}
br = &dec->br;
@@ -392,11 +394,10 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
// Frame buffer marking
if (!frm_hdr->key_frame) {
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
"Not a key frame.");
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE, "Not a key frame.");
}
VP8Get(br, "global-header"); // ignore the value of 'update_proba'
VP8Get(br, "global-header"); // ignore the value of 'update_proba'
VP8ParseProba(br, dec);
@@ -408,15 +409,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
//------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3)
static const uint8_t kCat3[] = { 173, 148, 140, 0 };
static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
static const uint8_t kCat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
static const uint8_t kCat3[] = {173, 148, 140, 0};
static const uint8_t kCat4[] = {176, 155, 140, 135, 0};
static const uint8_t kCat5[] = {180, 157, 141, 134, 130, 0};
static const uint8_t kCat6[] = {254, 254, 243, 230, 196, 177,
153, 140, 133, 130, 129, 0};
static const uint8_t* const kCat3456[] = {kCat3, kCat4, kCat5, kCat6};
static const uint8_t kZigzag[16] = {0, 1, 4, 8, 5, 2, 3, 6,
9, 12, 13, 10, 7, 11, 14, 15};
// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
@@ -452,18 +452,18 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
// Returns the position of the last non-zero coeff plus one
static int GetCoeffsFast(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out) {
const VP8BandProbas* const prob[], int ctx,
const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas[ctx];
for (; n < 16; ++n) {
if (!VP8GetBit(br, p[0], "coeffs")) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBit(br, p[1], "coeffs")) { // sequence of zero coeffs
while (!VP8GetBit(br, p[1], "coeffs")) { // sequence of zero coeffs
p = prob[++n]->probas[0];
if (n == 16) return 16;
}
{ // non zero coeff
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
int v;
if (!VP8GetBit(br, p[2], "coeffs")) {
@@ -482,18 +482,18 @@ static int GetCoeffsFast(VP8BitReader* const br,
// This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version
// of VP8GetBit() targeting specific platforms.
static int GetCoeffsAlt(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out) {
const VP8BandProbas* const prob[], int ctx,
const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas[ctx];
for (; n < 16; ++n) {
if (!VP8GetBitAlt(br, p[0], "coeffs")) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBitAlt(br, p[1], "coeffs")) { // sequence of zero coeffs
while (!VP8GetBitAlt(br, p[1], "coeffs")) { // sequence of zero coeffs
p = prob[++n]->probas[0];
if (n == 16) return 16;
}
{ // non zero coeff
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas[0];
int v;
if (!VP8GetBitAlt(br, p[2], "coeffs")) {
@@ -525,10 +525,10 @@ static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
return nz_coeffs;
}
static int ParseResiduals(VP8Decoder* const dec,
VP8MB* const mb, VP8BitReader* const token_br) {
const VP8BandProbas* (* const bands)[16 + 1] = dec->proba.bands_ptr;
const VP8BandProbas* const * ac_proba;
static int ParseResiduals(VP8Decoder* const dec, VP8MB* const mb,
VP8BitReader* const token_br) {
const VP8BandProbas*(*const bands)[16 + 1] = dec->proba.bands_ptr;
const VP8BandProbas* const* ac_proba;
VP8MBData* const block = dec->mb_data + dec->mb_x;
const VP8QuantMatrix* const q = &dec->dqm[block->segment];
int16_t* dst = block->coeffs;
@@ -541,14 +541,14 @@ static int ParseResiduals(VP8Decoder* const dec,
int first;
memset(dst, 0, 384 * sizeof(*dst));
if (!block->is_i4x4) { // parse DC
int16_t dc[16] = { 0 };
if (!block->is_i4x4) { // parse DC
int16_t dc[16] = {0};
const int ctx = mb->nz_dc + left_mb->nz_dc;
const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat, 0, dc);
mb->nz_dc = left_mb->nz_dc = (nz > 0);
if (nz > 1) { // more than just the DC -> perform the full transform
if (nz > 1) { // more than just the DC -> perform the full transform
VP8TransformWHT(dc, dst);
} else { // only DC is non-zero -> inlined simplified transform
} else { // only DC is non-zero -> inlined simplified transform
int i;
const int dc0 = (dc[0] + 3) >> 3;
for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0;
@@ -669,7 +669,7 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
"Premature end-of-file encountered.");
}
}
VP8InitScanline(dec); // Prepare for next scanline
VP8InitScanline(dec); // Prepare for next scanline
// Reconstruct, filter and emit the row.
if (!VP8ProcessRow(dec, io)) {
@@ -703,7 +703,7 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
// Finish setting up the decoding parameter. Will call io->setup().
ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
if (ok) { // good to go.
if (ok) { // good to go.
// Will allocate memory and prepare everything.
if (ok) ok = VP8InitFrame(dec, io);

View File

@@ -49,20 +49,20 @@ typedef void (*VP8IoTeardownHook)(const VP8Io* io);
struct VP8Io {
// set by VP8GetHeaders()
int width, height; // picture dimensions, in pixels (invariable).
// These are the original, uncropped dimensions.
// The actual area passed to put() is stored
// in mb_w / mb_h fields.
int width, height; // picture dimensions, in pixels (invariable).
// These are the original, uncropped dimensions.
// The actual area passed to put() is stored
// in mb_w / mb_h fields.
// set before calling put()
int mb_y; // position of the current rows (in pixels)
int mb_w; // number of columns in the sample
int mb_h; // number of rows in the sample
const uint8_t* y, *u, *v; // rows to copy (in yuv420 format)
const uint8_t *y, *u, *v; // rows to copy (in yuv420 format)
int y_stride; // row stride for luma
int uv_stride; // row stride for chroma
void* opaque; // user data
void* opaque; // user data
// called when fresh samples are available. Currently, samples are in
// YUV420 format, and can be up to width x 24 in size (depending on the
@@ -165,8 +165,8 @@ WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
// can be passed NULL.
WEBP_EXTERN int VP8GetInfo(
const uint8_t* data,
size_t data_size, // data available so far
size_t chunk_size, // total data size expected in the chunk
size_t data_size, // data available so far
size_t chunk_size, // total data size expected in the chunk
int* const width, int* const height);
// Returns true if the next byte(s) in data is a VP8L signature.
@@ -175,12 +175,13 @@ WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
// Validates the VP8L data-header and retrieves basic header information viz
// width, height and alpha. Returns 0 in case of formatting error.
// width/height/has_alpha can be passed NULL.
WEBP_EXTERN int VP8LGetInfo(
const uint8_t* data, size_t data_size, // data available so far
int* const width, int* const height, int* const has_alpha);
WEBP_EXTERN int VP8LGetInfo(const uint8_t* data,
size_t data_size, // data available so far
int* const width, int* const height,
int* const has_alpha);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DEC_VP8_DEC_H_

View File

@@ -14,7 +14,7 @@
#ifndef WEBP_DEC_VP8I_DEC_H_
#define WEBP_DEC_VP8I_DEC_H_
#include <string.h> // for memcpy()
#include <string.h> // for memcpy()
#include "src/dec/common_dec.h"
#include "src/dec/vp8_dec.h"
@@ -62,9 +62,9 @@ extern "C" {
// '|' = left sample, '-' = top sample, '+' = top-left sample
// 't' = extra top-right sample for 4x4 modes
#define YUV_SIZE (BPS * 17 + BPS * 9)
#define Y_OFF (BPS * 1 + 8)
#define U_OFF (Y_OFF + BPS * 16 + BPS)
#define V_OFF (U_OFF + 16)
#define Y_OFF (BPS * 1 + 8)
#define U_OFF (Y_OFF + BPS * 16 + BPS)
#define V_OFF (U_OFF + 16)
// minimal width under which lossy multi-threading is always disabled
#define MIN_WIDTH_FOR_THREADS 512
@@ -84,15 +84,15 @@ typedef struct {
uint16_t height;
uint8_t xscale;
uint8_t yscale;
uint8_t colorspace; // 0 = YCbCr
uint8_t colorspace; // 0 = YCbCr
uint8_t clamp_type;
} VP8PictureHeader;
// segment features
typedef struct {
int use_segment;
int update_map; // whether to update the segment map or not
int absolute_delta; // absolute or delta values for quantizer and filter
int update_map; // whether to update the segment map or not
int absolute_delta; // absolute or delta values for quantizer and filter
int8_t quantizer[NUM_MB_SEGMENTS]; // quantization changes
int8_t filter_strength[NUM_MB_SEGMENTS]; // filter strength for segments
} VP8SegmentHeader;
@@ -100,7 +100,7 @@ typedef struct {
// probas associated to one of the contexts
typedef uint8_t VP8ProbaArray[NUM_PROBAS];
typedef struct { // all the probas associated to one band
typedef struct { // all the probas associated to one band
VP8ProbaArray probas[NUM_CTX];
} VP8BandProbas;
@@ -114,9 +114,9 @@ typedef struct {
// Filter parameters
typedef struct {
int simple; // 0=complex, 1=simple
int level; // [0..63]
int sharpness; // [0..7]
int simple; // 0=complex, 1=simple
int level; // [0..63]
int sharpness; // [0..7]
int use_lf_delta;
int ref_lf_delta[NUM_REF_LF_DELTAS];
int mode_lf_delta[NUM_MODE_LF_DELTAS];
@@ -125,33 +125,33 @@ typedef struct {
//------------------------------------------------------------------------------
// Information about the macroblocks.
typedef struct { // filter specs
uint8_t f_limit; // filter limit in [3..189], or 0 if no filtering
uint8_t f_ilevel; // inner limit in [1..63]
uint8_t f_inner; // do inner filtering?
uint8_t hev_thresh; // high edge variance threshold in [0..2]
typedef struct { // filter specs
uint8_t f_limit; // filter limit in [3..189], or 0 if no filtering
uint8_t f_ilevel; // inner limit in [1..63]
uint8_t f_inner; // do inner filtering?
uint8_t hev_thresh; // high edge variance threshold in [0..2]
} VP8FInfo;
typedef struct { // Top/Left Contexts used for syntax-parsing
uint8_t nz; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
uint8_t nz_dc; // non-zero DC coeff (1bit)
uint8_t nz; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
uint8_t nz_dc; // non-zero DC coeff (1bit)
} VP8MB;
// Dequantization matrices
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef struct {
quant_t y1_mat, y2_mat, uv_mat;
int uv_quant; // U/V quantizer value
int dither; // dithering amplitude (0 = off, max=255)
int uv_quant; // U/V quantizer value
int dither; // dithering amplitude (0 = off, max=255)
} VP8QuantMatrix;
// Data needed to reconstruct a macroblock
typedef struct {
int16_t coeffs[384]; // 384 coeffs = (16+4+4) * 4*4
uint8_t is_i4x4; // true if intra4x4
uint8_t imodes[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode; // chroma prediction mode
int16_t coeffs[384]; // 384 coeffs = (16+4+4) * 4*4
uint8_t is_i4x4; // true if intra4x4
uint8_t imodes[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode; // chroma prediction mode
// bit-wise info about the content of each sub-4x4 block (in decoding order).
// Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
// code=0 -> no coefficient
@@ -161,7 +161,7 @@ typedef struct {
// This allows calling specialized transform functions.
uint32_t non_zero_y;
uint32_t non_zero_uv;
uint8_t dither; // local dithering strength (deduced from non_zero*)
uint8_t dither; // local dithering strength (deduced from non_zero*)
uint8_t skip;
uint8_t segment;
} VP8MBData;
@@ -186,7 +186,7 @@ typedef struct {
struct VP8Decoder {
VP8StatusCode status;
int ready; // true if ready to decode a picture with VP8Decode()
int ready; // true if ready to decode a picture with VP8Decode()
const char* error_msg; // set when status is not OK.
// Main data source
@@ -194,17 +194,17 @@ struct VP8Decoder {
int incremental; // if true, incremental decoding is expected
// headers
VP8FrameHeader frm_hdr;
VP8FrameHeader frm_hdr;
VP8PictureHeader pic_hdr;
VP8FilterHeader filter_hdr;
VP8FilterHeader filter_hdr;
VP8SegmentHeader segment_hdr;
// Worker
WebPWorker worker;
int mt_method; // multi-thread method: 0=off, 1=[parse+recon][filter]
// 2=[parse][recon+filter]
int cache_id; // current cache row
int num_caches; // number of cached rows of 16 pixels (1, 2 or 3)
int mt_method; // multi-thread method: 0=off, 1=[parse+recon][filter]
// 2=[parse][recon+filter]
int cache_id; // current cache row
int num_caches; // number of cached rows of 16 pixels (1, 2 or 3)
VP8ThreadContext thread_ctx; // Thread context
// dimension, in macroblock units.
@@ -220,8 +220,8 @@ struct VP8Decoder {
VP8BitReader parts[MAX_NUM_PARTITIONS];
// Dithering strength, deduced from decoding options
int dither; // whether to use dithering or not
VP8Random dithering_rg; // random generator for dithering
int dither; // whether to use dithering or not
VP8Random dithering_rg; // random generator for dithering
// dequantization (one set of DC/AC dequant factor per segment)
VP8QuantMatrix dqm[NUM_MB_SEGMENTS];
@@ -232,16 +232,16 @@ struct VP8Decoder {
uint8_t skip_p;
// Boundary data cache and persistent buffers.
uint8_t* intra_t; // top intra modes values: 4 * mb_w
uint8_t intra_l[4]; // left intra modes values
uint8_t* intra_t; // top intra modes values: 4 * mb_w
uint8_t intra_l[4]; // left intra modes values
VP8TopSamples* yuv_t; // top y/u/v samples
VP8MB* mb_info; // contextual macroblock info (mb_w + 1)
VP8FInfo* f_info; // filter strength info
uint8_t* yuv_b; // main block for Y/U/V (size = YUV_SIZE)
VP8MB* mb_info; // contextual macroblock info (mb_w + 1)
VP8FInfo* f_info; // filter strength info
uint8_t* yuv_b; // main block for Y/U/V (size = YUV_SIZE)
uint8_t* cache_y; // macroblock row for storing unfiltered samples
uint8_t* cache_y; // macroblock row for storing unfiltered samples
uint8_t* cache_u;
uint8_t* cache_v;
int cache_y_stride;
@@ -252,8 +252,8 @@ struct VP8Decoder {
size_t mem_size;
// Per-macroblock non-persistent info.
int mb_x, mb_y; // current position, in macroblock units
VP8MBData* mb_data; // parsed reconstruction data
int mb_x, mb_y; // current position, in macroblock units
VP8MBData* mb_data; // parsed reconstruction data
// Filtering side-info
int filter_type; // 0=off, 1=simple, 2=complex
@@ -267,15 +267,15 @@ struct VP8Decoder {
uint8_t* alpha_plane_mem; // memory allocated for alpha_plane
uint8_t* alpha_plane; // output. Persistent, contains the whole data.
const uint8_t* alpha_prev_line; // last decoded alpha row (or NULL)
int alpha_dithering; // derived from decoding options (0=off, 100=full)
int alpha_dithering; // derived from decoding options (0=off, 100=full)
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// in vp8.c
int VP8SetError(VP8Decoder* const dec,
VP8StatusCode error, const char* const msg);
int VP8SetError(VP8Decoder* const dec, VP8StatusCode error,
const char* const msg);
// in tree.c
void VP8ResetProba(VP8Proba* const proba);
@@ -299,8 +299,8 @@ WEBP_NODISCARD int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
// Return the multi-threading method to use (0=off), depending
// on options and bitstream size. Only for lossy decoding.
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height);
const WebPHeaderStructure* const headers, int width,
int height);
// Initialize dithering post-process if needed.
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec);
@@ -314,13 +314,13 @@ WEBP_NODISCARD int VP8DecodeMB(VP8Decoder* const dec,
// in alpha.c
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
const VP8Io* const io,
int row, int num_rows);
const VP8Io* const io, int row,
int num_rows);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DEC_VP8I_DEC_H_

View File

@@ -33,12 +33,12 @@
#include "src/webp/format_constants.h"
#include "src/webp/types.h"
#define NUM_ARGB_CACHE_ROWS 16
#define NUM_ARGB_CACHE_ROWS 16
static const int kCodeLengthLiterals = 16;
static const int kCodeLengthRepeatCode = 16;
static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
static const uint8_t kCodeLengthExtraBits[3] = {2, 3, 7};
static const uint8_t kCodeLengthRepeatOffsets[3] = {3, 3, 11};
// -----------------------------------------------------------------------------
// Five Huffman codes are used at each meta code:
@@ -47,44 +47,30 @@ static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
// 3. red,
// 4. blue, and,
// 5. distance prefix codes.
typedef enum {
GREEN = 0,
RED = 1,
BLUE = 2,
ALPHA = 3,
DIST = 4
} HuffIndex;
typedef enum { GREEN = 0, RED = 1, BLUE = 2, ALPHA = 3, DIST = 4 } HuffIndex;
static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
NUM_LITERAL_CODES + NUM_LENGTH_CODES,
NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
NUM_DISTANCE_CODES
};
NUM_LITERAL_CODES + NUM_LENGTH_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
NUM_LITERAL_CODES, NUM_DISTANCE_CODES};
static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {
0, 1, 1, 1, 0
};
static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {0, 1, 1, 1, 0};
#define NUM_CODE_LENGTH_CODES 19
#define NUM_CODE_LENGTH_CODES 19
static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
#define CODE_TO_PLANE_CODES 120
#define CODE_TO_PLANE_CODES 120
static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
};
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, 0x26, 0x2a,
0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, 0x25, 0x2b, 0x48, 0x04,
0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45,
0x4b, 0x34, 0x3c, 0x03, 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d,
0x44, 0x4c, 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, 0x32, 0x3e,
0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, 0x64, 0x6c, 0x42, 0x4e,
0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e,
0x00, 0x74, 0x7c, 0x41, 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d,
0x51, 0x5f, 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70};
// Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha
// and distance alphabets are constant (256 for red, blue and alpha, 40 for
@@ -96,19 +82,10 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
#define FIXED_TABLE_SIZE (630 * 3 + 410)
static const uint16_t kTableSize[12] = {
FIXED_TABLE_SIZE + 654,
FIXED_TABLE_SIZE + 656,
FIXED_TABLE_SIZE + 658,
FIXED_TABLE_SIZE + 662,
FIXED_TABLE_SIZE + 670,
FIXED_TABLE_SIZE + 686,
FIXED_TABLE_SIZE + 718,
FIXED_TABLE_SIZE + 782,
FIXED_TABLE_SIZE + 912,
FIXED_TABLE_SIZE + 1168,
FIXED_TABLE_SIZE + 1680,
FIXED_TABLE_SIZE + 2704
};
FIXED_TABLE_SIZE + 654, FIXED_TABLE_SIZE + 656, FIXED_TABLE_SIZE + 658,
FIXED_TABLE_SIZE + 662, FIXED_TABLE_SIZE + 670, FIXED_TABLE_SIZE + 686,
FIXED_TABLE_SIZE + 718, FIXED_TABLE_SIZE + 782, FIXED_TABLE_SIZE + 912,
FIXED_TABLE_SIZE + 1168, FIXED_TABLE_SIZE + 1680, FIXED_TABLE_SIZE + 2704};
static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
// The oldest error reported takes precedence over the new one.
@@ -118,22 +95,19 @@ static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
return 0;
}
static int DecodeImageStream(int xsize, int ysize,
int is_level0,
static int DecodeImageStream(int xsize, int ysize, int is_level0,
VP8LDecoder* const dec,
uint32_t** const decoded_data);
//------------------------------------------------------------------------------
int VP8LCheckSignature(const uint8_t* const data, size_t size) {
return (size >= VP8L_FRAME_HEADER_SIZE &&
data[0] == VP8L_MAGIC_BYTE &&
return (size >= VP8L_FRAME_HEADER_SIZE && data[0] == VP8L_MAGIC_BYTE &&
(data[4] >> 5) == 0); // version
}
static int ReadImageInfo(VP8LBitReader* const br,
int* const width, int* const height,
int* const has_alpha) {
static int ReadImageInfo(VP8LBitReader* const br, int* const width,
int* const height, int* const has_alpha) {
if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0;
*width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
*height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
@@ -142,12 +116,12 @@ static int ReadImageInfo(VP8LBitReader* const br,
return !br->eos;
}
int VP8LGetInfo(const uint8_t* data, size_t data_size,
int* const width, int* const height, int* const has_alpha) {
int VP8LGetInfo(const uint8_t* data, size_t data_size, int* const width,
int* const height, int* const has_alpha) {
if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
return 0; // not enough data
return 0; // not enough data
} else if (!VP8LCheckSignature(data, data_size)) {
return 0; // bad signature
return 0; // bad signature
} else {
int w, h, a;
VP8LBitReader br;
@@ -262,9 +236,9 @@ static void BuildPackedTable(HTreeGroup* const htree_group) {
}
}
static int ReadHuffmanCodeLengths(
VP8LDecoder* const dec, const int* const code_length_code_lengths,
int num_symbols, int* const code_lengths) {
static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
const int* const code_length_code_lengths,
int num_symbols, int* const code_lengths) {
int ok = 0;
VP8LBitReader* const br = &dec->br;
int symbol;
@@ -278,7 +252,7 @@ static int ReadHuffmanCodeLengths(
goto End;
}
if (VP8LReadBits(br, 1)) { // use length
if (VP8LReadBits(br, 1)) { // use length
const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
max_symbol = 2 + VP8LReadBits(br, length_nbits);
if (max_symbol > num_symbols) {
@@ -316,7 +290,7 @@ static int ReadHuffmanCodeLengths(
}
ok = 1;
End:
End:
VP8LHuffmanTablesDeallocate(&tables);
if (!ok) return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
return ok;
@@ -348,7 +322,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
ok = 1;
} else { // Decode Huffman-coded code lengths.
int i;
int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = {0};
const int num_codes = VP8LReadBits(br, 4) + 4;
assert(num_codes <= NUM_CODE_LENGTH_CODES);
@@ -361,8 +335,8 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
ok = ok && !br->eos;
if (ok) {
size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS,
code_lengths, alphabet_size);
size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, code_lengths,
alphabet_size);
}
if (!ok || size == 0) {
return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
@@ -449,7 +423,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
hdr->num_htree_groups = num_htree_groups;
hdr->htree_groups = htree_groups;
Error:
Error:
WebPSafeFree(mapping);
if (!ok) {
WebPSafeFree(huffman_image);
@@ -554,7 +528,7 @@ int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
}
ok = 1;
Error:
Error:
WebPSafeFree(code_lengths);
if (!ok) {
VP8LHuffmanTablesDeallocate(huffman_tables);
@@ -575,7 +549,7 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
const int in_height = io->mb_h;
const int out_height = io->scaled_height;
const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
rescaler_t* work; // Rescaler work area.
rescaler_t* work; // Rescaler work area.
const uint64_t scaled_data_size = (uint64_t)out_width;
uint32_t* scaled_data; // Temporary storage for scaled BGRA data.
const uint64_t memory_size = sizeof(*dec->rescaler) +
@@ -595,13 +569,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
scaled_data = (uint32_t*)memory;
if (!WebPRescalerInit(dec->rescaler, in_width, in_height,
(uint8_t*)scaled_data, out_width, out_height,
0, num_channels, work)) {
(uint8_t*)scaled_data, out_width, out_height, 0,
num_channels, work)) {
return 0;
}
return 1;
}
#endif // WEBP_REDUCE_SIZE
#endif // WEBP_REDUCE_SIZE
//------------------------------------------------------------------------------
// Export to ARGB
@@ -626,9 +600,9 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
}
// Emit scaled rows.
static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
uint8_t* in, int in_stride, int mb_h,
uint8_t* const out, int out_stride) {
static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, uint8_t* in,
int in_stride, int mb_h, uint8_t* const out,
int out_stride) {
const WEBP_CSP_MODE colorspace = dec->output->colorspace;
int num_lines_in = 0;
int num_lines_out = 0;
@@ -639,8 +613,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
int lines_imported;
assert(needed_lines > 0 && needed_lines <= lines_left);
WebPMultARGBRows(row_in, in_stride,
dec->rescaler->src_width, needed_lines, 0);
WebPMultARGBRows(row_in, in_stride, dec->rescaler->src_width, needed_lines,
0);
lines_imported =
WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
assert(lines_imported == needed_lines);
@@ -650,13 +624,12 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
return num_lines_out;
}
#endif // WEBP_REDUCE_SIZE
#endif // WEBP_REDUCE_SIZE
// Emit rows without any scaling.
static int EmitRows(WEBP_CSP_MODE colorspace,
const uint8_t* row_in, int in_stride,
int mb_w, int mb_h,
uint8_t* const out, int out_stride) {
static int EmitRows(WEBP_CSP_MODE colorspace, const uint8_t* row_in,
int in_stride, int mb_w, int mb_h, uint8_t* const out,
int out_stride) {
int lines = mb_h;
uint8_t* row_out = out;
while (lines-- > 0) {
@@ -711,8 +684,8 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
return num_lines_out;
}
static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
uint8_t* in, int in_stride, int mb_h) {
static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec, uint8_t* in,
int in_stride, int mb_h) {
int num_lines_in = 0;
int y_pos = dec->last_out_row;
while (num_lines_in < mb_h) {
@@ -730,9 +703,8 @@ static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
return y_pos;
}
static int EmitRowsYUVA(const VP8LDecoder* const dec,
const uint8_t* in, int in_stride,
int mb_w, int num_rows) {
static int EmitRowsYUVA(const VP8LDecoder* const dec, const uint8_t* in,
int in_stride, int mb_w, int num_rows) {
int y_pos = dec->last_out_row;
while (num_rows-- > 0) {
ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output);
@@ -774,8 +746,8 @@ static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
//------------------------------------------------------------------------------
static WEBP_INLINE int GetMetaIndex(
const uint32_t* const image, int xsize, int bits, int x, int y) {
static WEBP_INLINE int GetMetaIndex(const uint32_t* const image, int xsize,
int bits, int x, int y) {
if (bits == 0) return 0;
return image[xsize * (y >> bits) + (x >> bits)];
}
@@ -793,9 +765,8 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
static void ApplyInverseTransforms(VP8LDecoder* const dec,
int start_row, int num_rows,
const uint32_t* const rows) {
static void ApplyInverseTransforms(VP8LDecoder* const dec, int start_row,
int num_rows, const uint32_t* const rows) {
int n = dec->next_transform;
const int cache_pixs = dec->width * num_rows;
const int end_row = start_row + num_rows;
@@ -824,7 +795,7 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
// We can't process more than NUM_ARGB_CACHE_ROWS at a time (that's the size
// of argb_cache), but we currently don't need more than that.
assert(num_rows <= NUM_ARGB_CACHE_ROWS);
if (num_rows > 0) { // Emit output.
if (num_rows > 0) { // Emit output.
VP8Io* const io = dec->io;
uint8_t* rows_data = (uint8_t*)dec->argb_cache;
const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA
@@ -839,18 +810,19 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
buf->rgba + (ptrdiff_t)dec->last_out_row * buf->stride;
const int num_rows_out =
#if !defined(WEBP_REDUCE_SIZE)
io->use_scaling ?
EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
rgba, buf->stride) :
io->use_scaling ? EmitRescaledRowsRGBA(dec, rows_data, in_stride,
io->mb_h, rgba, buf->stride)
:
#endif // WEBP_REDUCE_SIZE
EmitRows(output->colorspace, rows_data, in_stride,
io->mb_w, io->mb_h, rgba, buf->stride);
EmitRows(output->colorspace, rows_data, in_stride,
io->mb_w, io->mb_h, rgba, buf->stride);
// Update 'last_out_row'.
dec->last_out_row += num_rows_out;
} else { // convert to YUVA
dec->last_out_row = io->use_scaling ?
EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
} else { // convert to YUVA
dec->last_out_row =
io->use_scaling
? EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h)
: EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
}
assert(dec->last_out_row <= output->height);
}
@@ -877,9 +849,8 @@ static int Is8bOptimizable(const VP8LMetadata* const hdr) {
return 1;
}
static void AlphaApplyFilter(ALPHDecoder* const alph_dec,
int first_row, int last_row,
uint8_t* out, int stride) {
static void AlphaApplyFilter(ALPHDecoder* const alph_dec, int first_row,
int last_row, uint8_t* out, int stride) {
if (alph_dec->filter != WEBP_FILTER_NONE) {
int y;
const uint8_t* prev_line = alph_dec->prev_line;
@@ -897,23 +868,22 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int last_row) {
// For vertical and gradient filtering, we need to decode the part above the
// crop_top row, in order to have the correct spatial predictors.
ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io->opaque;
const int top_row =
(alph_dec->filter == WEBP_FILTER_NONE ||
alph_dec->filter == WEBP_FILTER_HORIZONTAL) ? dec->io->crop_top
: dec->last_row;
const int top_row = (alph_dec->filter == WEBP_FILTER_NONE ||
alph_dec->filter == WEBP_FILTER_HORIZONTAL)
? dec->io->crop_top
: dec->last_row;
const int first_row = (dec->last_row < top_row) ? top_row : dec->last_row;
assert(last_row <= dec->io->crop_bottom);
if (last_row > first_row) {
// Special method for paletted alpha data. We only process the cropped area.
const int width = dec->io->width;
uint8_t* out = alph_dec->output + width * first_row;
const uint8_t* const in =
(uint8_t*)dec->pixels + dec->width * first_row;
const uint8_t* const in = (uint8_t*)dec->pixels + dec->width * first_row;
VP8LTransform* const transform = &dec->transforms[0];
assert(dec->next_transform == 1);
assert(transform->type == COLOR_INDEXING_TRANSFORM);
VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row,
in, out);
VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row, in,
out);
AlphaApplyFilter(alph_dec, first_row, last_row, out, width);
}
dec->last_row = dec->last_out_row = last_row;
@@ -959,11 +929,11 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
switch (dist) {
case 1:
pattern = src[0];
#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much
#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much
pattern |= pattern << 8;
pattern |= pattern << 16;
#elif defined(WEBP_USE_MIPS_DSP_R2)
__asm__ volatile ("replv.qb %0, %0" : "+r"(pattern));
__asm__ volatile("replv.qb %0, %0" : "+r"(pattern));
#else
pattern = 0x01010101u * pattern;
#endif
@@ -977,7 +947,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
#if defined(__arm__) || defined(_M_ARM)
pattern |= pattern << 16;
#elif defined(WEBP_USE_MIPS_DSP_R2)
__asm__ volatile ("replv.ph %0, %0" : "+r"(pattern));
__asm__ volatile("replv.ph %0, %0" : "+r"(pattern));
#else
pattern = 0x00010001u * pattern;
#endif
@@ -991,7 +961,7 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
CopySmallPattern8b(src, dst, length, pattern);
return;
}
Copy:
Copy:
if (dist >= length) { // no overlap -> use memcpy()
memcpy(dst, src, length * sizeof(*dst));
} else {
@@ -1001,26 +971,25 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
}
// copy pattern of 1 or 2 uint32_t's
static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src,
uint32_t* dst,
static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src, uint32_t* dst,
int length, uint64_t pattern) {
int i;
if ((uintptr_t)dst & 4) { // Align 'dst' to 8-bytes boundary.
if ((uintptr_t)dst & 4) { // Align 'dst' to 8-bytes boundary.
*dst++ = *src++;
pattern = (pattern >> 32) | (pattern << 32);
--length;
}
assert(0 == ((uintptr_t)dst & 7));
for (i = 0; i < (length >> 1); ++i) {
((uint64_t*)dst)[i] = pattern; // Copy the pattern 8 bytes at a time.
((uint64_t*)dst)[i] = pattern; // Copy the pattern 8 bytes at a time.
}
if (length & 1) { // Finish with left-over.
if (length & 1) { // Finish with left-over.
dst[i << 1] = src[i << 1];
}
}
static WEBP_INLINE void CopyBlock32b(uint32_t* const dst,
int dist, int length) {
static WEBP_INLINE void CopyBlock32b(uint32_t* const dst, int dist,
int length) {
const uint32_t* const src = dst - dist;
if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) {
uint64_t pattern;
@@ -1114,7 +1083,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
// Process the remaining rows corresponding to last row-block.
ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);
End:
End:
br->eos = VP8LIsEndOfStream(br);
if (!ok || (br->eos && pos < end)) {
return VP8LSetError(
@@ -1297,7 +1266,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
}
return 1;
Error:
Error:
return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
}
@@ -1314,9 +1283,8 @@ static void ClearTransform(VP8LTransform* const transform) {
static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
int i;
const int final_num_colors = 1 << (8 >> transform->bits);
uint32_t* const new_color_map =
(uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors,
sizeof(*new_color_map));
uint32_t* const new_color_map = (uint32_t*)WebPSafeMalloc(
(uint64_t)final_num_colors, sizeof(*new_color_map));
if (new_color_map == NULL) {
return 0;
} else {
@@ -1362,31 +1330,30 @@ static int ReadTransform(int* const xsize, int const* ysize,
case CROSS_COLOR_TRANSFORM:
transform->bits =
MIN_TRANSFORM_BITS + VP8LReadBits(br, NUM_TRANSFORM_BITS);
ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize,
transform->bits),
VP8LSubSampleSize(transform->ysize,
transform->bits),
/*is_level0=*/0, dec, &transform->data);
ok = DecodeImageStream(
VP8LSubSampleSize(transform->xsize, transform->bits),
VP8LSubSampleSize(transform->ysize, transform->bits),
/*is_level0=*/0, dec, &transform->data);
break;
case COLOR_INDEXING_TRANSFORM: {
const int num_colors = VP8LReadBits(br, 8) + 1;
const int bits = (num_colors > 16) ? 0
: (num_colors > 4) ? 1
: (num_colors > 2) ? 2
: 3;
*xsize = VP8LSubSampleSize(transform->xsize, bits);
transform->bits = bits;
ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec,
&transform->data);
if (ok && !ExpandColorMap(num_colors, transform)) {
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
}
const int num_colors = VP8LReadBits(br, 8) + 1;
const int bits = (num_colors > 16) ? 0
: (num_colors > 4) ? 1
: (num_colors > 2) ? 2
: 3;
*xsize = VP8LSubSampleSize(transform->xsize, bits);
transform->bits = bits;
ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec,
&transform->data);
if (ok && !ExpandColorMap(num_colors, transform)) {
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
}
break;
}
case SUBTRACT_GREEN_TRANSFORM:
break;
default:
assert(0); // can't happen
assert(0); // can't happen
break;
}
@@ -1444,7 +1411,7 @@ static void VP8LClear(VP8LDecoder* const dec) {
WebPSafeFree(dec->rescaler_memory);
dec->rescaler_memory = NULL;
dec->output = NULL; // leave no trace behind
dec->output = NULL; // leave no trace behind
}
void VP8LDelete(VP8LDecoder* const dec) {
@@ -1464,8 +1431,7 @@ static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
hdr->huffman_mask = (num_bits == 0) ? ~0 : (1 << num_bits) - 1;
}
static int DecodeImageStream(int xsize, int ysize,
int is_level0,
static int DecodeImageStream(int xsize, int ysize, int is_level0,
VP8LDecoder* const dec,
uint32_t** const decoded_data) {
int ok = 1;
@@ -1513,7 +1479,7 @@ static int DecodeImageStream(int xsize, int ysize,
}
UpdateDecoder(dec, transform_xsize, transform_ysize);
if (is_level0) { // level 0 complete
if (is_level0) { // level 0 complete
dec->state = READ_HDR;
goto End;
}
@@ -1532,7 +1498,7 @@ static int DecodeImageStream(int xsize, int ysize,
transform_ysize, NULL);
ok = ok && !br->eos;
End:
End:
if (!ok) {
WebPSafeFree(data);
ClearMetadata(hdr);
@@ -1566,7 +1532,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
assert(dec->width <= final_width);
dec->pixels = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
if (dec->pixels == NULL) {
dec->argb_cache = NULL; // for soundness
dec->argb_cache = NULL; // for soundness
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
}
dec->argb_cache = dec->pixels + num_pixels + cache_top_pixels;
@@ -1575,7 +1541,7 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
const uint64_t total_num_pixels = (uint64_t)dec->width * dec->height;
dec->argb_cache = NULL; // for soundness
dec->argb_cache = NULL; // for soundness
dec->pixels = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t));
if (dec->pixels == NULL) {
return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
@@ -1598,14 +1564,14 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) {
// Extract alpha (which is stored in the green plane).
ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io->opaque;
uint8_t* const output = alph_dec->output;
const int width = dec->io->width; // the final width (!= dec->width)
const int width = dec->io->width; // the final width (!= dec->width)
const int cache_pixs = width * num_rows_to_process;
uint8_t* const dst = output + width * cur_row;
const uint32_t* const src = dec->argb_cache;
ApplyInverseTransforms(dec, cur_row, num_rows_to_process, in);
WebPExtractGreen(src, dst, cache_pixs);
AlphaApplyFilter(alph_dec,
cur_row, cur_row + num_rows_to_process, dst, width);
AlphaApplyFilter(alph_dec, cur_row, cur_row + num_rows_to_process, dst,
width);
num_rows -= num_rows_to_process;
in += num_rows_to_process * dec->width;
cur_row += num_rows_to_process;
@@ -1658,7 +1624,7 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
alph_dec->vp8l_dec = dec;
return 1;
Err:
Err:
VP8LDelete(dec);
return 0;
}
@@ -1675,11 +1641,11 @@ int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
if (!alph_dec->use_8b_decode) WebPInitAlphaProcessing();
// Decode (with special row processing).
return alph_dec->use_8b_decode ?
DecodeAlphaData(dec, (uint8_t*)dec->pixels, dec->width, dec->height,
last_row) :
DecodeImageData(dec, dec->pixels, dec->width, dec->height,
last_row, ExtractAlphaRows);
return alph_dec->use_8b_decode
? DecodeAlphaData(dec, (uint8_t*)dec->pixels, dec->width,
dec->height, last_row)
: DecodeImageData(dec, dec->pixels, dec->width, dec->height,
last_row, ExtractAlphaRows);
}
//------------------------------------------------------------------------------
@@ -1709,7 +1675,7 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
}
return 1;
Error:
Error:
VP8LClear(dec);
assert(dec->status != VP8_STATUS_OK);
return 0;
@@ -1781,7 +1747,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
params->last_y = dec->last_out_row;
return 1;
Err:
Err:
VP8LClear(dec);
assert(dec->status != VP8_STATUS_OK);
return 0;

View File

@@ -15,7 +15,7 @@
#ifndef WEBP_DEC_VP8LI_DEC_H_
#define WEBP_DEC_VP8LI_DEC_H_
#include <string.h> // for memcpy()
#include <string.h> // for memcpy()
#include "src/dec/vp8_dec.h"
#include "src/dec/webpi_dec.h"
@@ -31,69 +31,65 @@
extern "C" {
#endif
typedef enum {
READ_DATA = 0,
READ_HDR = 1,
READ_DIM = 2
} VP8LDecodeState;
typedef enum { READ_DATA = 0, READ_HDR = 1, READ_DIM = 2 } VP8LDecodeState;
typedef struct VP8LTransform VP8LTransform;
struct VP8LTransform {
VP8LImageTransformType type; // transform type.
int bits; // subsampling bits defining transform window.
int xsize; // transform window X index.
int ysize; // transform window Y index.
uint32_t* data; // transform data.
VP8LImageTransformType type; // transform type.
int bits; // subsampling bits defining transform window.
int xsize; // transform window X index.
int ysize; // transform window Y index.
uint32_t* data; // transform data.
};
typedef struct {
int color_cache_size;
VP8LColorCache color_cache;
VP8LColorCache saved_color_cache; // for incremental
int color_cache_size;
VP8LColorCache color_cache;
VP8LColorCache saved_color_cache; // for incremental
int huffman_mask;
int huffman_subsample_bits;
int huffman_xsize;
uint32_t* huffman_image;
int num_htree_groups;
HTreeGroup* htree_groups;
HuffmanTables huffman_tables;
int huffman_mask;
int huffman_subsample_bits;
int huffman_xsize;
uint32_t* huffman_image;
int num_htree_groups;
HTreeGroup* htree_groups;
HuffmanTables huffman_tables;
} VP8LMetadata;
typedef struct VP8LDecoder VP8LDecoder;
struct VP8LDecoder {
VP8StatusCode status;
VP8LDecodeState state;
VP8Io* io;
VP8StatusCode status;
VP8LDecodeState state;
VP8Io* io;
const WebPDecBuffer* output; // shortcut to io->opaque->output
const WebPDecBuffer* output; // shortcut to io->opaque->output
uint32_t* pixels; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t* argb_cache; // Scratch buffer for temporary BGRA storage.
uint32_t* pixels; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t* argb_cache; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br;
int incremental; // if true, incremental decoding is expected
VP8LBitReader saved_br; // note: could be local variables too
int saved_last_pixel;
VP8LBitReader br;
int incremental; // if true, incremental decoding is expected
VP8LBitReader saved_br; // note: could be local variables too
int saved_last_pixel;
int width;
int height;
int last_row; // last input row decoded so far.
int last_pixel; // last pixel decoded so far. However, it may
// not be transformed, scaled and
// color-converted yet.
int last_out_row; // last row output so far.
int width;
int height;
int last_row; // last input row decoded so far.
int last_pixel; // last pixel decoded so far. However, it may
// not be transformed, scaled and
// color-converted yet.
int last_out_row; // last row output so far.
VP8LMetadata hdr;
VP8LMetadata hdr;
int next_transform;
VP8LTransform transforms[NUM_TRANSFORMS];
int next_transform;
VP8LTransform transforms[NUM_TRANSFORMS];
// or'd bitset storing the transforms types.
uint32_t transforms_seen;
uint32_t transforms_seen;
uint8_t* rescaler_memory; // Working memory for rescaling work.
WebPRescaler* rescaler; // Common rescaler for all channels.
uint8_t* rescaler_memory; // Working memory for rescaling work.
WebPRescaler* rescaler; // Common rescaler for all channels.
};
//------------------------------------------------------------------------------
@@ -144,7 +140,7 @@ WEBP_NODISCARD int ReadHuffmanCodesHelper(
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DEC_VP8LI_DEC_H_

View File

@@ -99,8 +99,7 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data,
// *height_ptr and *flags_ptr are set to the corresponding values extracted
// from the VP8X chunk.
static VP8StatusCode ParseVP8X(const uint8_t** const data,
size_t* const data_size,
int* const found_vp8x,
size_t* const data_size, int* const found_vp8x,
int* const width_ptr, int* const height_ptr,
uint32_t* const flags_ptr) {
const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
@@ -173,7 +172,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
while (1) {
uint32_t chunk_size;
uint32_t disk_chunk_size; // chunk_size with padding
uint32_t disk_chunk_size; // chunk_size with padding
*data = buf;
*data_size = buf_size;
@@ -184,7 +183,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
chunk_size = GetLE32(buf + TAG_SIZE);
if (chunk_size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
// For odd-sized chunk-payload, there's one byte padding at the end.
disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u;
@@ -192,23 +191,22 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
// Check that total bytes skipped so far does not exceed riff_size.
if (riff_size > 0 && (total_size > riff_size)) {
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
// Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
// parsed all the optional chunks.
// Note: This check must occur before the check 'buf_size < disk_chunk_size'
// below to allow incomplete VP8/VP8L chunks.
if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
!memcmp(buf, "VP8L", TAG_SIZE)) {
if (!memcmp(buf, "VP8 ", TAG_SIZE) || !memcmp(buf, "VP8L", TAG_SIZE)) {
return VP8_STATUS_OK;
}
if (buf_size < disk_chunk_size) { // Insufficient data.
if (buf_size < disk_chunk_size) { // Insufficient data.
return VP8_STATUS_NOT_ENOUGH_DATA;
}
if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header.
if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header.
*alpha_data = buf + CHUNK_HEADER_SIZE;
*alpha_size = chunk_size;
}
@@ -282,10 +280,8 @@ static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
// RIFF + VP8X + (optional chunks) + VP8(L)
// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
size_t data_size,
int* const width,
int* const height,
static VP8StatusCode ParseHeadersInternal(const uint8_t* data, size_t data_size,
int* const width, int* const height,
int* const has_alpha,
int* const has_animation,
int* const format,
@@ -312,15 +308,15 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
// Skip over RIFF header.
status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
if (status != VP8_STATUS_OK) {
return status; // Wrong RIFF header / insufficient data.
return status; // Wrong RIFF header / insufficient data.
}
found_riff = (hdrs.riff_size > 0);
// Skip over VP8X.
{
uint32_t flags = 0;
status = ParseVP8X(&data, &data_size, &found_vp8x,
&canvas_width, &canvas_height, &flags);
status = ParseVP8X(&data, &data_size, &found_vp8x, &canvas_width,
&canvas_height, &flags);
if (status != VP8_STATUS_OK) {
return status; // Wrong VP8X / insufficient data.
}
@@ -332,7 +328,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
}
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
if (has_animation != NULL) *has_animation = animation_present;
if (format != NULL) *format = 0; // default = undefined
if (format != NULL) *format = 0; // default = undefined
image_width = canvas_width;
image_height = canvas_height;
@@ -403,7 +399,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
assert(headers->offset == headers->data_size - data_size);
}
ReturnWidthHeight:
ReturnWidthHeight:
if (status == VP8_STATUS_OK ||
(status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
if (has_alpha != NULL) {
@@ -425,9 +421,8 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
int has_animation = 0;
assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha.
status = ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, &has_animation,
NULL, headers);
status = ParseHeadersInternal(headers->data, headers->data_size, NULL, NULL,
NULL, &has_animation, NULL, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// The WebPDemux API + libwebp can be used to decode individual
// uncomposited frames or the WebPAnimDecoder can be used to fully
@@ -462,7 +457,7 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
headers.data = data;
headers.data_size = data_size;
headers.have_all_data = 1;
status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks.
status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks.
if (status != VP8_STATUS_OK) {
return status;
}
@@ -485,15 +480,15 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
// Decode bitstream header, update io->width/io->height.
if (!VP8GetHeaders(dec, &io)) {
status = dec->status; // An error occurred. Grab error status.
status = dec->status; // An error occurred. Grab error status.
} else {
// Allocate/check output buffers.
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
params->output);
if (status == VP8_STATUS_OK) { // Decode
// This change must be done before calling VP8Decode()
dec->mt_method = VP8GetThreadMethod(params->options, &headers,
io.width, io.height);
dec->mt_method =
VP8GetThreadMethod(params->options, &headers, io.width, io.height);
VP8InitDithering(params->options, dec);
if (!VP8Decode(dec, &io)) {
status = dec->status;
@@ -507,7 +502,7 @@ WEBP_NODISCARD static VP8StatusCode DecodeInto(const uint8_t* const data,
return VP8_STATUS_OUT_OF_MEMORY;
}
if (!VP8LDecodeHeader(dec, &io)) {
status = dec->status; // An error occurred. Grab error status.
status = dec->status; // An error occurred. Grab error status.
} else {
// Allocate/check output buffers.
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
@@ -546,10 +541,10 @@ WEBP_NODISCARD static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
}
WebPResetDecParams(&params);
params.output = &buf;
buf.colorspace = colorspace;
buf.u.RGBA.rgba = rgba;
buf.colorspace = colorspace;
buf.u.RGBA.rgba = rgba;
buf.u.RGBA.stride = stride;
buf.u.RGBA.size = size;
buf.u.RGBA.size = size;
buf.is_external_memory = 1;
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
@@ -582,25 +577,25 @@ uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
}
uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride) {
uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size, uint8_t* luma,
size_t luma_size, int luma_stride, uint8_t* u,
size_t u_size, int u_stride, uint8_t* v,
size_t v_size, int v_stride) {
WebPDecParams params;
WebPDecBuffer output;
if (luma == NULL || !WebPInitDecBuffer(&output)) return NULL;
WebPResetDecParams(&params);
params.output = &output;
output.colorspace = MODE_YUV;
output.u.YUVA.y = luma;
output.colorspace = MODE_YUV;
output.u.YUVA.y = luma;
output.u.YUVA.y_stride = luma_stride;
output.u.YUVA.y_size = luma_size;
output.u.YUVA.u = u;
output.u.YUVA.y_size = luma_size;
output.u.YUVA.u = u;
output.u.YUVA.u_stride = u_stride;
output.u.YUVA.u_size = u_size;
output.u.YUVA.v = v;
output.u.YUVA.u_size = u_size;
output.u.YUVA.v = v;
output.u.YUVA.v_stride = v_stride;
output.u.YUVA.v_size = v_size;
output.u.YUVA.v_size = v_size;
output.is_external_memory = 1;
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
@@ -636,50 +631,50 @@ WEBP_NODISCARD static uint8_t* Decode(WEBP_CSP_MODE mode,
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
}
if (keep_info != NULL) { // keep track of the side-info
if (keep_info != NULL) { // keep track of the side-info
WebPCopyDecBuffer(&output, keep_info);
}
// return decoded samples (don't clear 'output'!)
return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
}
uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
int* width, int* height) {
uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size, int* width,
int* height) {
return Decode(MODE_RGB, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
int* width, int* height) {
uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size, int* width,
int* height) {
return Decode(MODE_RGBA, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
int* width, int* height) {
uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size, int* width,
int* height) {
return Decode(MODE_ARGB, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
int* width, int* height) {
uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, int* width,
int* height) {
return Decode(MODE_BGR, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
int* width, int* height) {
uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, int* width,
int* height) {
return Decode(MODE_BGRA, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height, uint8_t** u, uint8_t** v,
int* stride, int* uv_stride) {
uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width,
int* height, uint8_t** u, uint8_t** v, int* stride,
int* uv_stride) {
// data, width and height are checked by Decode().
if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) {
return NULL;
}
{
WebPDecBuffer output; // only to preserve the side-infos
uint8_t* const out = Decode(MODE_YUV, data, data_size,
width, height, &output);
WebPDecBuffer output; // only to preserve the side-infos
uint8_t* const out =
Decode(MODE_YUV, data, data_size, width, height, &output);
if (out != NULL) {
const WebPYUVABuffer* const buf = &output.u.YUVA;
@@ -706,17 +701,16 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
DefaultFeatures(features);
// Only parse enough of the data to retrieve the features.
return ParseHeadersInternal(data, data_size,
&features->width, &features->height,
&features->has_alpha, &features->has_animation,
&features->format, NULL);
return ParseHeadersInternal(
data, data_size, &features->width, &features->height,
&features->has_alpha, &features->has_animation, &features->format, NULL);
}
//------------------------------------------------------------------------------
// WebPGetInfo()
int WebPGetInfo(const uint8_t* data, size_t data_size,
int* width, int* height) {
int WebPGetInfo(const uint8_t* data, size_t data_size, int* width,
int* height) {
WebPBitstreamFeatures features;
if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
@@ -724,7 +718,7 @@ int WebPGetInfo(const uint8_t* data, size_t data_size,
}
if (width != NULL) {
*width = features.width;
*width = features.width;
}
if (height != NULL) {
*height = features.height;
@@ -736,10 +730,9 @@ int WebPGetInfo(const uint8_t* data, size_t data_size,
//------------------------------------------------------------------------------
// Advance decoding API
int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
int version) {
int WebPInitDecoderConfigInternal(WebPDecoderConfig* config, int version) {
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return 0; // version mismatch
return 0; // version mismatch
}
if (config == NULL) {
return 0;
@@ -811,7 +804,7 @@ VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features,
int version) {
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return VP8_STATUS_INVALID_PARAM; // version mismatch
return VP8_STATUS_INVALID_PARAM; // version mismatch
}
if (features == NULL) {
return VP8_STATUS_INVALID_PARAM;
@@ -864,8 +857,8 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
//------------------------------------------------------------------------------
// Cropping and rescaling.
int WebPCheckCropDimensions(int image_width, int image_height,
int x, int y, int w, int h) {
int WebPCheckCropDimensions(int image_width, int image_height, int x, int y,
int w, int h) {
return WebPCheckCropDimensionsBasic(x, y, w, h) &&
!(x >= image_width || w > image_width || w > image_width - x ||
y >= image_height || h > image_height || h > image_height - y);
@@ -884,7 +877,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
h = options->crop_height;
x = options->crop_left;
y = options->crop_top;
if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420
if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420
x &= ~1;
y &= ~1;
}
@@ -892,9 +885,9 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
return 0; // out of frame boundary error
}
}
io->crop_left = x;
io->crop_top = y;
io->crop_right = x + w;
io->crop_left = x;
io->crop_top = y;
io->crop_right = x + w;
io->crop_bottom = y + h;
io->mb_w = w;
io->mb_h = h;
@@ -921,8 +914,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
if (io->use_scaling) {
// disable filter (only for large downscaling ratio).
io->bypass_filtering |= (io->scaled_width < W * 3 / 4) &&
(io->scaled_height < H * 3 / 4);
io->bypass_filtering |=
(io->scaled_width < W * 3 / 4) && (io->scaled_height < H * 3 / 4);
io->fancy_upsampling = 0;
}
return 1;

View File

@@ -36,15 +36,15 @@ typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos,
int max_out_lines);
struct WebPDecParams {
WebPDecBuffer* output; // output buffer.
uint8_t* tmp_y, *tmp_u, *tmp_v; // cache for the fancy upsampler
// or used for tmp rescaling
WebPDecBuffer* output; // output buffer.
uint8_t *tmp_y, *tmp_u, *tmp_v; // cache for the fancy upsampler
// or used for tmp rescaling
int last_y; // coordinate of the line that was last output
int last_y; // coordinate of the line that was last output
const WebPDecoderOptions* options; // if not NULL, use alt decoding features
WebPRescaler* scaler_y, *scaler_u, *scaler_v, *scaler_a; // rescalers
void* memory; // overall scratch memory for the output work.
WebPRescaler *scaler_y, *scaler_u, *scaler_v, *scaler_a; // rescalers
void* memory; // overall scratch memory for the output work.
OutputFunc emit; // output RGB or YUV samples
OutputAlphaFunc emit_alpha; // output alpha channel
@@ -59,15 +59,15 @@ void WebPResetDecParams(WebPDecParams* const params);
// Structure storing a description of the RIFF headers.
typedef struct {
const uint8_t* data; // input buffer
size_t data_size; // input buffer size
int have_all_data; // true if all data is known to be available
size_t offset; // offset to main data chunk (VP8 or VP8L)
const uint8_t* alpha_data; // points to alpha chunk (if present)
size_t alpha_data_size; // alpha chunk size
size_t compressed_size; // VP8/VP8L compressed data size
size_t riff_size; // size of the riff payload (or 0 if absent)
int is_lossless; // true if a VP8L chunk is present
const uint8_t* data; // input buffer
size_t data_size; // input buffer size
int have_all_data; // true if all data is known to be available
size_t offset; // offset to main data chunk (VP8 or VP8L)
const uint8_t* alpha_data; // points to alpha chunk (if present)
size_t alpha_data_size; // alpha chunk size
size_t compressed_size; // VP8/VP8L compressed data size
size_t riff_size; // size of the riff payload (or 0 if absent)
int is_lossless; // true if a VP8L chunk is present
} WebPHeaderStructure;
// Skips over all valid chunks prior to the first VP8/VP8L frame header.
@@ -82,8 +82,8 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
// Misc utils
// Returns true if crop dimensions are within image bounds.
int WebPCheckCropDimensions(int image_width, int image_height,
int x, int y, int w, int h);
int WebPCheckCropDimensions(int image_width, int image_height, int x, int y,
int w, int h);
// Initializes VP8Io with custom setup, io and teardown functions. The default
// hooks will use the supplied 'params' as io->opaque handle.
@@ -136,7 +136,7 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DEC_WEBPI_DEC_H_

View File

@@ -41,19 +41,19 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
int num_pixels);
struct WebPAnimDecoder {
WebPDemuxer* demux; // Demuxer created from given WebP bitstream.
WebPDecoderConfig config; // Decoder config.
WebPDemuxer* demux; // Demuxer created from given WebP bitstream.
WebPDecoderConfig config; // Decoder config.
// Note: we use a pointer to a function blending multiple pixels at a time to
// allow possible inlining of per-pixel blending function.
BlendRowFunc blend_func; // Pointer to the chose blend row function.
WebPAnimInfo info; // Global info about the animation.
uint8_t* curr_frame; // Current canvas (not disposed).
uint8_t* prev_frame_disposed; // Previous canvas (properly disposed).
int prev_frame_timestamp; // Previous frame timestamp (milliseconds).
WebPIterator prev_iter; // Iterator object for previous frame.
int prev_frame_was_keyframe; // True if previous frame was a keyframe.
int next_frame; // Index of the next frame to be decoded
// (starting from 1).
BlendRowFunc blend_func; // Pointer to the chose blend row function.
WebPAnimInfo info; // Global info about the animation.
uint8_t* curr_frame; // Current canvas (not disposed).
uint8_t* prev_frame_disposed; // Previous canvas (properly disposed).
int prev_frame_timestamp; // Previous frame timestamp (milliseconds).
WebPIterator prev_iter; // Iterator object for previous frame.
int prev_frame_was_keyframe; // True if previous frame was a keyframe.
int next_frame; // Index of the next frame to be decoded
// (starting from 1).
};
static void DefaultDecoderOptions(WebPAnimDecoderOptions* const dec_options) {
@@ -79,8 +79,8 @@ WEBP_NODISCARD static int ApplyDecoderOptions(
assert(dec_options != NULL);
mode = dec_options->color_mode;
if (mode != MODE_RGBA && mode != MODE_BGRA &&
mode != MODE_rgbA && mode != MODE_bgrA) {
if (mode != MODE_RGBA && mode != MODE_BGRA && mode != MODE_rgbA &&
mode != MODE_bgrA) {
return 0;
}
dec->blend_func = (mode == MODE_RGBA || mode == MODE_BGRA)
@@ -145,7 +145,7 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
WebPAnimDecoderReset(dec);
return dec;
Error:
Error:
WebPAnimDecoderDelete(dec);
return NULL;
}
@@ -197,13 +197,13 @@ WEBP_NODISCARD static int CopyCanvas(const uint8_t* src, uint8_t* dst,
// Returns true if the current frame is a key-frame.
static int IsKeyFrame(const WebPIterator* const curr,
const WebPIterator* const prev,
int prev_frame_was_key_frame,
int canvas_width, int canvas_height) {
int prev_frame_was_key_frame, int canvas_width,
int canvas_height) {
if (curr->frame_num == 1) {
return 1;
} else if ((!curr->has_alpha || curr->blend_method == WEBP_MUX_NO_BLEND) &&
IsFullFrame(curr->width, curr->height,
canvas_width, canvas_height)) {
IsFullFrame(curr->width, curr->height, canvas_width,
canvas_height)) {
return 1;
} else {
return (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) &&
@@ -213,12 +213,11 @@ static int IsKeyFrame(const WebPIterator* const curr,
}
}
// Blend a single channel of 'src' over 'dst', given their alpha channel values.
// 'src' and 'dst' are assumed to be NOT pre-multiplied by alpha.
static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
uint32_t dst, uint8_t dst_a,
uint32_t scale, int shift) {
static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, uint32_t dst,
uint8_t dst_a, uint32_t scale,
int shift) {
const uint8_t src_channel = (src >> shift) & 0xff;
const uint8_t dst_channel = (dst >> shift) & 0xff;
const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
@@ -328,8 +327,8 @@ static void FindBlendRangeAtRow(const WebPIterator* const src,
}
}
int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
uint8_t** buf_ptr, int* timestamp_ptr) {
int WebPAnimDecoderGetNext(WebPAnimDecoder* dec, uint8_t** buf_ptr,
int* timestamp_ptr) {
WebPIterator iter;
uint32_t width;
uint32_t height;
@@ -358,8 +357,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
goto Error;
}
} else {
if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame,
width, height)) {
if (!CopyCanvas(dec->prev_frame_disposed, dec->curr_frame, width, height)) {
goto Error;
}
}
@@ -394,8 +392,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
int y;
// Blend transparent pixels with pixels in previous canvas.
for (y = 0; y < iter.height; ++y) {
const size_t offset =
(iter.y_offset + y) * width + iter.x_offset;
const size_t offset = (iter.y_offset + y) * width + iter.x_offset;
blend_row((uint32_t*)dec->curr_frame + offset,
(uint32_t*)dec->prev_frame_disposed + offset, iter.width);
}
@@ -445,7 +442,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
*timestamp_ptr = timestamp;
return 1;
Error:
Error:
WebPDemuxReleaseIterator(&iter);
return 0;
}

View File

@@ -19,7 +19,7 @@
#include <string.h>
#include "src/utils/utils.h"
#include "src/webp/decode.h" // WebPGetFeatures
#include "src/webp/decode.h" // WebPGetFeatures
#include "src/webp/demux.h"
#include "src/webp/format_constants.h"
#include "src/webp/mux.h"
@@ -31,10 +31,10 @@
#define DMUX_REV_VERSION 0
typedef struct {
size_t start; // start location of the data
size_t end; // end location
size_t riff_end; // riff chunk end location, can be > end.
size_t buf_size; // size of the buffer
size_t start; // start location of the data
size_t end; // end location
size_t riff_end; // riff chunk end location, can be > end.
size_t buf_size; // size of the buffer
const uint8_t* buf;
} MemBuffer;
@@ -51,7 +51,7 @@ typedef struct Frame {
WebPMuxAnimDispose dispose_method;
WebPMuxAnimBlend blend_method;
int frame_num;
int complete; // img_components contains a full image.
int complete; // img_components contains a full image.
ChunkData img_components[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next;
} Frame;
@@ -76,11 +76,7 @@ struct WebPDemuxer {
Chunk** chunks_tail;
};
typedef enum {
PARSE_OK,
PARSE_NEED_MORE_DATA,
PARSE_ERROR
} ParseStatus;
typedef enum { PARSE_OK, PARSE_NEED_MORE_DATA, PARSE_ERROR } ParseStatus;
typedef struct ChunkParser {
uint8_t id[4];
@@ -94,10 +90,10 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux);
static int IsValidExtendedFormat(const WebPDemuxer* const dmux);
static const ChunkParser kMasterChunks[] = {
{ { 'V', 'P', '8', ' ' }, ParseSingleImage, IsValidSimpleFormat },
{ { 'V', 'P', '8', 'L' }, ParseSingleImage, IsValidSimpleFormat },
{ { 'V', 'P', '8', 'X' }, ParseVP8X, IsValidExtendedFormat },
{ { '0', '0', '0', '0' }, NULL, NULL },
{{'V', 'P', '8', ' '}, ParseSingleImage, IsValidSimpleFormat},
{{'V', 'P', '8', 'L'}, ParseSingleImage, IsValidSimpleFormat},
{{'V', 'P', '8', 'X'}, ParseVP8X, IsValidExtendedFormat},
{{'0', '0', '0', '0'}, NULL, NULL},
};
//------------------------------------------------------------------------------
@@ -109,8 +105,8 @@ int WebPGetDemuxVersion(void) {
// -----------------------------------------------------------------------------
// MemBuffer
static int RemapMemBuffer(MemBuffer* const mem,
const uint8_t* data, size_t size) {
static int RemapMemBuffer(MemBuffer* const mem, const uint8_t* data,
size_t size) {
if (size < mem->buf_size) return 0; // can't remap to a shorter buffer!
mem->buf = data;
@@ -118,8 +114,8 @@ static int RemapMemBuffer(MemBuffer* const mem,
return 1;
}
static int InitMemBuffer(MemBuffer* const mem,
const uint8_t* data, size_t size) {
static int InitMemBuffer(MemBuffer* const mem, const uint8_t* data,
size_t size) {
memset(mem, 0, sizeof(*mem));
return RemapMemBuffer(mem, data, size);
}
@@ -195,8 +191,8 @@ static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
return 1;
}
static void SetFrameInfo(size_t start_offset, size_t size,
int frame_num, int complete,
static void SetFrameInfo(size_t start_offset, size_t size, int frame_num,
int complete,
const WebPBitstreamFeatures* const features,
Frame* const frame) {
frame->img_components[0].offset = start_offset;
@@ -214,8 +210,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
MemBuffer* const mem, Frame* const frame) {
int alpha_chunks = 0;
int image_chunks = 0;
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
MemDataSize(mem) < min_size);
int done =
(MemDataSize(mem) < CHUNK_HEADER_SIZE || MemDataSize(mem) < min_size);
ParseStatus status = PARSE_OK;
if (done) return PARSE_NEED_MORE_DATA;
@@ -232,7 +228,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
payload_size_padded = payload_size + (payload_size & 1);
payload_available = (payload_size_padded > MemDataSize(mem))
? MemDataSize(mem) : payload_size_padded;
? MemDataSize(mem)
: payload_size_padded;
chunk_size = CHUNK_HEADER_SIZE + payload_available;
if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
@@ -258,9 +255,8 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
// Extract the bitstream features, tolerating failures when the data
// is incomplete.
WebPBitstreamFeatures features;
const VP8StatusCode vp8_status =
WebPGetFeatures(mem->buf + chunk_start_offset, chunk_size,
&features);
const VP8StatusCode vp8_status = WebPGetFeatures(
mem->buf + chunk_start_offset, chunk_size, &features);
if (status == PARSE_NEED_MORE_DATA &&
vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
return PARSE_NEED_MORE_DATA;
@@ -276,7 +272,7 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
goto Done;
}
break;
Done:
Done:
default:
// Restore fourcc/size when moving up one level in parsing.
Rewind(mem, CHUNK_HEADER_SIZE);
@@ -298,12 +294,11 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
// enough data ('min_size') to parse the payload.
// Returns PARSE_OK on success with *frame pointing to the new Frame.
// Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
static ParseStatus NewFrame(const MemBuffer* const mem,
uint32_t min_size, uint32_t actual_size,
Frame** frame) {
static ParseStatus NewFrame(const MemBuffer* const mem, uint32_t min_size,
uint32_t actual_size, Frame** frame) {
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (actual_size < min_size) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
*frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(**frame));
return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;
@@ -311,8 +306,8 @@ static ParseStatus NewFrame(const MemBuffer* const mem,
// Parse a 'ANMF' chunk and any image bearing chunks that immediately follow.
// 'frame_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseAnimationFrame(
WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
static ParseStatus ParseAnimationFrame(WebPDemuxer* const dmux,
uint32_t frame_chunk_size) {
const int is_animation = !!(dmux->feature_flags & ANIMATION_FLAG);
const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
int added_frame = 0;
@@ -320,15 +315,14 @@ static ParseStatus ParseAnimationFrame(
MemBuffer* const mem = &dmux->mem;
Frame* frame;
size_t start_offset;
ParseStatus status =
NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
ParseStatus status = NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->x_offset = 2 * ReadLE24s(mem);
frame->y_offset = 2 * ReadLE24s(mem);
frame->width = 1 + ReadLE24s(mem);
frame->height = 1 + ReadLE24s(mem);
frame->duration = ReadLE24s(mem);
frame->x_offset = 2 * ReadLE24s(mem);
frame->y_offset = 2 * ReadLE24s(mem);
frame->width = 1 + ReadLE24s(mem);
frame->height = 1 + ReadLE24s(mem);
frame->duration = ReadLE24s(mem);
bits = ReadByte(mem);
frame->dispose_method =
(bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
@@ -362,8 +356,8 @@ static ParseStatus ParseAnimationFrame(
// the user to request the payload via a fourcc string. 'size' includes the
// header and the unpadded payload size.
// Returns true on success, false otherwise.
static int StoreChunk(WebPDemuxer* const dmux,
size_t start_offset, uint32_t size) {
static int StoreChunk(WebPDemuxer* const dmux, size_t start_offset,
uint32_t size) {
Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
if (chunk == NULL) return 0;
@@ -512,7 +506,7 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
store_chunk = !!(dmux->feature_flags & XMP_FLAG);
goto Skip;
}
Skip:
Skip:
default: {
if (chunk_size_padded <= MemDataSize(mem)) {
if (store_chunk) {
@@ -557,7 +551,7 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
dmux->feature_flags = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width = 1 + ReadLE24s(mem);
dmux->canvas_width = 1 + ReadLE24s(mem);
dmux->canvas_height = 1 + ReadLE24s(mem);
if (dmux->canvas_width * (uint64_t)dmux->canvas_height >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
@@ -647,8 +641,8 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
}
if (f->width > 0 && f->height > 0 &&
!CheckFrameBounds(f, !is_animation,
dmux->canvas_width, dmux->canvas_height)) {
!CheckFrameBounds(f, !is_animation, dmux->canvas_width,
dmux->canvas_height)) {
return 0;
}
}
@@ -698,7 +692,7 @@ static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
*demuxer = dmux;
return PARSE_OK;
Error:
Error:
WebPSafeFree(dmux);
WebPSafeFree(frame);
return PARSE_ERROR;
@@ -788,12 +782,18 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
if (dmux == NULL) return 0;
switch (feature) {
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count;
case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor;
case WEBP_FF_FRAME_COUNT: return (uint32_t)dmux->num_frames;
case WEBP_FF_FORMAT_FLAGS:
return dmux->feature_flags;
case WEBP_FF_CANVAS_WIDTH:
return (uint32_t)dmux->canvas_width;
case WEBP_FF_CANVAS_HEIGHT:
return (uint32_t)dmux->canvas_height;
case WEBP_FF_LOOP_COUNT:
return (uint32_t)dmux->loop_count;
case WEBP_FF_BACKGROUND_COLOR:
return dmux->bgcolor;
case WEBP_FF_FRAME_COUNT:
return (uint32_t)dmux->num_frames;
}
return 0;
}
@@ -822,11 +822,11 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
// if alpha exists it precedes image, update the size allowing for
// intervening chunks.
if (alpha->size > 0) {
const size_t inter_size = (image->offset > 0)
? image->offset - (alpha->offset + alpha->size)
const size_t inter_size =
(image->offset > 0) ? image->offset - (alpha->offset + alpha->size)
: 0;
start_offset = alpha->offset;
*data_size += alpha->size + inter_size;
*data_size += alpha->size + inter_size;
}
return mem_buf + start_offset;
}
@@ -835,27 +835,26 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
// Create a whole 'frame' from VP8 (+ alpha) or lossless.
static int SynthesizeFrame(const WebPDemuxer* const dmux,
const Frame* const frame,
WebPIterator* const iter) {
const Frame* const frame, WebPIterator* const iter) {
const uint8_t* const mem_buf = dmux->mem.buf;
size_t payload_size = 0;
const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
if (payload == NULL) return 0;
assert(frame != NULL);
iter->frame_num = frame->frame_num;
iter->num_frames = dmux->num_frames;
iter->x_offset = frame->x_offset;
iter->y_offset = frame->y_offset;
iter->width = frame->width;
iter->height = frame->height;
iter->has_alpha = frame->has_alpha;
iter->duration = frame->duration;
iter->frame_num = frame->frame_num;
iter->num_frames = dmux->num_frames;
iter->x_offset = frame->x_offset;
iter->y_offset = frame->y_offset;
iter->width = frame->width;
iter->height = frame->height;
iter->has_alpha = frame->has_alpha;
iter->duration = frame->duration;
iter->dispose_method = frame->dispose_method;
iter->blend_method = frame->blend_method;
iter->complete = frame->complete;
iter->blend_method = frame->blend_method;
iter->complete = frame->complete;
iter->fragment.bytes = payload;
iter->fragment.size = payload_size;
iter->fragment.size = payload_size;
return 1;
}
@@ -891,9 +890,7 @@ int WebPDemuxPrevFrame(WebPIterator* iter) {
return SetFrame(iter->frame_num - 1, iter);
}
void WebPDemuxReleaseIterator(WebPIterator* iter) {
(void)iter;
}
void WebPDemuxReleaseIterator(WebPIterator* iter) { (void)iter; }
// -----------------------------------------------------------------------------
// Chunk iteration
@@ -936,17 +933,16 @@ static int SetChunk(const char fourcc[4], int chunk_num,
const uint8_t* const mem_buf = dmux->mem.buf;
const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
iter->chunk.bytes = mem_buf + chunk->data.offset + CHUNK_HEADER_SIZE;
iter->chunk.size = chunk->data.size - CHUNK_HEADER_SIZE;
iter->num_chunks = count;
iter->chunk_num = chunk_num;
iter->chunk.size = chunk->data.size - CHUNK_HEADER_SIZE;
iter->num_chunks = count;
iter->chunk_num = chunk_num;
return 1;
}
return 0;
}
int WebPDemuxGetChunk(const WebPDemuxer* dmux,
const char fourcc[4], int chunk_num,
WebPChunkIterator* iter) {
int WebPDemuxGetChunk(const WebPDemuxer* dmux, const char fourcc[4],
int chunk_num, WebPChunkIterator* iter) {
if (iter == NULL) return 0;
memset(iter, 0, sizeof(*iter));
@@ -972,6 +968,4 @@ int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
return 0;
}
void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
(void)iter;
}
void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) { (void)iter; }

View File

@@ -20,13 +20,12 @@
// Tables can be faster on some platform but incur some extra binary size (~2k).
#if !defined(USE_TABLES_FOR_ALPHA_MULT)
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
#endif
// -----------------------------------------------------------------------------
#define MFIX 24 // 24bit fixed-point arithmetic
#define MFIX 24 // 24bit fixed-point arithmetic
#define HALF ((1u << MFIX) >> 1)
#define KINV_255 ((1u << MFIX) / 255u)
@@ -39,95 +38,94 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
#if (USE_TABLES_FOR_ALPHA_MULT == 1)
static const uint32_t kMultTables[2][256] = {
{ // (255u << MFIX) / alpha
0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
{ // alpha * KINV_255
0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
};
// (255u << MFIX) / alpha
{0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
0x01030c30, 0x01020612, 0x01010204, 0x01000000},
// alpha * KINV_255
{0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff}};
static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
return kMultTables[!inverse][a];
@@ -145,15 +143,15 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
ptr[x] = 0;
} else {
const uint32_t alpha = (argb >> 24) & 0xff;
const uint32_t scale = GetScale(alpha, inverse);
uint32_t out = argb & 0xff000000u;
out |= Mult(argb >> 0, scale) << 0;
out |= Mult(argb >> 8, scale) << 8;
out |= Mult(argb >> 0, scale) << 0;
out |= Mult(argb >> 8, scale) << 8;
out |= Mult(argb >> 16, scale) << 16;
ptr[x] = out;
}
@@ -162,8 +160,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
}
void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
const uint8_t* WEBP_RESTRICT const alpha,
int width, int inverse) {
const uint8_t* WEBP_RESTRICT const alpha, int width,
int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t a = alpha[x];
@@ -184,8 +182,8 @@ void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
const uint8_t* WEBP_RESTRICT const alpha,
int width, int inverse);
const uint8_t* WEBP_RESTRICT const alpha, int width,
int inverse);
//------------------------------------------------------------------------------
// Generic per-plane calls
@@ -218,17 +216,17 @@ void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
#if 1 // (int)(x * a / 255.)
#define MULTIPLIER(a) ((a) * 32897U)
#if 1 // (int)(x * a / 255.)
#define MULTIPLIER(a) ((a) * 32897U)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#else // (int)(x * a / 255. + .5)
#else // (int)(x * a / 255. + .5)
#define MULTIPLIER(a) ((a) * 65793U)
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
#endif
#if !WEBP_NEON_OMIT_C_CODE
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first, int w, int h,
int stride) {
while (h-- > 0) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
@@ -251,7 +249,7 @@ static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
// rgbA4444
#define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15
#define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15
static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
return (x & 0xf0) | (x >> 4);
@@ -265,8 +263,8 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
return (x * m) >> 16;
}
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
int w, int h, int stride,
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444, int w,
int h, int stride,
int rg_byte_pos /* 0 or 1 */) {
while (h-- > 0) {
int i;
@@ -286,8 +284,8 @@ static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
}
#undef MULTIPLIER
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
int w, int h, int stride) {
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, int w, int h,
int stride) {
#if (WEBP_SWAP_16BIT_CSP == 1)
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
#else
@@ -297,8 +295,8 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
#if !WEBP_NEON_OMIT_C_CODE
static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
int width, int height,
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
int width, int height, uint8_t* WEBP_RESTRICT dst,
int dst_stride) {
uint32_t alpha_mask = 0xff;
int i, j;
@@ -330,8 +328,8 @@ static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha,
}
static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
int width, int height,
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
int width, int height, uint8_t* WEBP_RESTRICT alpha,
int alpha_stride) {
uint8_t alpha_mask = 0xff;
int i, j;
@@ -357,19 +355,22 @@ static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb,
//------------------------------------------------------------------------------
static int HasAlpha8b_C(const uint8_t* src, int length) {
while (length-- > 0) if (*src++ != 0xff) return 1;
while (length-- > 0)
if (*src++ != 0xff) return 1;
return 0;
}
static int HasAlpha32b_C(const uint8_t* src, int length) {
int x;
for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
for (x = 0; length-- > 0; x += 4)
if (src[x] != 0xff) return 1;
return 0;
}
static void AlphaReplace_C(uint32_t* src, int length, uint32_t color) {
int x;
for (x = 0; x < length; ++x) if ((src[x] >> 24) == 0) src[x] = color;
for (x = 0; x < length; ++x)
if ((src[x] >> 24) == 0) src[x] = color;
}
//------------------------------------------------------------------------------
@@ -383,8 +384,8 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT r,
const uint8_t* WEBP_RESTRICT g,
const uint8_t* WEBP_RESTRICT b,
int len, uint32_t* WEBP_RESTRICT out) {
const uint8_t* WEBP_RESTRICT b, int len,
uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
@@ -394,8 +395,8 @@ static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
static void PackRGB_C(const uint8_t* WEBP_RESTRICT r,
const uint8_t* WEBP_RESTRICT g,
const uint8_t* WEBP_RESTRICT b,
int len, int step, uint32_t* WEBP_RESTRICT out) {
const uint8_t* WEBP_RESTRICT b, int len, int step,
uint32_t* WEBP_RESTRICT out) {
int i, offset = 0;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
@@ -419,8 +420,8 @@ void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
#endif
void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
const uint8_t* WEBP_RESTRICT g,
const uint8_t* WEBP_RESTRICT b,
int len, int step, uint32_t* WEBP_RESTRICT out);
const uint8_t* WEBP_RESTRICT b, int len, int step,
uint32_t* WEBP_RESTRICT out);
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
int (*WebPHasAlpha32b)(const uint8_t* src, int length);

View File

@@ -17,8 +17,8 @@
#if defined(WEBP_USE_MIPS_DSP_R2)
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
int width, int height, uint8_t* dst,
int dst_stride) {
uint32_t alpha_mask = 0xffffffff;
int i, j, temp0;
@@ -28,97 +28,92 @@ static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
for (i = 0; i < (width >> 2); ++i) {
int temp1, temp2, temp3;
__asm__ volatile (
"ulw %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 4 \n\t"
"addiu %[pdst], %[pdst], 16 \n\t"
"srl %[temp1], %[temp0], 8 \n\t"
"srl %[temp2], %[temp0], 16 \n\t"
"srl %[temp3], %[temp0], 24 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"sb %[temp0], -16(%[pdst]) \n\t"
"sb %[temp1], -12(%[pdst]) \n\t"
"sb %[temp2], -8(%[pdst]) \n\t"
"sb %[temp3], -4(%[pdst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
[alpha_mask]"+r"(alpha_mask)
:
: "memory"
);
__asm__ volatile(
"ulw %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 4 \n\t"
"addiu %[pdst], %[pdst], 16 \n\t"
"srl %[temp1], %[temp0], 8 \n\t"
"srl %[temp2], %[temp0], 16 \n\t"
"srl %[temp3], %[temp0], 24 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"sb %[temp0], -16(%[pdst]) \n\t"
"sb %[temp1], -12(%[pdst]) \n\t"
"sb %[temp2], -8(%[pdst]) \n\t"
"sb %[temp3], -4(%[pdst]) \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
[alpha_mask] "+r"(alpha_mask)
:
: "memory");
}
for (i = 0; i < (width & 3); ++i) {
__asm__ volatile (
"lbu %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 1 \n\t"
"sb %[temp0], 0(%[pdst]) \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"addiu %[pdst], %[pdst], 4 \n\t"
: [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
[alpha_mask]"+r"(alpha_mask)
:
: "memory"
);
__asm__ volatile(
"lbu %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 1 \n\t"
"sb %[temp0], 0(%[pdst]) \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"addiu %[pdst], %[pdst], 4 \n\t"
: [temp0] "=&r"(temp0), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
[alpha_mask] "+r"(alpha_mask)
:
: "memory");
}
alpha += alpha_stride;
dst += dst_stride;
}
__asm__ volatile (
"ext %[temp0], %[alpha_mask], 0, 16 \n\t"
"srl %[alpha_mask], %[alpha_mask], 16 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"ext %[temp0], %[alpha_mask], 0, 8 \n\t"
"srl %[alpha_mask], %[alpha_mask], 8 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
: [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
:
);
__asm__ volatile(
"ext %[temp0], %[alpha_mask], 0, 16 \n\t"
"srl %[alpha_mask], %[alpha_mask], 16 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"ext %[temp0], %[alpha_mask], 0, 8 \n\t"
"srl %[alpha_mask], %[alpha_mask], 8 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
: [temp0] "=&r"(temp0), [alpha_mask] "+r"(alpha_mask)
:);
return (alpha_mask != 0xff);
}
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
int inverse) {
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width, int inverse) {
int x;
const uint32_t c_00ffffff = 0x00ffffffu;
const uint32_t c_ff000000 = 0xff000000u;
const uint32_t c_8000000 = 0x00800000u;
const uint32_t c_8000080 = 0x00800080u;
const uint32_t c_8000000 = 0x00800000u;
const uint32_t c_8000080 = 0x00800080u;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
ptr[x] = 0;
} else {
int temp0, temp1, temp2, temp3, alpha;
__asm__ volatile (
"srl %[alpha], %[argb], 24 \n\t"
"replv.qb %[temp0], %[alpha] \n\t"
"and %[temp0], %[temp0], %[c_00ffffff] \n\t"
"beqz %[inverse], 0f \n\t"
"divu $zero, %[c_ff000000], %[alpha] \n\t"
"mflo %[temp0] \n\t"
"0: \n\t"
"andi %[temp1], %[argb], 0xff \n\t"
"ext %[temp2], %[argb], 8, 8 \n\t"
"ext %[temp3], %[argb], 16, 8 \n\t"
"mul %[temp1], %[temp1], %[temp0] \n\t"
"mul %[temp2], %[temp2], %[temp0] \n\t"
"mul %[temp3], %[temp3], %[temp0] \n\t"
"precrq.ph.w %[temp1], %[temp2], %[temp1] \n\t"
"addu %[temp3], %[temp3], %[c_8000000] \n\t"
"addu %[temp1], %[temp1], %[c_8000080] \n\t"
"precrq.ph.w %[temp3], %[argb], %[temp3] \n\t"
"precrq.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
: [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
[c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
[c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
: "memory", "hi", "lo"
);
__asm__ volatile(
"srl %[alpha], %[argb], 24 \n\t"
"replv.qb %[temp0], %[alpha] \n\t"
"and %[temp0], %[temp0], %[c_00ffffff] \n\t"
"beqz %[inverse], 0f \n\t"
"divu $zero, %[c_ff000000], %[alpha] \n\t"
"mflo %[temp0] \n\t"
"0: \n\t"
"andi %[temp1], %[argb], 0xff \n\t"
"ext %[temp2], %[argb], 8, 8 \n\t"
"ext %[temp3], %[argb], 16, 8 \n\t"
"mul %[temp1], %[temp1], %[temp0] \n\t"
"mul %[temp2], %[temp2], %[temp0] \n\t"
"mul %[temp3], %[temp3], %[temp0] \n\t"
"precrq.ph.w %[temp1], %[temp2], %[temp1] \n\t"
"addu %[temp3], %[temp3], %[c_8000000] \n\t"
"addu %[temp1], %[temp1], %[c_8000080] \n\t"
"precrq.ph.w %[temp3], %[argb], %[temp3] \n\t"
"precrq.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [alpha] "=&r"(alpha)
: [inverse] "r"(inverse), [c_00ffffff] "r"(c_00ffffff),
[c_8000000] "r"(c_8000000), [c_8000080] "r"(c_8000080),
[c_ff000000] "r"(c_ff000000), [argb] "r"(argb)
: "memory", "hi", "lo");
ptr[x] = temp1;
}
}
@@ -133,38 +128,37 @@ static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
const int rest = len & 1;
const uint32_t* const loop_end = out + len - rest;
const int step = 4;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
// Packs one byte from each of the a/r/g/b inputs into one 32-bit word per
// pixel and stores it through 'out'.
//  - 'offset' starts at 0 (xor with itself) and advances by 'step' bytes per
//    pixel; lbux loads the unsigned byte at base + offset for each channel.
//  - ins ..., 16, 16 builds (a << 16) | r in temp1 and (g << 16) | b in temp3;
//    precr.qb.ph then keeps the low byte of each halfword, giving
//    (a << 24) | (r << 16) | (g << 8) | b.
//  - Label 2 is the main loop: 'out' is bumped by 4 bytes before the store
//    (hence the -4 displacement) and the loop runs until out == loop_end,
//    where loop_end = out + len - rest.
//  - Label 0 is the tail: if 'rest' (len & 1) is non-zero, one more pixel is
//    packed and stored at 0(out) without advancing 'out'.
// "memory" is clobbered because of the stores through 'out'.
__asm__ volatile(
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [offset] "=&r"(offset), [out] "+&r"(out)
: [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
[loop_end] "r"(loop_end), [rest] "r"(rest)
: "memory");
}
#endif // WORDS_BIGENDIAN
@@ -175,36 +169,35 @@ static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
const int rest = len & 1;
const int a = 0xff;
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
// Packs one byte from each of the r/g/b inputs, plus the constant 'a' (0xff),
// into one 32-bit word per pixel and stores it through 'out'.
//  - 'offset' starts at 0 and advances by 'step' bytes per pixel
//    (NOTE(review): 'step' is defined outside this view, presumably the
//    caller-supplied channel stride).
//  - ins ..., 16, 16 builds (a << 16) | r in temp0 and (g << 16) | b in temp2;
//    precr.qb.ph keeps the low byte of each halfword, giving
//    (a << 24) | (r << 16) | (g << 8) | b.
//  - Label 2 is the main loop: 'out' is bumped by 4 bytes before the store
//    (hence the -4 displacement) and the loop runs until out == loop_end,
//    where loop_end = out + len - rest.
//  - Label 0 is the tail: if 'rest' (len & 1) is non-zero, one more pixel is
//    packed and stored at 0(out) without advancing 'out'.
// "memory" is clobbered because of the stores through 'out'.
__asm__ volatile(
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[offset] "=&r"(offset), [out] "+&r"(out)
: [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
[loop_end] "r"(loop_end), [rest] "r"(rest)
: "memory");
}
//------------------------------------------------------------------------------

View File

@@ -22,25 +22,26 @@
#define MULTIPLIER(a) ((a) * 0x8081)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do { \
const uint8x8_t alpha = (V).val[(ALPHA)]; \
const uint16x8_t r1 = vmull_u8((V).val[1], alpha); \
const uint16x8_t g1 = vmull_u8((V).val[2], alpha); \
const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
/* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */ \
const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8); \
const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8); \
const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8); \
const uint16x8_t r3 = vaddq_u16(r2, kOne); \
const uint16x8_t g3 = vaddq_u16(g2, kOne); \
const uint16x8_t b3 = vaddq_u16(b2, kOne); \
(V).val[1] = vshrn_n_u16(r3, 8); \
(V).val[2] = vshrn_n_u16(g3, 8); \
(V).val[(OTHER)] = vshrn_n_u16(b3, 8); \
} while (0)
// Scales three of the four byte vectors held in V by the vector V.val[ALPHA],
// in place: val[1], val[2] and val[OTHER] are each multiplied by alpha with a
// widening 8x8->16 bit multiply, divided by 255 using the approximation noted
// inside the macro (vsraq_n_u16 computes v + (v >> 8), kOne adds the 1), and
// narrowed back to 8 bits by the final >> 8. V.val[ALPHA] is only read.
// The r/g/b prefixes on the temporaries are labels only; which channel each
// index holds depends on the ALPHA/OTHER values the caller passes.
// Requires a 'uint16x8_t kOne' in the expansion scope (ApplyAlphaMultiply_NEON
// defines it as vdupq_n_u16(1u)).
#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) \
do { \
const uint8x8_t alpha = (V).val[(ALPHA)]; \
const uint16x8_t r1 = vmull_u8((V).val[1], alpha); \
const uint16x8_t g1 = vmull_u8((V).val[2], alpha); \
const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
/* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */ \
const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8); \
const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8); \
const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8); \
const uint16x8_t r3 = vaddq_u16(r2, kOne); \
const uint16x8_t g3 = vaddq_u16(g2, kOne); \
const uint16x8_t b3 = vaddq_u16(b2, kOne); \
(V).val[1] = vshrn_n_u16(r3, 8); \
(V).val[2] = vshrn_n_u16(g3, 8); \
(V).val[(OTHER)] = vshrn_n_u16(b3, 8); \
} while (0)
static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first, int w,
int h, int stride) {
const uint16x8_t kOne = vdupq_n_u16(1u);
while (h-- > 0) {
uint32_t* const rgbx = (uint32_t*)rgba;
@@ -118,7 +119,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
uint32_t* WEBP_RESTRICT dst,
int dst_stride) {
int i, j;
uint8x8x4_t greens; // leave A/R/B channels zero'd.
uint8x8x4_t greens; // leave A/R/B channels zero'd.
greens.val[0] = vdup_n_u8(0);
greens.val[2] = vdup_n_u8(0);
greens.val[3] = vdup_n_u8(0);

View File

@@ -16,8 +16,8 @@
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
@@ -90,7 +90,7 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
const __m128i zero = _mm_setzero_si128();
const int limit = width & ~15;
for (j = 0; j < height; ++j) {
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
const __m128i a1 = _mm_unpacklo_epi8(zero, a0); // note the 'zero' first!
const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
@@ -98,9 +98,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
}
for (; i < width; ++i) dst[i] = alpha[i] << 8;
@@ -197,36 +197,37 @@ static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
//------------------------------------------------------------------------------
// Non-dither premultiplied modes
#define MULTIPLIER(a) ((a) * 0x8081)
#define MULTIPLIER(a) ((a) * 0x8081)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
// We can't use a 'const int' for the SHUFFLE value, because it has to be an
// immediate in the _mm_shufflexx_epi16() instruction. We really need a macro.
// We use: v / 255 = (v * 0x8081) >> 23, where v = alpha * {r,g,b} is a 16bit
// value.
#define APPLY_ALPHA(RGBX, SHUFFLE) do { \
const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX)); \
const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero); \
const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero); \
const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask); \
const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask); \
const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
/* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */ \
const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo); \
const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi); \
const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult); \
const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult); \
const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7); \
const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7); \
const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi); \
_mm_storeu_si128((__m128i*)&(RGBX), A3); \
} while (0)
// Premultiplies, in place, the 16 bytes starting at &(RGBX).
// Steps: the bytes are widened to 16-bit lanes against 'zero'; kMask is OR-ed
// in and the lanes are rearranged with SHUFFLE (low and high halves
// separately) to build the per-lane multiplier described by the 'alpha2'
// layout note below, where the 0xff entry is the factor applied to the alpha
// lane itself. The product is then divided by 255 as (v * 0x8081) >> 23:
// _mm_mulhi_epu16 by kMult supplies the >> 16 and _mm_srli_epi16 the remaining
// >> 7. Results are packed back to bytes with unsigned saturation and stored.
// SHUFFLE must be a compile-time immediate. Expects '__m128i zero', 'kMult'
// and 'kMask' in the expansion scope (see ApplyAlphaMultiply_SSE2).
#define APPLY_ALPHA(RGBX, SHUFFLE) \
do { \
const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX)); \
const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero); \
const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero); \
const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask); \
const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask); \
const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
/* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */ \
const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo); \
const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi); \
const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult); \
const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult); \
const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7); \
const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7); \
const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi); \
_mm_storeu_si128((__m128i*)&(RGBX), A3); \
} while (0)
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first, int w,
int h, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i kMult = _mm_set1_epi16((short)0x8081);
const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
@@ -273,7 +274,8 @@ static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i < length; ++i) if (src[i] != 0xff) return 1;
for (; i < length; ++i)
if (src[i] != 0xff) return 1;
return 0;
}
@@ -284,9 +286,9 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
// We don't know if we can access the last 3 bytes after the last alpha
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
// or the last byte of the quadruplet). Hence the '-3' protection below.
length = length * 4 - 3; // size in bytes
length = length * 4 - 3; // size in bytes
for (; i + 64 <= length; i += 64) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
@@ -296,23 +298,24 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
const __m128i c0 = _mm_packs_epi32(b0, b1);
const __m128i c1 = _mm_packs_epi32(b2, b3);
const __m128i d = _mm_packus_epi16(c0, c1);
const __m128i d = _mm_packus_epi16(c0, c1);
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i + 32 <= length; i += 32) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
const __m128i c = _mm_packs_epi32(b0, b1);
const __m128i d = _mm_packus_epi16(c, c);
const __m128i c = _mm_packs_epi32(b0, b1);
const __m128i d = _mm_packus_epi16(c, c);
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
for (; i <= length; i += 4)
if (src[i] != 0xff) return 1;
return 0;
}
@@ -334,7 +337,8 @@ static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
_mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0));
_mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1));
}
for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color;
for (; i < length; ++i)
if ((src[i] >> 24) == 0) src[i] = color;
}
// -----------------------------------------------------------------------------
@@ -369,8 +373,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
}
static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr,
const uint8_t* WEBP_RESTRICT const alpha,
int width, int inverse) {
const uint8_t* WEBP_RESTRICT const alpha, int width,
int inverse) {
int x = 0;
if (!inverse) {
const __m128i zero = _mm_setzero_si128();

View File

@@ -12,8 +12,8 @@
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
#if defined(WEBP_USE_SSE41)
#include <emmintrin.h>
@@ -35,14 +35,14 @@ static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb,
// 'src[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~15;
const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 12, 8, 4, 0);
const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
12, 8, 4, 0, -1, -1, -1, -1);
const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0,
-1, -1, -1, -1, -1, -1, -1, -1);
const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
const __m128i kCstAlpha0 =
_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
const __m128i kCstAlpha1 =
_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1);
const __m128i kCstAlpha2 =
_mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1);
const __m128i kCstAlpha3 =
_mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
for (j = 0; j < height; ++j) {
const __m128i* src = (const __m128i*)argb;
for (i = 0; i < limit; i += 16) {

View File

@@ -44,12 +44,12 @@ static WEBP_INLINE void VP8PlanarTo24b_SSE41(
// Process R.
{
const __m128i shuff0 = _mm_set_epi8(
5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
const __m128i shuff1 = _mm_set_epi8(
-1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1);
const __m128i shuff2 = _mm_set_epi8(
-1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1);
const __m128i shuff0 =
_mm_set_epi8(5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
const __m128i shuff1 = _mm_set_epi8(-1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7,
-1, -1, 6, -1, -1);
const __m128i shuff2 = _mm_set_epi8(-1, -1, 15, -1, -1, 14, -1, -1, 13, -1,
-1, 12, -1, -1, 11, -1);
WEBP_SSE41_SHUFF(R, in0, in1)
}
@@ -57,23 +57,23 @@ static WEBP_INLINE void VP8PlanarTo24b_SSE41(
{
// Same as before, just shifted to the left by one and including the right
// padding.
const __m128i shuff0 = _mm_set_epi8(
-1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
const __m128i shuff1 = _mm_set_epi8(
10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
const __m128i shuff2 = _mm_set_epi8(
-1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1);
const __m128i shuff0 =
_mm_set_epi8(-1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
const __m128i shuff1 =
_mm_set_epi8(10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
const __m128i shuff2 = _mm_set_epi8(-1, 15, -1, -1, 14, -1, -1, 13, -1, -1,
12, -1, -1, 11, -1, -1);
WEBP_SSE41_SHUFF(G, in2, in3)
}
// Process B.
{
const __m128i shuff0 = _mm_set_epi8(
-1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
const __m128i shuff1 = _mm_set_epi8(
-1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
const __m128i shuff2 = _mm_set_epi8(
15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10);
const __m128i shuff0 =
_mm_set_epi8(-1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
const __m128i shuff1 =
_mm_set_epi8(-1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
const __m128i shuff2 = _mm_set_epi8(15, -1, -1, 14, -1, -1, 13, -1, -1, 12,
-1, -1, 11, -1, -1, 10);
WEBP_SSE41_SHUFF(B, in4, in5)
}

View File

@@ -14,42 +14,37 @@
#include <stdlib.h>
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
// Boolean-cost cost table
const uint16_t VP8EntropyCost[256] = {
1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951,
939, 911, 896, 878, 871, 854, 838, 820, 811, 794,
786, 768, 768, 752, 740, 732, 720, 709, 704, 690,
683, 672, 666, 655, 647, 640, 631, 622, 615, 607,
598, 592, 586, 576, 572, 564, 559, 555, 547, 541,
534, 528, 522, 512, 512, 504, 500, 494, 488, 483,
477, 473, 467, 461, 458, 452, 448, 443, 438, 434,
427, 424, 419, 415, 410, 406, 403, 399, 394, 390,
384, 384, 377, 374, 370, 366, 362, 359, 355, 351,
347, 342, 342, 336, 333, 330, 326, 323, 320, 316,
312, 308, 305, 302, 299, 296, 293, 288, 287, 283,
280, 277, 274, 272, 268, 266, 262, 256, 256, 256,
251, 248, 245, 242, 240, 237, 234, 232, 228, 226,
223, 221, 218, 216, 214, 211, 208, 205, 203, 201,
198, 196, 192, 191, 188, 187, 183, 181, 179, 176,
175, 171, 171, 168, 165, 163, 160, 159, 156, 154,
152, 150, 148, 146, 144, 142, 139, 138, 135, 133,
131, 128, 128, 125, 123, 121, 119, 117, 115, 113,
111, 110, 107, 105, 103, 102, 100, 98, 96, 94,
92, 91, 89, 86, 86, 83, 82, 80, 77, 76,
74, 73, 71, 69, 67, 66, 64, 63, 61, 59,
57, 55, 54, 52, 51, 49, 47, 46, 44, 43,
41, 40, 38, 36, 35, 33, 32, 30, 29, 27,
25, 24, 22, 21, 19, 18, 16, 15, 13, 12,
10, 9, 7, 6, 4, 3
};
1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, 1178, 1152,
1110, 1076, 1061, 1024, 1024, 992, 968, 951, 939, 911, 896, 878,
871, 854, 838, 820, 811, 794, 786, 768, 768, 752, 740, 732,
720, 709, 704, 690, 683, 672, 666, 655, 647, 640, 631, 622,
615, 607, 598, 592, 586, 576, 572, 564, 559, 555, 547, 541,
534, 528, 522, 512, 512, 504, 500, 494, 488, 483, 477, 473,
467, 461, 458, 452, 448, 443, 438, 434, 427, 424, 419, 415,
410, 406, 403, 399, 394, 390, 384, 384, 377, 374, 370, 366,
362, 359, 355, 351, 347, 342, 342, 336, 333, 330, 326, 323,
320, 316, 312, 308, 305, 302, 299, 296, 293, 288, 287, 283,
280, 277, 274, 272, 268, 266, 262, 256, 256, 256, 251, 248,
245, 242, 240, 237, 234, 232, 228, 226, 223, 221, 218, 216,
214, 211, 208, 205, 203, 201, 198, 196, 192, 191, 188, 187,
183, 181, 179, 176, 175, 171, 171, 168, 165, 163, 160, 159,
156, 154, 152, 150, 148, 146, 144, 142, 139, 138, 135, 133,
131, 128, 128, 125, 123, 121, 119, 117, 115, 113, 111, 110,
107, 105, 103, 102, 100, 98, 96, 94, 92, 91, 89, 86,
86, 83, 82, 80, 77, 76, 74, 73, 71, 69, 67, 66,
64, 63, 61, 59, 57, 55, 54, 52, 51, 49, 47, 46,
44, 43, 41, 40, 38, 36, 35, 33, 32, 30, 29, 27,
25, 24, 22, 21, 19, 18, 16, 15, 13, 12, 10, 9,
7, 6, 4, 3};
//------------------------------------------------------------------------------
// Level cost tables
@@ -57,270 +52,184 @@ const uint16_t VP8EntropyCost[256] = {
// Fixed costs for coding levels, deduced from the coding tree.
// This is only the part that doesn't depend on the probability state.
const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
0, 256, 256, 256, 256, 432, 618, 630,
731, 640, 640, 828, 901, 948, 1021, 1101,
1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
};
0, 256, 256, 256, 256, 432, 618, 630, 731, 640, 640, 828,
901, 948, 1021, 1101, 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497, 1540, 1570, 1613, 1280,
1295, 1317, 1332, 1358, 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679, 1694, 1716, 1731, 1775,
1790, 1812, 1827, 1853, 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832, 1838, 1847, 1853, 1878,
1884, 1893, 1899, 1910, 1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059, 2065, 2074, 2080, 2100,
2106, 2115, 2121, 2132, 2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283, 2289, 2298, 2304, 2168,
2174, 2183, 2189, 2200, 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351, 2357, 2366, 2372, 2392,
2398, 2407, 2413, 2424, 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573, 2579, 2588, 2594, 2619,
2625, 2634, 2640, 2651, 2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572, 2578, 2587, 2593, 2613,
2619, 2628, 2634, 2645, 2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796, 2802, 2811, 2817, 2840,
2846, 2855, 2861, 2872, 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023, 3029, 3038, 3044, 3064,
3070, 3079, 3085, 3096, 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086, 3092, 3101, 3107, 3132,
3138, 3147, 3153, 3164, 3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313, 3319, 3328, 3334, 3354,
3360, 3369, 3375, 3386, 3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537, 3543, 3552, 3558, 2816,
2822, 2831, 2837, 2848, 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999, 3005, 3014, 3020, 3040,
3046, 3055, 3061, 3072, 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221, 3227, 3236, 3242, 3267,
3273, 3282, 3288, 3299, 3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289, 3295, 3304, 3310, 3330,
3336, 3345, 3351, 3362, 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513, 3519, 3528, 3534, 3557,
3563, 3572, 3578, 3589, 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740, 3746, 3755, 3761, 3781,
3787, 3796, 3802, 3813, 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734, 3740, 3749, 3755, 3780,
3786, 3795, 3801, 3812, 3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961, 3967, 3976, 3982, 4002,
4008, 4017, 4023, 4034, 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185, 4191, 4200, 4206, 4070,
4076, 4085, 4091, 4102, 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253, 4259, 4268, 4274, 4294,
4300, 4309, 4315, 4326, 4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475, 4481, 4490, 4496, 4521,
4527, 4536, 4542, 4553, 4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588,
3594, 3603, 3609, 3620, 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, 3777, 3786, 3792, 3815,
3821, 3830, 3836, 3847, 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039,
4045, 4054, 4060, 4071, 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107,
4113, 4122, 4128, 4139, 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329,
4335, 4344, 4350, 4361, 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, 4518, 4527, 4533, 4328,
4334, 4343, 4349, 4360, 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552,
4558, 4567, 4573, 4584, 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779,
4785, 4794, 4800, 4811, 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842,
4848, 4857, 4863, 4874, 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, 5031, 5040, 5046, 5069,
5075, 5084, 5090, 5101, 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293,
5299, 5308, 5314, 5325, 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755,
4761, 4770, 4776, 4787, 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, 4942, 4951, 4957, 4977,
4983, 4992, 4998, 5009, 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, 5166, 5175, 5181, 5045,
5051, 5060, 5066, 5077, 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269,
5275, 5284, 5290, 5301, 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496,
5502, 5511, 5517, 5528, 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, 5455, 5464, 5470, 5490,
5496, 5505, 5511, 5522, 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717,
5723, 5732, 5738, 5749, 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941,
5947, 5956, 5962, 5973, 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009,
6015, 6024, 6030, 6041, 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, 6196, 6205, 6211, 6231,
6237, 6246, 6252, 6263, 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, 6420, 6429, 6435, 3515,
3521, 3530, 3536, 3547, 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, 3704, 3713, 3719, 3739,
3745, 3754, 3760, 3771, 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, 3926, 3935, 3941, 3966,
3972, 3981, 3987, 3998, 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, 3994, 4003, 4009, 4029,
4035, 4044, 4050, 4061, 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, 4218, 4227, 4233, 4256,
4262, 4271, 4277, 4288, 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, 4445, 4454, 4460, 4480,
4486, 4495, 4501, 4512, 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, 4439, 4448, 4454, 4479,
4485, 4494, 4500, 4511, 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, 4666, 4675, 4681, 4701,
4707, 4716, 4722, 4733, 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, 4890, 4899, 4905, 4769,
4775, 4784, 4790, 4801, 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, 4958, 4967, 4973, 4993,
4999, 5008, 5014, 5025, 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, 5180, 5189, 5195, 5220,
5226, 5235, 5241, 5252, 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, 4642, 4651, 4657, 4677,
4683, 4692, 4698, 4709, 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, 4866, 4875, 4881, 4904,
4910, 4919, 4925, 4936, 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, 5093, 5102, 5108, 5128,
5134, 5143, 5149, 5160, 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, 5156, 5165, 5171, 5196,
5202, 5211, 5217, 5228, 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, 5383, 5392, 5398, 5418,
5424, 5433, 5439, 5450, 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, 5607, 5616, 5622, 5417,
5423, 5432, 5438, 5449, 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, 5606, 5615, 5621, 5641,
5647, 5656, 5662, 5673, 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, 5828, 5837, 5843, 5868,
5874, 5883, 5889, 5900, 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, 5896, 5905, 5911, 5931,
5937, 5946, 5952, 5963, 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, 6120, 6129, 6135, 6158,
6164, 6173, 6179, 6190, 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, 6347, 6356, 6362, 6382,
6388, 6397, 6403, 6414, 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408, 5414, 5423, 5429, 5454,
5460, 5469, 5475, 5486, 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635, 5641, 5650, 5656, 5676,
5682, 5691, 5697, 5708, 5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859, 5865, 5874, 5880, 5744,
5750, 5759, 5765, 5776, 5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927, 5933, 5942, 5948, 5968,
5974, 5983, 5989, 6000, 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149, 6155, 6164, 6170, 6195,
6201, 6210, 6216, 6227, 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148, 6154, 6163, 6169, 6189,
6195, 6204, 6210, 6221, 6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372, 6378, 6387, 6393, 6416,
6422, 6431, 6437, 6448, 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599, 6605, 6614, 6620, 6640,
6646, 6655, 6661, 6672, 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662, 6668, 6677, 6683, 6708,
6714, 6723, 6729, 6740, 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889, 6895, 6904, 6910, 6930,
6936, 6945, 6951, 6962, 6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113, 7119, 7128, 7134, 6392,
6398, 6407, 6413, 6424, 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575, 6581, 6590, 6596, 6616,
6622, 6631, 6637, 6648, 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797, 6803, 6812, 6818, 6843,
6849, 6858, 6864, 6875, 6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865, 6871, 6880, 6886, 6906,
6912, 6921, 6927, 6938, 6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089, 7095, 7104, 7110, 7133,
7139, 7148, 7154, 7165, 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316, 7322, 7331, 7337, 7357,
7363, 7372, 7378, 7389, 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310, 7316, 7325, 7331, 7356,
7362, 7371, 7377, 7388, 7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537, 7543, 7552, 7558, 7578,
7584, 7593, 7599, 7610, 7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761};
//------------------------------------------------------------------------------
// Tables for level coding
// 16 entries with values in [0, 7], followed by one extra 0 entry marked as a
// sentinel (16 + 1 elements in total).
// NOTE(review): presumably indexed by coefficient position to obtain its band
// -- confirm against the code that reads this table.
const uint8_t VP8EncBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // sentinel
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // sentinel
};
//------------------------------------------------------------------------------

View File

@@ -38,48 +38,48 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
return VP8BitCost(0, p0);
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"subu %[temp1], %[res_last], %[n] \n\t"
"sll %[temp0], %[n], 1 \n\t"
"blez %[temp1], 2f \n\t"
" addu %[res_coeffs], %[res_coeffs], %[temp0] \n\t"
"1: \n\t"
"lh %[v_reg], 0(%[res_coeffs]) \n\t"
"addiu %[n], %[n], 1 \n\t"
"negu %[temp0], %[v_reg] \n\t"
"slti %[temp1], %[v_reg], 0 \n\t"
"movn %[v_reg], %[temp0], %[temp1] \n\t"
"sltiu %[temp0], %[v_reg], 2 \n\t"
"move %[ctx_reg], %[v_reg] \n\t"
"movz %[ctx_reg], %[const_2], %[temp0] \n\t"
"sll %[temp1], %[v_reg], 1 \n\t"
"addu %[temp1], %[temp1], %[VP8LevelFixedCosts] \n\t"
"lhu %[temp1], 0(%[temp1]) \n\t"
"slt %[temp0], %[v_reg], %[const_max_level] \n\t"
"movz %[v_reg], %[const_max_level], %[temp0] \n\t"
"addu %[cost], %[cost], %[temp1] \n\t"
"sll %[v_reg], %[v_reg], 1 \n\t"
"sll %[ctx_reg], %[ctx_reg], 2 \n\t"
"addu %[v_reg], %[v_reg], %[t] \n\t"
"lhu %[temp0], 0(%[v_reg]) \n\t"
"addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
"addu %[t], %[p_costs], %[ctx_reg] \n\t"
"addu %[cost], %[cost], %[temp0] \n\t"
"addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
"bne %[n], %[res_last], 1b \n\t"
" lw %[t], 0(%[t]) \n\t"
"2: \n\t"
".set pop \n\t"
: [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
[ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs)
: [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
[inc_p_costs]"r"(inc_p_costs)
: "memory"
);
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"subu %[temp1], %[res_last], %[n] \n\t"
"sll %[temp0], %[n], 1 \n\t"
"blez %[temp1], 2f \n\t"
" addu %[res_coeffs], %[res_coeffs], %[temp0] \n\t"
"1: \n\t"
"lh %[v_reg], 0(%[res_coeffs]) \n\t"
"addiu %[n], %[n], 1 \n\t"
"negu %[temp0], %[v_reg] \n\t"
"slti %[temp1], %[v_reg], 0 \n\t"
"movn %[v_reg], %[temp0], %[temp1] \n\t"
"sltiu %[temp0], %[v_reg], 2 \n\t"
"move %[ctx_reg], %[v_reg] \n\t"
"movz %[ctx_reg], %[const_2], %[temp0] \n\t"
"sll %[temp1], %[v_reg], 1 \n\t"
"addu %[temp1], %[temp1], %[VP8LevelFixedCosts] \n\t"
"lhu %[temp1], 0(%[temp1]) \n\t"
"slt %[temp0], %[v_reg], %[const_max_level] \n\t"
"movz %[v_reg], %[const_max_level], %[temp0] \n\t"
"addu %[cost], %[cost], %[temp1] \n\t"
"sll %[v_reg], %[v_reg], 1 \n\t"
"sll %[ctx_reg], %[ctx_reg], 2 \n\t"
"addu %[v_reg], %[v_reg], %[t] \n\t"
"lhu %[temp0], 0(%[v_reg]) \n\t"
"addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
"addu %[t], %[p_costs], %[ctx_reg] \n\t"
"addu %[cost], %[cost], %[temp0] \n\t"
"addiu %[res_coeffs], %[res_coeffs], 2 \n\t"
"bne %[n], %[res_last], 1b \n\t"
" lw %[t], 0(%[t]) \n\t"
"2: \n\t"
".set pop \n\t"
: [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg),
[ctx_reg] "=&r"(ctx_reg), [p_costs] "+&r"(p_costs),
[temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
[res_coeffs] "+&r"(res_coeffs)
: [const_2] "r"(const_2), [const_max_level] "r"(const_max_level),
[VP8LevelFixedCosts] "r"(VP8LevelFixedCosts), [res_last] "r"(res_last),
[inc_p_costs] "r"(inc_p_costs)
: "memory");
// Last coefficient is always non-zero
{
@@ -102,37 +102,35 @@ static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
int temp0, temp1, temp2, n, n1;
assert(res->first == 0 || coeffs[0] == 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[p_coeffs], %[p_coeffs], 28 \n\t"
"li %[n], 15 \n\t"
"li %[temp2], -1 \n\t"
"0: \n\t"
"ulw %[temp0], 0(%[p_coeffs]) \n\t"
"beqz %[temp0], 1f \n\t"
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"addiu %[p_coeffs], %[p_coeffs], 28 \n\t"
"li %[n], 15 \n\t"
"li %[temp2], -1 \n\t"
"0: \n\t"
"ulw %[temp0], 0(%[p_coeffs]) \n\t"
"beqz %[temp0], 1f \n\t"
#if defined(WORDS_BIGENDIAN)
" sll %[temp1], %[temp0], 16 \n\t"
" sll %[temp1], %[temp0], 16 \n\t"
#else
" srl %[temp1], %[temp0], 16 \n\t"
" srl %[temp1], %[temp0], 16 \n\t"
#endif
"addiu %[n1], %[n], -1 \n\t"
"movz %[temp0], %[n1], %[temp1] \n\t"
"movn %[temp0], %[n], %[temp1] \n\t"
"j 2f \n\t"
" addiu %[temp2], %[temp0], 0 \n\t"
"1: \n\t"
"addiu %[n], %[n], -2 \n\t"
"bgtz %[n], 0b \n\t"
" addiu %[p_coeffs], %[p_coeffs], -4 \n\t"
"2: \n\t"
".set pop \n\t"
: [p_coeffs]"+&r"(p_coeffs), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[n]"=&r"(n), [n1]"=&r"(n1)
:
: "memory"
);
"addiu %[n1], %[n], -1 \n\t"
"movz %[temp0], %[n1], %[temp1] \n\t"
"movn %[temp0], %[n], %[temp1] \n\t"
"j 2f \n\t"
" addiu %[temp2], %[temp0], 0 \n\t"
"1: \n\t"
"addiu %[n], %[n], -2 \n\t"
"bgtz %[n], 0b \n\t"
" addiu %[p_coeffs], %[p_coeffs], -4 \n\t"
"2: \n\t"
".set pop \n\t"
: [p_coeffs] "+&r"(p_coeffs), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
[temp2] "=&r"(temp2), [n] "=&r"(n), [n1] "=&r"(n1)
:
: "memory");
res->last = temp2;
res->coeffs = coeffs;
}

View File

@@ -38,43 +38,44 @@ static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
return VP8BitCost(0, p0);
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"subu %[temp1], %[res_last], %[n] \n\t"
"blez %[temp1], 2f \n\t"
" nop \n\t"
"1: \n\t"
"sll %[temp0], %[n], 1 \n\t"
"lhx %[v_reg], %[temp0](%[res_coeffs]) \n\t"
"addiu %[n], %[n], 1 \n\t"
"absq_s.w %[v_reg], %[v_reg] \n\t"
"sltiu %[temp0], %[v_reg], 2 \n\t"
"move %[ctx_reg], %[v_reg] \n\t"
"movz %[ctx_reg], %[const_2], %[temp0] \n\t"
"sll %[temp1], %[v_reg], 1 \n\t"
"lhx %[temp1], %[temp1](%[VP8LevelFixedCosts]) \n\t"
"slt %[temp0], %[v_reg], %[const_max_level] \n\t"
"movz %[v_reg], %[const_max_level], %[temp0] \n\t"
"addu %[cost], %[cost], %[temp1] \n\t"
"sll %[v_reg], %[v_reg], 1 \n\t"
"sll %[ctx_reg], %[ctx_reg], 2 \n\t"
"lhx %[temp0], %[v_reg](%[t]) \n\t"
"addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
"addu %[t], %[p_costs], %[ctx_reg] \n\t"
"addu %[cost], %[cost], %[temp0] \n\t"
"bne %[n], %[res_last], 1b \n\t"
" lw %[t], 0(%[t]) \n\t"
"2: \n\t"
".set pop \n\t"
: [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
[ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1)
: [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
[VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
[res_coeffs]"r"(res_coeffs), [inc_p_costs]"r"(inc_p_costs)
: "memory"
);
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"subu %[temp1], %[res_last], %[n] \n\t"
"blez %[temp1], 2f \n\t"
" nop \n\t"
"1: "
"\n\t"
"sll %[temp0], %[n], 1 \n\t"
"lhx %[v_reg], %[temp0](%[res_coeffs]) \n\t"
"addiu %[n], %[n], 1 \n\t"
"absq_s.w %[v_reg], %[v_reg] \n\t"
"sltiu %[temp0], %[v_reg], 2 \n\t"
"move %[ctx_reg], %[v_reg] \n\t"
"movz %[ctx_reg], %[const_2], %[temp0] \n\t"
"sll %[temp1], %[v_reg], 1 \n\t"
"lhx %[temp1], %[temp1](%[VP8LevelFixedCosts]) \n\t"
"slt %[temp0], %[v_reg], %[const_max_level] \n\t"
"movz %[v_reg], %[const_max_level], %[temp0] \n\t"
"addu %[cost], %[cost], %[temp1] \n\t"
"sll %[v_reg], %[v_reg], 1 \n\t"
"sll %[ctx_reg], %[ctx_reg], 2 \n\t"
"lhx %[temp0], %[v_reg](%[t]) \n\t"
"addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t"
"addu %[t], %[p_costs], %[ctx_reg] \n\t"
"addu %[cost], %[cost], %[temp0] \n\t"
"bne %[n], %[res_last], 1b \n\t"
" lw %[t], 0(%[t]) \n\t"
"2: "
"\n\t"
".set pop \n\t"
: [cost] "+&r"(cost), [t] "+&r"(t), [n] "+&r"(n), [v_reg] "=&r"(v_reg),
[ctx_reg] "=&r"(ctx_reg), [p_costs] "+&r"(p_costs),
[temp0] "=&r"(temp0), [temp1] "=&r"(temp1)
: [const_2] "r"(const_2), [const_max_level] "r"(const_max_level),
[VP8LevelFixedCosts] "r"(VP8LevelFixedCosts), [res_last] "r"(res_last),
[res_coeffs] "r"(res_coeffs), [inc_p_costs] "r"(inc_p_costs)
: "memory");
// Last coefficient is always non-zero
{

View File

@@ -16,8 +16,8 @@
#include "src/dsp/neon.h"
#include "src/enc/cost_enc.h"
// The values 1 through 16 in increasing order, one byte each.
// NOTE(review): presumably tags each of the 16 coefficients with its 1-based
// position -- confirm against the code that loads this table.
static const uint8_t position[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16 };
static const uint8_t position[16] = {1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16};
static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
VP8Residual* WEBP_RESTRICT const res) {
@@ -65,7 +65,7 @@ static int GetResidualCost_NEON(int ctx0, const VP8Residual* const res) {
return VP8BitCost(0, p0);
}
{ // precompute clamped levels and contexts, packed to 8b.
{ // precompute clamped levels and contexts, packed to 8b.
const uint8x16_t kCst2 = vdupq_n_u8(2);
const uint8x16_t kCst67 = vdupq_n_u8(MAX_VARIABLE_LEVEL);
const int16x8_t c0 = vld1q_s16(res->coeffs);
@@ -85,7 +85,7 @@ static int GetResidualCost_NEON(int ctx0, const VP8Residual* const res) {
for (; n < res->last; ++n) {
const int ctx = ctxs[n];
const int level = levels[n];
const int flevel = abs_levels[n]; // full level
const int flevel = abs_levels[n]; // full level
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
t = costs[n + 1][ctx];
}

View File

@@ -14,15 +14,14 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <assert.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
//------------------------------------------------------------------------------
@@ -63,7 +62,7 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
return VP8BitCost(0, p0);
}
{ // precompute clamped levels and contexts, packed to 8b.
{ // precompute clamped levels and contexts, packed to 8b.
const __m128i zero = _mm_setzero_si128();
const __m128i kCst2 = _mm_set1_epi8(2);
const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);
@@ -71,11 +70,11 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);
const __m128i D0 = _mm_sub_epi16(zero, c0);
const __m128i D1 = _mm_sub_epi16(zero, c1);
const __m128i E0 = _mm_max_epi16(c0, D0); // abs(v), 16b
const __m128i E0 = _mm_max_epi16(c0, D0); // abs(v), 16b
const __m128i E1 = _mm_max_epi16(c1, D1);
const __m128i F = _mm_packs_epi16(E0, E1);
const __m128i G = _mm_min_epu8(F, kCst2); // context = 0,1,2
const __m128i H = _mm_min_epu8(F, kCst67); // clamp_level in [0..67]
const __m128i G = _mm_min_epu8(F, kCst2); // context = 0,1,2
const __m128i H = _mm_min_epu8(F, kCst67); // clamp_level in [0..67]
_mm_storeu_si128((__m128i*)&ctxs[0], G);
_mm_storeu_si128((__m128i*)&levels[0], H);
@@ -86,7 +85,7 @@ static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
for (; n < res->last; ++n) {
const int ctx = ctxs[n];
const int level = levels[n];
const int flevel = abs_levels[n]; // full level
const int flevel = abs_levels[n]; // full level
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
t = costs[n + 1][ctx];
}

View File

@@ -33,19 +33,20 @@
// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
// Executes the CPUID instruction with eax = 'info_type' and ecx = 0, then
// stores the resulting eax/ebx/ecx/edx values in cpu_info[0..3].
// This variant copies ebx into edi before 'cpuid' and exchanges the two
// afterwards, so ebx holds its original value again when the asm ends and the
// ebx result is read from edi (the "=D" output).
// NOTE(review): presumably required because ebx is reserved as the PIC base
// register on i386 -- confirm.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
__asm__ volatile (
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type), "c"(0));
__asm__ volatile(
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
}
#elif defined(__i386__) || defined(__x86_64__)
// Executes the CPUID instruction with eax = 'info_type' and ecx = 0, then
// stores the resulting eax/ebx/ecx/edx values in cpu_info[0..3]. Here ebx is
// used directly as an output register (the "=b" constraint).
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
__asm__ volatile (
"cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type), "c"(0));
__asm__ volatile("cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
}
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
@@ -67,13 +68,13 @@ static WEBP_INLINE uint64_t xgetbv(void) {
const uint32_t ecx = 0;
uint32_t eax, edx;
// Use the raw opcode for xgetbv for compatibility with older toolchains.
__asm__ volatile (
".byte 0x0f, 0x01, 0xd0\n"
: "=a"(eax), "=d"(edx) : "c" (ecx));
__asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
: "=a"(eax), "=d"(edx)
: "c"(ecx));
return ((uint64_t)edx << 32) | eax;
}
#elif (defined(_M_X64) || defined(_M_IX86)) && \
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
_MSC_FULL_VER >= 160040219 // >= VS2010 SP1
#include <immintrin.h>
#define xgetbv() _xgetbv(0)
#elif defined(_MSC_VER) && defined(_M_IX86)
@@ -100,8 +101,8 @@ static int CheckSlowModel(int info) {
// (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
// Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
static const uint8_t kSlowModels[] = {
0x37, 0x4a, 0x4d, // Silvermont Microarchitecture
0x1c, 0x26, 0x27 // Atom Microarchitecture
0x37, 0x4a, 0x4d, // Silvermont Microarchitecture
0x1c, 0x26, 0x27 // Atom Microarchitecture
};
const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);
const uint32_t family = (info >> 8) & 0xf;
@@ -130,7 +131,7 @@ static int x86CPUInfo(CPUFeature feature) {
const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
cpu_info[2] == VENDOR_ID_INTEL_ECX &&
cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
}
GetCPUInfo(cpu_info, 1);
@@ -141,7 +142,7 @@ static int x86CPUInfo(CPUFeature feature) {
return !!(cpu_info[2] & (1 << 0));
}
if (feature == kSlowSSSE3) {
if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
return CheckSlowModel(cpu_info[0]);
}
return 0;
@@ -179,7 +180,7 @@ static int AndroidCPUInfo(CPUFeature feature) {
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
// Use compile flags as an indicator of SIMD support instead of a runtime check.
static int wasmCPUInfo(CPUFeature feature) {
switch (feature) {
@@ -234,14 +235,13 @@ static int armCPUInfo(CPUFeature feature) {
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
defined(WEBP_USE_MSA)
defined(WEBP_USE_MSA)
// Feature query used when one of the MIPS code paths is compiled in.
// Returns 1 when 'feature' is kMIPS32, kMIPSdspR2 or kMSA, and 0 for any
// other value. The answer depends only on 'feature'; nothing is probed here.
static int mipsCPUInfo(CPUFeature feature) {
  const int is_mips_feature =
      (feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA);
  return is_mips_feature ? 1 : 0;
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;

View File

@@ -119,7 +119,7 @@
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
// vtbl4_u8(); a fix was made in 16.6.
#if defined(_MSC_VER) && \
#if defined(_MSC_VER) && \
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
(_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC))))
#define WEBP_USE_NEON
@@ -192,6 +192,7 @@
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h> // NOLINT
// clang-format off
#define WEBP_DSP_INIT(func) \
do { \
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
@@ -202,7 +203,9 @@
func##_last_cpuinfo_used = VP8GetCPUInfo; \
(void)pthread_mutex_unlock(&func##_lock); \
} while (0)
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
// clang-format on
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
// clang-format off
#define WEBP_DSP_INIT(func) \
do { \
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
@@ -211,6 +214,7 @@
func(); \
func##_last_cpuinfo_used = VP8GetCPUInfo; \
} while (0)
// clang-format on
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
// Defines an Init + helper function that control multiple initialization of

View File

@@ -34,13 +34,14 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
// Adds (v >> 3) to the sample at dst[(x) + (y) * BPS], passes the sum through
// clip_8b() and writes it back to the same location. Expects a 'dst' pointer
// to be in scope at the expansion site.
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
// Applies STORE() to columns 0..3 of row 'y' with the values DC + d, DC + c,
// DC - c and DC - d, where DC is 'dc' evaluated once into a local constant.
#define STORE2(y, dc, d, c) do { \
const int DC = (dc); \
STORE(0, y, DC + (d)); \
STORE(1, y, DC + (c)); \
STORE(2, y, DC - (c)); \
STORE(3, y, DC - (d)); \
} while (0)
#define STORE2(y, dc, d, c) \
do { \
const int DC = (dc); \
STORE(0, y, DC + (d)); \
STORE(1, y, DC + (c)); \
STORE(2, y, DC - (c)); \
STORE(3, y, DC - (d)); \
} while (0)
#if !WEBP_NEON_OMIT_C_CODE
static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
@@ -48,17 +49,17 @@ static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
int C[4 * 4], *tmp;
int i;
tmp = C;
for (i = 0; i < 4; ++i) { // vertical pass
const int a = in[0] + in[8]; // [-4096, 4094]
const int b = in[0] - in[8]; // [-4095, 4095]
for (i = 0; i < 4; ++i) { // vertical pass
const int a = in[0] + in[8]; // [-4096, 4094]
const int b = in[0] - in[8]; // [-4095, 4095]
const int c = WEBP_TRANSFORM_AC3_MUL2(in[4]) -
WEBP_TRANSFORM_AC3_MUL1(in[12]); // [-3783, 3783]
const int d = WEBP_TRANSFORM_AC3_MUL1(in[4]) +
WEBP_TRANSFORM_AC3_MUL2(in[12]); // [-3785, 3781]
tmp[0] = a + d; // [-7881, 7875]
tmp[1] = b + c; // [-7878, 7878]
tmp[2] = b - c; // [-7878, 7878]
tmp[3] = a - d; // [-7877, 7879]
tmp[0] = a + d; // [-7881, 7875]
tmp[1] = b + c; // [-7878, 7878]
tmp[2] = b - c; // [-7878, 7878]
tmp[3] = a - d; // [-7877, 7879]
tmp += 4;
in++;
}
@@ -70,10 +71,10 @@ static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
// In the worst case scenario, the input to clip_8b() can be as large as
// [-60713, 60968].
tmp = C;
for (i = 0; i < 4; ++i) { // horizontal pass
for (i = 0; i < 4; ++i) { // horizontal pass
const int dc = tmp[0] + 4;
const int a = dc + tmp[8];
const int b = dc - tmp[8];
const int a = dc + tmp[8];
const int b = dc - tmp[8];
const int c =
WEBP_TRANSFORM_AC3_MUL2(tmp[4]) - WEBP_TRANSFORM_AC3_MUL1(tmp[12]);
const int d =
@@ -150,21 +151,21 @@ static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
int i;
for (i = 0; i < 4; ++i) {
const int a0 = in[0 + i] + in[12 + i];
const int a1 = in[4 + i] + in[ 8 + i];
const int a2 = in[4 + i] - in[ 8 + i];
const int a1 = in[4 + i] + in[8 + i];
const int a2 = in[4 + i] - in[8 + i];
const int a3 = in[0 + i] - in[12 + i];
tmp[0 + i] = a0 + a1;
tmp[8 + i] = a0 - a1;
tmp[4 + i] = a3 + a2;
tmp[0 + i] = a0 + a1;
tmp[8 + i] = a0 - a1;
tmp[4 + i] = a3 + a2;
tmp[12 + i] = a3 - a2;
}
for (i = 0; i < 4; ++i) {
const int dc = tmp[0 + i * 4] + 3; // w/ rounder
const int a0 = dc + tmp[3 + i * 4];
const int dc = tmp[0 + i * 4] + 3; // w/ rounder
const int a0 = dc + tmp[3 + i * 4];
const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
const int a3 = dc - tmp[3 + i * 4];
out[ 0] = (a0 + a1) >> 3;
const int a3 = dc - tmp[3 + i * 4];
out[0] = (a0 + a1) >> 3;
out[16] = (a3 + a2) >> 3;
out[32] = (a0 - a1) >> 3;
out[48] = (a3 - a2) >> 3;
@@ -194,21 +195,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
dst += BPS;
}
}
// Fixed-size entry points for TrueMotion(): each one forwards 'dst' unchanged
// and passes the block size (4, 8 or 16) as the 'size' argument.
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
//------------------------------------------------------------------------------
// 16x16
// Vertical prediction for a 16x16 block: the 16 bytes located one row
// (BPS bytes) before 'dst' are copied into each of the 16 rows that start at
// 'dst'. Consecutive rows are BPS bytes apart.
static void VE16_C(uint8_t* dst) { // vertical
static void VE16_C(uint8_t* dst) { // vertical
int j;
for (j = 0; j < 16; ++j) {
memcpy(dst + j * BPS, dst - BPS, 16);
}
}
static void HE16_C(uint8_t* dst) { // horizontal
static void HE16_C(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
memset(dst, dst[-1], 16);
@@ -223,7 +224,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
}
}
static void DC16_C(uint8_t* dst) { // DC
static void DC16_C(uint8_t* dst) { // DC
int DC = 16;
int j;
for (j = 0; j < 16; ++j) {
@@ -232,7 +233,7 @@ static void DC16_C(uint8_t* dst) { // DC
Put16(DC >> 5, dst);
}
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
int DC = 8;
int j;
for (j = 0; j < 16; ++j) {
@@ -264,13 +265,13 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
#if !WEBP_NEON_OMIT_C_CODE
static void VE4_C(uint8_t* dst) { // vertical
static void VE4_C(uint8_t* dst) { // vertical
const uint8_t* top = dst - BPS;
const uint8_t vals[4] = {
AVG3(top[-1], top[0], top[1]),
AVG3(top[ 0], top[1], top[2]),
AVG3(top[ 1], top[2], top[3]),
AVG3(top[ 2], top[3], top[4])
AVG3(top[-1], top[0], top[1]),
AVG3(top[0], top[1], top[2]),
AVG3(top[1], top[2], top[3]),
AVG3(top[2], top[3], top[4]),
};
int i;
for (i = 0; i < 4; ++i) {
@@ -279,7 +280,7 @@ static void VE4_C(uint8_t* dst) { // vertical
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void HE4_C(uint8_t* dst) { // horizontal
static void HE4_C(uint8_t* dst) { // horizontal
const int A = dst[-1 - BPS];
const int B = dst[-1];
const int C = dst[-1 + BPS];
@@ -292,7 +293,7 @@ static void HE4_C(uint8_t* dst) { // horizontal
}
#if !WEBP_NEON_OMIT_C_CODE
static void DC4_C(uint8_t* dst) { // DC
static void DC4_C(uint8_t* dst) { // DC
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -300,7 +301,7 @@ static void DC4_C(uint8_t* dst) { // DC
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
}
static void RD4_C(uint8_t* dst) { // Down-right
static void RD4_C(uint8_t* dst) { // Down-right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@@ -310,16 +311,16 @@ static void RD4_C(uint8_t* dst) { // Down-right
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
DST(0, 3) = AVG3(J, K, L);
DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
DST(0, 3) = AVG3(J, K, L);
DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
}
static void LD4_C(uint8_t* dst) { // Down-Left
static void LD4_C(uint8_t* dst) { // Down-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
@@ -328,17 +329,17 @@ static void LD4_C(uint8_t* dst) { // Down-Left
const int F = dst[5 - BPS];
const int G = dst[6 - BPS];
const int H = dst[7 - BPS];
DST(0, 0) = AVG3(A, B, C);
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(0, 0) = AVG3(A, B, C);
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void VR4_C(uint8_t* dst) { // Vertical-Right
static void VR4_C(uint8_t* dst) { // Vertical-Right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@@ -350,17 +351,17 @@ static void VR4_C(uint8_t* dst) { // Vertical-Right
DST(0, 0) = DST(1, 2) = AVG2(X, A);
DST(1, 0) = DST(2, 2) = AVG2(A, B);
DST(2, 0) = DST(3, 2) = AVG2(B, C);
DST(3, 0) = AVG2(C, D);
DST(3, 0) = AVG2(C, D);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
DST(3, 1) = AVG3(B, C, D);
DST(3, 1) = AVG3(B, C, D);
}
static void VL4_C(uint8_t* dst) { // Vertical-Left
static void VL4_C(uint8_t* dst) { // Vertical-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
@@ -369,32 +370,31 @@ static void VL4_C(uint8_t* dst) { // Vertical-Left
const int F = dst[5 - BPS];
const int G = dst[6 - BPS];
const int H = dst[7 - BPS];
DST(0, 0) = AVG2(A, B);
DST(0, 0) = AVG2(A, B);
DST(1, 0) = DST(0, 2) = AVG2(B, C);
DST(2, 0) = DST(1, 2) = AVG2(C, D);
DST(3, 0) = DST(2, 2) = AVG2(D, E);
DST(0, 1) = AVG3(A, B, C);
DST(0, 1) = AVG3(A, B, C);
DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
}
// Horizontal-Up prediction. Reads four samples I, J, K, L from byte offset -1
// on rows 0..3 (rows are BPS bytes apart) and writes the block through DST():
//  - AVG2() of the adjacent pairs (I,J), (J,K), (K,L),
//  - AVG3() of the triples (I,J,K), (J,K,L), (K,L,L),
//  - the six remaining positions receive L unchanged.
// NOTE(review): DST() and AVG3() are defined outside this excerpt; the
// meaning of DST()'s two coordinates is not visible here.
static void HU4_C(uint8_t* dst) { // Horizontal-Up
static void HU4_C(uint8_t* dst) { // Horizontal-Up
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
const int L = dst[-1 + 3 * BPS];
DST(0, 0) = AVG2(I, J);
DST(0, 0) = AVG2(I, J);
DST(2, 0) = DST(0, 1) = AVG2(J, K);
DST(2, 1) = DST(0, 2) = AVG2(K, L);
DST(1, 0) = AVG3(I, J, K);
DST(1, 0) = AVG3(I, J, K);
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
DST(3, 2) = DST(2, 2) =
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4_C(uint8_t* dst) { // Horizontal-Down
@@ -410,14 +410,14 @@ static void HD4_C(uint8_t* dst) { // Horizontal-Down
DST(0, 0) = DST(2, 1) = AVG2(I, X);
DST(0, 1) = DST(2, 2) = AVG2(J, I);
DST(0, 2) = DST(2, 3) = AVG2(K, J);
DST(0, 3) = AVG2(L, K);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
DST(1, 3) = AVG3(L, K, J);
DST(1, 3) = AVG3(L, K, J);
}
#undef DST
@@ -430,14 +430,14 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
// Chroma
#if !WEBP_NEON_OMIT_C_CODE
// Vertical prediction for an 8x8 chroma block: the 8 bytes located one row
// (BPS bytes) before 'dst' are copied into each of the 8 rows that start at
// 'dst'. Consecutive rows are BPS bytes apart.
static void VE8uv_C(uint8_t* dst) { // vertical
static void VE8uv_C(uint8_t* dst) { // vertical
int j;
for (j = 0; j < 8; ++j) {
memcpy(dst + j * BPS, dst - BPS, 8);
}
}
static void HE8uv_C(uint8_t* dst) { // horizontal
static void HE8uv_C(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 8; ++j) {
memset(dst, dst[-1], 8);
@@ -453,7 +453,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
}
}
static void DC8uv_C(uint8_t* dst) { // DC
static void DC8uv_C(uint8_t* dst) { // DC
int dc0 = 8;
int i;
for (i = 0; i < 8; ++i) {
@@ -462,7 +462,7 @@ static void DC8uv_C(uint8_t* dst) { // DC
Put8x8uv(dc0 >> 4, dst);
}
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
@@ -480,7 +480,7 @@ static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
Put8x8uv(dc0 >> 3, dst);
}
// DC prediction variant that reads no neighbouring samples: it hands the
// constant 0x80 and 'dst' to Put8x8uv().
// NOTE(review): Put8x8uv()'s body is outside this excerpt; presumably it
// fills the 8x8 block with the given value - confirm.
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
Put8x8uv(0x80, dst);
}
#endif // !WEBP_NEON_OMIT_C_CODE
@@ -493,46 +493,46 @@ VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
// 4 pixels in, 2 pixels out
// Reads p1, p0, q0, q1 at offsets -2*step, -step, 0 and +step from 'p'.
// The correction 'a' is 3 * (q0 - p0) plus a VP8ksclip1[] lookup of (p1 - q1).
// a1 and a2 are VP8ksclip2[] lookups of 'a' shifted right by 3, differing only
// in the rounding constant added first (4 vs. 3). Only the two middle samples
// are rewritten: p[-step] = VP8kclip1[p0 + a2] and p[0] = VP8kclip1[q0 - a1].
// NOTE(review): the VP8k* tables are declared outside this excerpt. They are
// indexed with possibly negative values here, so they presumably point into
// the middle of their backing arrays - confirm.
static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
const int a2 = VP8ksclip2[(a + 3) >> 3];
p[-step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[0] = VP8kclip1[q0 - a1];
}
// 4 pixels in, 4 pixels out
// Reads p1, p0, q0, q1 at offsets -2*step, -step, 0 and +step from 'p'.
// Unlike DoFilter2_C, the correction 'a' is 3 * (q0 - p0) only (no p1/q1
// term). a1 and a2 are VP8ksclip2[] lookups of (a + 4) >> 3 and (a + 3) >> 3;
// a3 is a1 halved with rounding, (a1 + 1) >> 1.
// All four samples are rewritten through VP8kclip1[]:
//   p1 + a3, p0 + a2, q0 - a1, q1 - a3.
// NOTE(review): the VP8k* tables are declared outside this excerpt.
static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
const int a3 = (a1 + 1) >> 1;
p[-2*step] = VP8kclip1[p1 + a3];
p[- step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a3];
p[-2 * step] = VP8kclip1[p1 + a3];
p[-step] = VP8kclip1[p0 + a2];
p[0] = VP8kclip1[q0 - a1];
p[step] = VP8kclip1[q1 - a3];
}
// 6 pixels in, 6 pixels out
// Reads p2, p1, p0 at offsets -3*step, -2*step, -step and q0, q1, q2 at
// offsets 0, +step, +2*step from 'p'. The base correction 'a' is a
// VP8ksclip1[] lookup of 3 * (q0 - p0) + VP8ksclip1[p1 - q1]. Three scaled
// corrections are derived from it with weights 27, 18 and 9 (rounder 63,
// shift 7) and applied symmetrically through VP8kclip1[]:
//   p0 + a1 / q0 - a1,  p1 + a2 / q1 - a2,  p2 + a3 / q2 - a3.
// NOTE(review): the VP8k* tables are declared outside this excerpt.
static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3*step] = VP8kclip1[p2 + a3];
p[-2*step] = VP8kclip1[p1 + a2];
p[- step] = VP8kclip1[p0 + a1];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a2];
p[ 2*step] = VP8kclip1[q2 - a3];
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3 * step] = VP8kclip1[p2 + a3];
p[-2 * step] = VP8kclip1[p1 + a2];
p[-step] = VP8kclip1[p0 + a1];
p[0] = VP8kclip1[q0 - a1];
p[step] = VP8kclip1[q1 - a2];
p[2 * step] = VP8kclip1[q2 - a3];
}
// Returns non-zero when either VP8kabs0[p1 - p0] or VP8kabs0[q1 - q0] exceeds
// 'thresh', where p1, p0, q0, q1 are the samples at offsets -2*step, -step, 0
// and +step from 'p'. Does not modify the samples.
// NOTE(review): VP8kabs0 is declared outside this excerpt; presumably it maps
// a signed difference to its absolute value - confirm.
static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
@@ -545,8 +545,8 @@ static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
#endif // !WEBP_NEON_OMIT_C_CODE
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
int step, int t, int it) {
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p, int step, int t,
int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
const int p0 = p[-step], q0 = p[0];
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -602,9 +602,8 @@ static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
// Complex In-loop filtering (Paragraph 15.3)
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh,
static WEBP_INLINE void FilterLoop26_C(uint8_t* p, int hstride, int vstride,
int size, int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
@@ -619,9 +618,8 @@ static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
}
}
static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh,
static WEBP_INLINE void FilterLoop24_C(uint8_t* p, int hstride, int vstride,
int size, int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
@@ -639,19 +637,19 @@ static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
#if !WEBP_NEON_OMIT_C_CODE
// on macroblock edges
// Forwards to FilterLoop26_C() with hstride = 'stride', vstride = 1 and
// size = 16; 'thresh', 'ithresh' and 'hev_thresh' are passed through unchanged.
static void VFilter16_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16_C(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
// Forwards to FilterLoop26_C() with hstride = 1, vstride = 'stride' and
// size = 16 (the two strides are swapped relative to VFilter16_C);
// 'thresh', 'ithresh' and 'hev_thresh' are passed through unchanged.
static void HFilter16_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16_C(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16i_C(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
@@ -661,8 +659,8 @@ static void VFilter16i_C(uint8_t* p, int stride,
#endif // !WEBP_NEON_OMIT_C_CODE
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter16i_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16i_C(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;

View File

@@ -12,323 +12,319 @@
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
// define to 0 to have run-time table initialization
#if !defined(USE_STATIC_TABLES)
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
#endif
#if (USE_STATIC_TABLES == 1)
// Absolute-value lookup table with 255 + 255 + 1 = 511 entries:
// abs0[255 + k] == |k| for k in [-255, 255]. The values run from 0xff down to
// 0x00 (at index 255) and back up to 0xff.
static const uint8_t abs0[255 + 255 + 1] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
static const uint8_t sclip1[1020 + 1020 + 1] = {
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
};
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f};
static const uint8_t sclip2[112 + 112 + 1] = {
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
};
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
static const uint8_t clip1[255 + 511 + 1] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
#else
@@ -342,7 +338,7 @@ static uint8_t clip1[255 + 511 + 1];
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
#endif // USE_STATIC_TABLES
#endif // USE_STATIC_TABLES
const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@@ -367,5 +363,5 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
}
tables_ok = 1;
}
#endif // USE_STATIC_TABLES
#endif // USE_STATIC_TABLES
}

View File

@@ -33,7 +33,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
p[-step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[0] = VP8kclip1[q0 - a1];
}
// 4 pixels in, 4 pixels out
@@ -44,9 +44,9 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
const int a2 = VP8ksclip2[(a + 3) >> 3];
const int a3 = (a1 + 1) >> 1;
p[-2 * step] = VP8kclip1[p1 + a3];
p[- step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a3];
p[-step] = VP8kclip1[p0 + a2];
p[0] = VP8kclip1[q0 - a1];
p[step] = VP8kclip1[q1 - a3];
}
// 6 pixels in, 6 pixels out
@@ -57,13 +57,13 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3 * step] = VP8kclip1[p2 + a3];
p[-2 * step] = VP8kclip1[p1 + a2];
p[- step] = VP8kclip1[p0 + a1];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a2];
p[ 2 * step] = VP8kclip1[q2 - a3];
p[-step] = VP8kclip1[p0 + a1];
p[0] = VP8kclip1[q0 - a1];
p[step] = VP8kclip1[q1 - a2];
p[2 * step] = VP8kclip1[q2 - a3];
}
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
@@ -76,8 +76,8 @@ static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
}
static WEBP_INLINE int needs_filter2(const uint8_t* p,
int step, int t, int it) {
static WEBP_INLINE int needs_filter2(const uint8_t* p, int step, int t,
int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step];
const int p1 = p[-2 * step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@@ -89,9 +89,9 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
}
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
static WEBP_INLINE void FilterLoop26(uint8_t* p, int hstride, int vstride,
int size, int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
@@ -105,9 +105,9 @@ static WEBP_INLINE void FilterLoop26(uint8_t* p,
}
}
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
static WEBP_INLINE void FilterLoop24(uint8_t* p, int hstride, int vstride,
int size, int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
@@ -122,13 +122,13 @@ static WEBP_INLINE void FilterLoop24(uint8_t* p,
}
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
@@ -158,8 +158,8 @@ static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
@@ -167,8 +167,8 @@ static void VFilter16i(uint8_t* p, int stride,
}
}
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16i(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,6 @@
//
// Author(s): Prashant Patil (prashant.patil@imgtec.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MSA)
@@ -21,29 +20,30 @@
//------------------------------------------------------------------------------
// Transforms
#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) { \
v4i32 a1_m, b1_m, c1_m, d1_m; \
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091); \
const v4i32 sinpi8sqrt2 = __msa_fill_w(35468); \
#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
v4i32 a1_m, b1_m, c1_m, d1_m; \
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091); \
const v4i32 sinpi8sqrt2 = __msa_fill_w(35468); \
\
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = (in1 * sinpi8sqrt2) >> 16; \
c_tmp2_m = in3 + ((in3 * cospi8sqrt2minus1) >> 16); \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = in1 + ((in1 * cospi8sqrt2minus1) >> 16); \
d_tmp2_m = (in3 * sinpi8sqrt2) >> 16; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = (in1 * sinpi8sqrt2) >> 16; \
c_tmp2_m = in3 + ((in3 * cospi8sqrt2minus1) >> 16); \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = in1 + ((in1 * cospi8sqrt2minus1) >> 16); \
d_tmp2_m = (in3 * sinpi8sqrt2) >> 16; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
static void TransformOne(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
const v16i8 zero = { 0 };
const v16i8 zero = {0};
v16i8 dest0, dest1, dest2, dest3;
LD_SH2(in, 8, input0, input1);
@@ -55,10 +55,10 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(dst, BPS, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
res0, res1, res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
res0, res1, res2, res3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SW4_0_255(res0, res1, res2, res3);
PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
@@ -77,10 +77,10 @@ static void TransformTwo(const int16_t* WEBP_RESTRICT in,
static void TransformWHT(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
v8i16 input0, input1;
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
v8i16 tmp0, tmp1, tmp2, tmp3;
v8i16 out0, out1;
@@ -131,24 +131,23 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
const int in2 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
const int in3 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
v4i32 tmp0 = { 0 };
v4i32 tmp0 = {0};
v4i32 out0 = __msa_fill_w(a + d4);
v4i32 out1 = __msa_fill_w(a + c4);
v4i32 out2 = __msa_fill_w(a - c4);
v4i32 out3 = __msa_fill_w(a - d4);
v4i32 res0, res1, res2, res3;
const v4i32 zero = { 0 };
const v4i32 zero = {0};
v16u8 dest0, dest1, dest2, dest3;
INSERT_W4_SW(in3, in2, -in2, -in3, tmp0);
ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0,
out0, out1, out2, out3);
ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0, out0, out1, out2, out3);
SRAI_W4_SW(out0, out1, out2, out3, 3);
LD_UB4(dst, BPS, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
res0, res1, res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
res0, res1, res2, res3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
res3);
ADD4(res0, out0, res1, out1, res2, out2, res3, out3, res0, res1, res2, res3);
CLIP_SW4_0_255(res0, res1, res2, res3);
PCKEV_B2_SW(res0, res1, res2, res3, out0, out1);
@@ -159,189 +158,198 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
//------------------------------------------------------------------------------
// Edge filtering functions
#define FLIP_SIGN2(in0, in1, out0, out1) { \
out0 = (v16i8)__msa_xori_b(in0, 0x80); \
out1 = (v16i8)__msa_xori_b(in1, 0x80); \
}
#define FLIP_SIGN2(in0, in1, out0, out1) \
{ \
out0 = (v16i8)__msa_xori_b(in0, 0x80); \
out1 = (v16i8)__msa_xori_b(in1, 0x80); \
}
#define FLIP_SIGN4(in0, in1, in2, in3, out0, out1, out2, out3) { \
FLIP_SIGN2(in0, in1, out0, out1); \
FLIP_SIGN2(in2, in3, out2, out3); \
}
#define FLIP_SIGN4(in0, in1, in2, in3, out0, out1, out2, out3) \
{ \
FLIP_SIGN2(in0, in1, out0, out1); \
FLIP_SIGN2(in2, in3, out2, out3); \
}
#define FILT_VAL(q0_m, p0_m, mask, filt) do { \
v16i8 q0_sub_p0; \
q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
filt = __msa_adds_s_b(filt, q0_sub_p0); \
filt = __msa_adds_s_b(filt, q0_sub_p0); \
filt = __msa_adds_s_b(filt, q0_sub_p0); \
filt = filt & mask; \
} while (0)
#define FILT_VAL(q0_m, p0_m, mask, filt) \
do { \
v16i8 q0_sub_p0; \
q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
filt = __msa_adds_s_b(filt, q0_sub_p0); \
filt = __msa_adds_s_b(filt, q0_sub_p0); \
filt = __msa_adds_s_b(filt, q0_sub_p0); \
filt = filt & mask; \
} while (0)
#define FILT2(q_m, p_m, q, p) do { \
u_r = SRAI_H(temp1, 7); \
u_r = __msa_sat_s_h(u_r, 7); \
u_l = SRAI_H(temp3, 7); \
u_l = __msa_sat_s_h(u_l, 7); \
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
q_m = __msa_subs_s_b(q_m, u); \
p_m = __msa_adds_s_b(p_m, u); \
q = __msa_xori_b((v16u8)q_m, 0x80); \
p = __msa_xori_b((v16u8)p_m, 0x80); \
} while (0)
#define FILT2(q_m, p_m, q, p) \
do { \
u_r = SRAI_H(temp1, 7); \
u_r = __msa_sat_s_h(u_r, 7); \
u_l = SRAI_H(temp3, 7); \
u_l = __msa_sat_s_h(u_l, 7); \
u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
q_m = __msa_subs_s_b(q_m, u); \
p_m = __msa_adds_s_b(p_m, u); \
q = __msa_xori_b((v16u8)q_m, 0x80); \
p = __msa_xori_b((v16u8)p_m, 0x80); \
} while (0)
#define LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) do { \
v16i8 p1_m, p0_m, q0_m, q1_m; \
v16i8 filt, t1, t2; \
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
#define LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) \
do { \
v16i8 p1_m, p0_m, q0_m, q1_m; \
v16i8 filt, t1, t2; \
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
\
FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
filt = filt & hev; \
FILT_VAL(q0_m, p0_m, mask, filt); \
t1 = __msa_adds_s_b(filt, cnst4b); \
t1 = SRAI_B(t1, 3); \
t2 = __msa_adds_s_b(filt, cnst3b); \
t2 = SRAI_B(t2, 3); \
q0_m = __msa_subs_s_b(q0_m, t1); \
q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_m = __msa_adds_s_b(p0_m, t2); \
p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
filt = __msa_srari_b(t1, 1); \
hev = __msa_xori_b(hev, 0xff); \
filt = filt & hev; \
q1_m = __msa_subs_s_b(q1_m, filt); \
q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
p1_m = __msa_adds_s_b(p1_m, filt); \
p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
} while (0)
FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
filt = filt & hev; \
FILT_VAL(q0_m, p0_m, mask, filt); \
t1 = __msa_adds_s_b(filt, cnst4b); \
t1 = SRAI_B(t1, 3); \
t2 = __msa_adds_s_b(filt, cnst3b); \
t2 = SRAI_B(t2, 3); \
q0_m = __msa_subs_s_b(q0_m, t1); \
q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_m = __msa_adds_s_b(p0_m, t2); \
p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
filt = __msa_srari_b(t1, 1); \
hev = __msa_xori_b(hev, 0xff); \
filt = filt & hev; \
q1_m = __msa_subs_s_b(q1_m, filt); \
q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
p1_m = __msa_adds_s_b(p1_m, filt); \
p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
} while (0)
#define LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) do { \
v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
v16i8 u, filt, t1, t2, filt_sign; \
v8i16 filt_r, filt_l, u_r, u_l; \
v8i16 temp0, temp1, temp2, temp3; \
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
const v8i16 cnst9h = __msa_ldi_h(9); \
const v8i16 cnst63h = __msa_ldi_h(63); \
#define LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
do { \
v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
v16i8 u, filt, t1, t2, filt_sign; \
v8i16 filt_r, filt_l, u_r, u_l; \
v8i16 temp0, temp1, temp2, temp3; \
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
const v8i16 cnst9h = __msa_ldi_h(9); \
const v8i16 cnst63h = __msa_ldi_h(63); \
\
FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
FILT_VAL(q0_m, p0_m, mask, filt); \
FLIP_SIGN2(p2, q2, p2_m, q2_m); \
t2 = filt & hev; \
/* filt_val &= ~hev */ \
hev = __msa_xori_b(hev, 0xff); \
filt = filt & hev; \
t1 = __msa_adds_s_b(t2, cnst4b); \
t1 = SRAI_B(t1, 3); \
t2 = __msa_adds_s_b(t2, cnst3b); \
t2 = SRAI_B(t2, 3); \
q0_m = __msa_subs_s_b(q0_m, t1); \
p0_m = __msa_adds_s_b(p0_m, t2); \
filt_sign = __msa_clti_s_b(filt, 0); \
ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
/* update q2/p2 */ \
temp0 = filt_r * cnst9h; \
temp1 = temp0 + cnst63h; \
temp2 = filt_l * cnst9h; \
temp3 = temp2 + cnst63h; \
FILT2(q2_m, p2_m, q2, p2); \
/* update q1/p1 */ \
temp1 = temp1 + temp0; \
temp3 = temp3 + temp2; \
FILT2(q1_m, p1_m, q1, p1); \
/* update q0/p0 */ \
temp1 = temp1 + temp0; \
temp3 = temp3 + temp2; \
FILT2(q0_m, p0_m, q0, p0); \
} while (0)
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
limit_in, b_limit_in, thresh_in, hev_out, mask_out) \
do { \
v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
v16u8 flat_out; \
\
/* absolute subtraction of pixel values */ \
p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \
p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \
p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \
q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \
q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \
q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \
p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \
p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \
/* calculation of hev */ \
flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
hev_out = (thresh_in < flat_out); \
/* calculation of mask */ \
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
p1_asub_q1_m = SRAI_B(p1_asub_q1_m, 1); \
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
mask_out = (b_limit_in < p0_asub_q0_m); \
mask_out = __msa_max_u_b(flat_out, mask_out); \
p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
mask_out = (limit_in < mask_out); \
mask_out = __msa_xori_b(mask_out, 0xff); \
} while (0)
#define ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) \
do { \
const uint16_t tmp0_h = __msa_copy_s_h((v8i16)in1, in1_idx); \
const uint32_t tmp0_w = __msa_copy_s_w((v4i32)in0, in0_idx); \
SW(tmp0_w, pdst); \
SH(tmp0_h, pdst + stride); \
} while (0)
#define ST6x4_UB(in0, start_in0_idx, in1, start_in1_idx, pdst, stride) \
do { \
uint8_t* ptmp1 = (uint8_t*)pdst; \
ST6x1_UB(in0, start_in0_idx, in1, start_in1_idx, ptmp1, 4); \
ptmp1 += stride; \
ST6x1_UB(in0, start_in0_idx + 1, in1, start_in1_idx + 1, ptmp1, 4); \
ptmp1 += stride; \
ST6x1_UB(in0, start_in0_idx + 2, in1, start_in1_idx + 2, ptmp1, 4); \
ptmp1 += stride; \
ST6x1_UB(in0, start_in0_idx + 3, in1, start_in1_idx + 3, ptmp1, 4); \
} while (0)
#define LPF_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \
do { \
v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2; \
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
\
FLIP_SIGN4(p1_in, p0_in, q0_in, q1_in, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
FILT_VAL(q0_m, p0_m, mask, filt); \
filt1 = __msa_adds_s_b(filt, cnst4b); \
filt1 = SRAI_B(filt1, 3); \
filt2 = __msa_adds_s_b(filt, cnst3b); \
filt2 = SRAI_B(filt2, 3); \
q0_m = __msa_subs_s_b(q0_m, filt1); \
p0_m = __msa_adds_s_b(p0_m, filt2); \
q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
} while (0)
#define LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) \
do { \
v16u8 p1_a_sub_q1, p0_a_sub_q0; \
\
FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
FILT_VAL(q0_m, p0_m, mask, filt); \
FLIP_SIGN2(p2, q2, p2_m, q2_m); \
t2 = filt & hev; \
/* filt_val &= ~hev */ \
hev = __msa_xori_b(hev, 0xff); \
filt = filt & hev; \
t1 = __msa_adds_s_b(t2, cnst4b); \
t1 = SRAI_B(t1, 3); \
t2 = __msa_adds_s_b(t2, cnst3b); \
t2 = SRAI_B(t2, 3); \
q0_m = __msa_subs_s_b(q0_m, t1); \
p0_m = __msa_adds_s_b(p0_m, t2); \
filt_sign = __msa_clti_s_b(filt, 0); \
ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
/* update q2/p2 */ \
temp0 = filt_r * cnst9h; \
temp1 = temp0 + cnst63h; \
temp2 = filt_l * cnst9h; \
temp3 = temp2 + cnst63h; \
FILT2(q2_m, p2_m, q2, p2); \
/* update q1/p1 */ \
temp1 = temp1 + temp0; \
temp3 = temp3 + temp2; \
FILT2(q1_m, p1_m, q1, p1); \
/* update q0/p0 */ \
temp1 = temp1 + temp0; \
temp3 = temp3 + temp2; \
FILT2(q0_m, p0_m, q0, p0); \
} while (0)
p0_a_sub_q0 = __msa_asub_u_b(p0, q0); \
p1_a_sub_q1 = __msa_asub_u_b(p1, q1); \
p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \
p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0); \
mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1); \
mask = (mask <= b_limit); \
} while (0)
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, \
q0_in, q1_in, q2_in, q3_in, \
limit_in, b_limit_in, thresh_in, \
hev_out, mask_out) do { \
v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
v16u8 flat_out; \
\
/* absolute subtraction of pixel values */ \
p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \
p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \
p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \
q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \
q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \
q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \
p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \
p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \
/* calculation of hev */ \
flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
hev_out = (thresh_in < flat_out); \
/* calculation of mask */ \
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
p1_asub_q1_m = SRAI_B(p1_asub_q1_m, 1); \
p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
mask_out = (b_limit_in < p0_asub_q0_m); \
mask_out = __msa_max_u_b(flat_out, mask_out); \
p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
mask_out = (limit_in < mask_out); \
mask_out = __msa_xori_b(mask_out, 0xff); \
} while (0)
#define ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) do { \
const uint16_t tmp0_h = __msa_copy_s_h((v8i16)in1, in1_idx); \
const uint32_t tmp0_w = __msa_copy_s_w((v4i32)in0, in0_idx); \
SW(tmp0_w, pdst); \
SH(tmp0_h, pdst + stride); \
} while (0)
#define ST6x4_UB(in0, start_in0_idx, in1, start_in1_idx, pdst, stride) do { \
uint8_t* ptmp1 = (uint8_t*)pdst; \
ST6x1_UB(in0, start_in0_idx, in1, start_in1_idx, ptmp1, 4); \
ptmp1 += stride; \
ST6x1_UB(in0, start_in0_idx + 1, in1, start_in1_idx + 1, ptmp1, 4); \
ptmp1 += stride; \
ST6x1_UB(in0, start_in0_idx + 2, in1, start_in1_idx + 2, ptmp1, 4); \
ptmp1 += stride; \
ST6x1_UB(in0, start_in0_idx + 3, in1, start_in1_idx + 3, ptmp1, 4); \
} while (0)
#define LPF_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) do { \
v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2; \
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
\
FLIP_SIGN4(p1_in, p0_in, q0_in, q1_in, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
FILT_VAL(q0_m, p0_m, mask, filt); \
filt1 = __msa_adds_s_b(filt, cnst4b); \
filt1 = SRAI_B(filt1, 3); \
filt2 = __msa_adds_s_b(filt, cnst3b); \
filt2 = SRAI_B(filt2, 3); \
q0_m = __msa_subs_s_b(q0_m, filt1); \
p0_m = __msa_adds_s_b(p0_m, filt2); \
q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
} while (0)
#define LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) do { \
v16u8 p1_a_sub_q1, p0_a_sub_q0; \
\
p0_a_sub_q0 = __msa_asub_u_b(p0, q0); \
p1_a_sub_q1 = __msa_asub_u_b(p1, q1); \
p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \
p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0); \
mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1); \
mask = (mask <= b_limit); \
} while (0)
static void VFilter16(uint8_t* src, int stride,
int b_limit_in, int limit_in, int thresh_in) {
static void VFilter16(uint8_t* src, int stride, int b_limit_in, int limit_in,
int thresh_in) {
uint8_t* ptemp = src - 4 * stride;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev;
@@ -350,8 +358,8 @@ static void VFilter16(uint8_t* src, int stride,
const v16u8 b_limit = (v16u8)__msa_fill_b(b_limit_in);
LD_UB8(ptemp, stride, p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask);
LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
ptemp = src - 3 * stride;
ST_UB4(p2, p1, p0, q0, ptemp, stride);
@@ -359,9 +367,9 @@ static void VFilter16(uint8_t* src, int stride,
ST_UB2(q1, q2, ptemp, stride);
}
static void HFilter16(uint8_t* src, int stride,
int b_limit_in, int limit_in, int thresh_in) {
uint8_t* ptmp = src - 4;
static void HFilter16(uint8_t* src, int stride, int b_limit_in, int limit_in,
int thresh_in) {
uint8_t* ptmp = src - 4;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 mask, hev;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
@@ -374,11 +382,11 @@ static void HFilter16(uint8_t* src, int stride,
LD_UB8(ptmp, stride, row0, row1, row2, row3, row4, row5, row6, row7);
ptmp += (8 * stride);
LD_UB8(ptmp, stride, row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
row9, row10, row11, row12, row13, row14, row15, p3, p2,
p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask);
LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
@@ -420,8 +428,8 @@ static void HFilter16(uint8_t* src, int stride,
}
// on three inner edges
static void VFilterHorEdge16i(uint8_t* src, int stride,
int b_limit, int limit, int thresh) {
static void VFilterHorEdge16i(uint8_t* src, int stride, int b_limit, int limit,
int thresh) {
v16u8 mask, hev;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
const v16u8 thresh0 = (v16u8)__msa_fill_b(thresh);
@@ -429,21 +437,21 @@ static void VFilterHorEdge16i(uint8_t* src, int stride,
const v16u8 limit0 = (v16u8)__msa_fill_b(limit);
LD_UB8((src - 4 * stride), stride, p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
hev, mask);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
mask);
LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
ST_UB4(p1, p0, q0, q1, (src - 2 * stride), stride);
}
static void VFilter16i(uint8_t* src_y, int stride,
int b_limit, int limit, int thresh) {
VFilterHorEdge16i(src_y + 4 * stride, stride, b_limit, limit, thresh);
VFilterHorEdge16i(src_y + 8 * stride, stride, b_limit, limit, thresh);
static void VFilter16i(uint8_t* src_y, int stride, int b_limit, int limit,
int thresh) {
VFilterHorEdge16i(src_y + 4 * stride, stride, b_limit, limit, thresh);
VFilterHorEdge16i(src_y + 8 * stride, stride, b_limit, limit, thresh);
VFilterHorEdge16i(src_y + 12 * stride, stride, b_limit, limit, thresh);
}
static void HFilterVertEdge16i(uint8_t* src, int stride,
int b_limit, int limit, int thresh) {
static void HFilterVertEdge16i(uint8_t* src, int stride, int b_limit, int limit,
int thresh) {
v16u8 mask, hev;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
@@ -454,13 +462,13 @@ static void HFilterVertEdge16i(uint8_t* src, int stride,
const v16u8 limit0 = (v16u8)__msa_fill_b(limit);
LD_UB8(src - 4, stride, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(src - 4 + (8 * stride), stride,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
hev, mask);
LD_UB8(src - 4 + (8 * stride), stride, row8, row9, row10, row11, row12, row13,
row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
row9, row10, row11, row12, row13, row14, row15, p3, p2,
p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
mask);
LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
@@ -472,10 +480,10 @@ static void HFilterVertEdge16i(uint8_t* src, int stride,
ST4x8_UB(tmp4, tmp5, src, stride);
}
static void HFilter16i(uint8_t* src_y, int stride,
int b_limit, int limit, int thresh) {
HFilterVertEdge16i(src_y + 4, stride, b_limit, limit, thresh);
HFilterVertEdge16i(src_y + 8, stride, b_limit, limit, thresh);
static void HFilter16i(uint8_t* src_y, int stride, int b_limit, int limit,
int thresh) {
HFilterVertEdge16i(src_y + 4, stride, b_limit, limit, thresh);
HFilterVertEdge16i(src_y + 8, stride, b_limit, limit, thresh);
HFilterVertEdge16i(src_y + 12, stride, b_limit, limit, thresh);
}
@@ -496,8 +504,8 @@ static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
LD_UB8(ptmp_src_v, stride, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask);
LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
p2_d = __msa_copy_s_d((v2i64)p2, 0);
p1_d = __msa_copy_s_d((v2i64)p1, 0);
@@ -538,13 +546,13 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
const v16u8 thresh = (v16u8)__msa_fill_b(thresh_in);
LD_UB8(ptmp_src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(ptmp_src_v, stride,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask);
LD_UB8(ptmp_src_v, stride, row8, row9, row10, row11, row12, row13, row14,
row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
row9, row10, row11, row12, row13, row14, row15, p3, p2,
p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask);
LPF_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
@@ -562,8 +570,8 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
}
static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
uint8_t* WEBP_RESTRICT src_v, int stride,
int b_limit_in, int limit_in, int thresh_in) {
uint8_t* WEBP_RESTRICT src_v, int stride, int b_limit_in,
int limit_in, int thresh_in) {
uint64_t p1_d, p0_d, q0_d, q1_d;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
@@ -578,8 +586,8 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
src_v += (5 * stride);
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask);
LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
p1_d = __msa_copy_s_d((v2i64)p1, 0);
p0_d = __msa_copy_s_d((v2i64)p0, 0);
@@ -594,8 +602,8 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
}
static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
uint8_t* WEBP_RESTRICT src_v, int stride,
int b_limit_in, int limit_in, int thresh_in) {
uint8_t* WEBP_RESTRICT src_v, int stride, int b_limit_in,
int limit_in, int thresh_in) {
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
v16u8 row9, row10, row11, row12, row13, row14, row15;
@@ -605,13 +613,12 @@ static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
const v16u8 b_limit = (v16u8)__msa_fill_b(b_limit_in);
LD_UB8(src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(src_v, stride,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask);
LD_UB8(src_v, stride, row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
row9, row10, row11, row12, row13, row14, row15, p3, p2,
p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask);
LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
@@ -645,11 +652,11 @@ static void SimpleHFilter16(uint8_t* src, int stride, int b_limit_in) {
uint8_t* ptemp_src = src - 2;
LD_UB8(ptemp_src, stride, row0, row1, row2, row3, row4, row5, row6, row7);
LD_UB8(ptemp_src + 8 * stride, stride,
row8, row9, row10, row11, row12, row13, row14, row15);
TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
row8, row9, row10, row11, row12, row13, row14, row15,
p1, p0, q0, q1);
LD_UB8(ptemp_src + 8 * stride, stride, row8, row9, row10, row11, row12, row13,
row14, row15);
TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
row9, row10, row11, row12, row13, row14, row15, p1, p0,
q0, q1);
LPF_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
LPF_SIMPLE_FILT(p1, p0, q0, q1, mask);
ILVRL_B2_SH(q0, p0, tmp1, tmp0);
@@ -665,14 +672,14 @@ static void SimpleHFilter16(uint8_t* src, int stride, int b_limit_in) {
}
// Applies SimpleVFilter16() once at each of the row offsets 4, 8 and 12
// (in units of 'stride') below 'src_y', passing 'b_limit_in' through.
static void SimpleVFilter16i(uint8_t* src_y, int stride, int b_limit_in) {
  SimpleVFilter16(src_y + 4 * stride, stride, b_limit_in);
  SimpleVFilter16(src_y + 8 * stride, stride, b_limit_in);
  SimpleVFilter16(src_y + 12 * stride, stride, b_limit_in);
}
// Applies SimpleHFilter16() once at each of the column offsets 4, 8 and 12
// from 'src_y', passing 'stride' and 'b_limit_in' through.
static void SimpleHFilter16i(uint8_t* src_y, int stride, int b_limit_in) {
  SimpleHFilter16(src_y + 4, stride, b_limit_in);
  SimpleHFilter16(src_y + 8, stride, b_limit_in);
  SimpleHFilter16(src_y + 12, stride, b_limit_in);
}
@@ -682,7 +689,7 @@ static void SimpleHFilter16i(uint8_t* src_y, int stride, int b_limit_in) {
// 4x4
static void DC4(uint8_t* dst) { // DC
static void DC4(uint8_t* dst) { // DC
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -694,7 +701,7 @@ static void DC4(uint8_t* dst) { // DC
static void TM4(uint8_t* dst) {
const uint8_t* const ptemp = dst - BPS - 1;
v8i16 T, d, r0, r1, r2, r3;
const v16i8 zero = { 0 };
const v16i8 zero = {0};
const v8i16 TL = (v8i16)__msa_fill_h(ptemp[0 * BPS]);
const v8i16 L0 = (v8i16)__msa_fill_h(ptemp[1 * BPS]);
const v8i16 L1 = (v8i16)__msa_fill_h(ptemp[2 * BPS]);
@@ -702,19 +709,19 @@ static void TM4(uint8_t* dst) {
const v8i16 L3 = (v8i16)__msa_fill_h(ptemp[4 * BPS]);
const v16u8 T1 = LD_UB(ptemp + 1);
T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
d = T - TL;
ADD4(d, L0, d, L1, d, L2, d, L3, r0, r1, r2, r3);
CLIP_SH4_0_255(r0, r1, r2, r3);
PCKEV_ST4x4_UB(r0, r1, r2, r3, dst, BPS);
}
static void VE4(uint8_t* dst) { // vertical
static void VE4(uint8_t* dst) { // vertical
const uint8_t* const ptop = dst - BPS - 1;
const uint32_t val0 = LW(ptop + 0);
const uint32_t val1 = LW(ptop + 4);
uint32_t out;
v16u8 A = { 0 }, B, C, AC, B2, R;
v16u8 A = {0}, B, C, AC, B2, R;
INSERT_W2_UB(val0, val1, A);
B = SLDI_UB(A, A, 1);
@@ -726,12 +733,12 @@ static void VE4(uint8_t* dst) { // vertical
SW4(out, out, out, out, dst, BPS);
}
static void RD4(uint8_t* dst) { // Down-right
static void RD4(uint8_t* dst) { // Down-right
const uint8_t* const ptop = dst - 1 - BPS;
uint32_t val0 = LW(ptop + 0);
uint32_t val1 = LW(ptop + 4);
uint32_t val2, val3;
v16u8 A, B, C, AC, B2, R, A1 = { 0 };
v16u8 A, B, C, AC, B2, R, A1 = {0};
INSERT_W2_UB(val0, val1, A1);
A = SLDI_UB(A1, A1, 12);
@@ -754,12 +761,12 @@ static void RD4(uint8_t* dst) { // Down-right
SW4(val0, val1, val2, val3, dst, BPS);
}
static void LD4(uint8_t* dst) { // Down-Left
static void LD4(uint8_t* dst) { // Down-Left
const uint8_t* const ptop = dst - BPS;
uint32_t val0 = LW(ptop + 0);
uint32_t val1 = LW(ptop + 4);
uint32_t val2, val3;
v16u8 A = { 0 }, B, C, AC, B2, R;
v16u8 A = {0}, B, C, AC, B2, R;
INSERT_W2_UB(val0, val1, A);
B = SLDI_UB(A, A, 1);
@@ -780,7 +787,7 @@ static void LD4(uint8_t* dst) { // Down-Left
// 16x16
static void DC16(uint8_t* dst) { // DC
static void DC16(uint8_t* dst) { // DC
uint32_t dc = 16;
int i;
const v16u8 rtop = LD_UB(dst - BPS);
@@ -799,7 +806,7 @@ static void DC16(uint8_t* dst) { // DC
static void TM16(uint8_t* dst) {
int j;
v8i16 d1, d2;
const v16i8 zero = { 0 };
const v16i8 zero = {0};
const v8i16 TL = (v8i16)__msa_fill_h(dst[-1 - BPS]);
const v16i8 T = LD_SB(dst - BPS);
@@ -822,13 +829,13 @@ static void TM16(uint8_t* dst) {
}
}
// Vertical prediction: loads one vector from the row above the block
// (dst - BPS) and writes that same vector with two ST_UB8 stores, the first
// starting at 'dst' and the second at 'dst + 8 * BPS', both with stride BPS.
static void VE16(uint8_t* dst) {  // vertical
  const v16u8 rtop = LD_UB(dst - BPS);
  ST_UB8(rtop, rtop, rtop, rtop, rtop, rtop, rtop, rtop, dst, BPS);
  ST_UB8(rtop, rtop, rtop, rtop, rtop, rtop, rtop, rtop, dst + 8 * BPS, BPS);
}
static void HE16(uint8_t* dst) { // horizontal
static void HE16(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; j -= 4) {
const v16u8 L0 = (v16u8)__msa_fill_b(dst[-1 + 0 * BPS]);
@@ -840,7 +847,7 @@ static void HE16(uint8_t* dst) { // horizontal
}
}
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
int j;
uint32_t dc = 8;
v16u8 out;
@@ -853,7 +860,7 @@ static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);
}
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
uint32_t dc = 8;
const v16u8 rtop = LD_UB(dst - BPS);
const v8u16 dctop = __msa_hadd_u_h(rtop, rtop);
@@ -865,7 +872,7 @@ static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);
}
static void DC16NoTopLeft(uint8_t* dst) { // DC with nothing
static void DC16NoTopLeft(uint8_t* dst) { // DC with nothing
const v16u8 out = (v16u8)__msa_fill_b(0x80);
ST_UB8(out, out, out, out, out, out, out, out, dst, BPS);
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS);
@@ -873,12 +880,13 @@ static void DC16NoTopLeft(uint8_t* dst) { // DC with nothing
// Chroma
// Writes the value 'out' with two SD4 stores: four copies starting at
// 'dst + 0 * BPS' and four more starting at 'dst + 4 * BPS', stride BPS.
// Both arguments are expanded more than once, so pass side-effect-free
// expressions.
#define STORE8x8(out, dst)                       \
  do {                                           \
    SD4(out, out, out, out, dst + 0 * BPS, BPS); \
    SD4(out, out, out, out, dst + 4 * BPS, BPS); \
  } while (0)
static void DC8uv(uint8_t* dst) { // DC
static void DC8uv(uint8_t* dst) { // DC
uint32_t dc = 8;
int i;
uint64_t out;
@@ -900,8 +908,8 @@ static void DC8uv(uint8_t* dst) { // DC
static void TM8uv(uint8_t* dst) {
int j;
const v16i8 T1 = LD_SB(dst - BPS);
const v16i8 zero = { 0 };
const v8i16 T = (v8i16)__msa_ilvr_b(zero, T1);
const v16i8 zero = {0};
const v8i16 T = (v8i16)__msa_ilvr_b(zero, T1);
const v8i16 TL = (v8i16)__msa_fill_h(dst[-1 - BPS]);
const v8i16 d = T - TL;
@@ -920,13 +928,13 @@ static void TM8uv(uint8_t* dst) {
}
}
// Vertical chroma prediction: loads a vector from the row above (dst - BPS),
// extracts its 64-bit element 0 and replicates that value over the block
// with STORE8x8().
static void VE8uv(uint8_t* dst) {  // vertical
  const v16u8 rtop = LD_UB(dst - BPS);
  const uint64_t out = __msa_copy_s_d((v2i64)rtop, 0);
  STORE8x8(out, dst);
}
static void HE8uv(uint8_t* dst) { // horizontal
static void HE8uv(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 8; j += 4) {
const v16u8 L0 = (v16u8)__msa_fill_b(dst[-1 + 0 * BPS]);
@@ -942,7 +950,7 @@ static void HE8uv(uint8_t* dst) { // horizontal
}
}
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
const uint32_t dc = 4;
const v16u8 rtop = LD_UB(dst - BPS);
const v8u16 temp0 = __msa_hadd_u_h(rtop, rtop);
@@ -954,7 +962,7 @@ static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
STORE8x8(out, dst);
}
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
uint32_t dc = 4;
int i;
uint64_t out;
@@ -968,7 +976,7 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
STORE8x8(out, dst);
}
// DC chroma prediction when neither top nor left samples are available:
// fills the block, via STORE8x8(), with a 64-bit pattern whose eight bytes
// are all 0x80.
static void DC8uvNoTopLeft(uint8_t* dst) {  // DC with nothing
  const uint64_t out = 0x8080808080808080ULL;
  STORE8x8(out, dst);
}
@@ -984,16 +992,16 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMSA(void) {
VP8TransformDC = TransformDC;
VP8TransformAC3 = TransformAC3;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;

View File

@@ -16,8 +16,8 @@
#if defined(WEBP_USE_NEON)
#include "src/dsp/neon.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/neon.h"
//------------------------------------------------------------------------------
// NxM Loading functions
@@ -60,10 +60,11 @@ static WEBP_INLINE void Load4x16_NEON(const uint8_t* const src, int stride,
#else // WORK_AROUND_GCC
// Loads one 32-bit word from 'src' into lane LANE of the vector VALUE
// (vld1q_lane_u32), then advances 'src' by 'stride'. Both 'src' and 'stride'
// are taken from the scope in which the macro is expanded.
#define LOADQ_LANE_32b(VALUE, LANE)                                  \
  do {                                                               \
    (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE)); \
    src += stride;                                                   \
  } while (0)
static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
uint8x16_t* const p1,
@@ -111,10 +112,10 @@ static WEBP_INLINE void Load4x16_NEON(const uint8_t* src, int stride,
#endif // !WORK_AROUND_GCC
// Fills eight output vectors with two Load4x16_NEON() calls: p3..p0 come from
// the call anchored at 'src - 2', q0..q3 from the call anchored at 'src + 2'.
// 'stride' is forwarded unchanged.
static WEBP_INLINE void Load8x16_NEON(
    const uint8_t* const src, int stride, uint8x16_t* const p3,
    uint8x16_t* const p2, uint8x16_t* const p1, uint8x16_t* const p0,
    uint8x16_t* const q0, uint8x16_t* const q1, uint8x16_t* const q2,
    uint8x16_t* const q3) {
  Load4x16_NEON(src - 2, stride, p3, p2, p1, p0);
  Load4x16_NEON(src + 2, stride, q0, q1, q2, q3);
}
@@ -131,12 +132,12 @@ static WEBP_INLINE void Load16x4_NEON(const uint8_t* const src, int stride,
}
// Fills eight output vectors with two Load16x4_NEON() calls: p3..p0 come from
// the call anchored at 'src - 2 * stride', q0..q3 from the call anchored at
// 'src + 2 * stride'.
static WEBP_INLINE void Load16x8_NEON(
    const uint8_t* const src, int stride, uint8x16_t* const p3,
    uint8x16_t* const p2, uint8x16_t* const p1, uint8x16_t* const p0,
    uint8x16_t* const q0, uint8x16_t* const q1, uint8x16_t* const q2,
    uint8x16_t* const q3) {
  Load16x4_NEON(src - 2 * stride, stride, p3, p2, p1, p0);
  Load16x4_NEON(src + 2 * stride, stride, q0, q1, q2, q3);
}
static WEBP_INLINE void Load8x8x2_NEON(
@@ -220,8 +221,8 @@ static WEBP_INLINE void Load8x8x2T_NEON(
#endif // !WORK_AROUND_GCC
static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v,
uint8_t* const dst, int stride) {
static WEBP_INLINE void Store2x8_NEON(const uint8x8x2_t v, uint8_t* const dst,
int stride) {
vst2_lane_u8(dst + 0 * stride, v, 0);
vst2_lane_u8(dst + 1 * stride, v, 1);
vst2_lane_u8(dst + 2 * stride, v, 2);
@@ -244,8 +245,8 @@ static WEBP_INLINE void Store2x16_NEON(const uint8x16_t p0, const uint8x16_t q0,
}
#if !defined(WORK_AROUND_GCC)
static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v,
uint8_t* const dst, int stride) {
static WEBP_INLINE void Store4x8_NEON(const uint8x8x4_t v, uint8_t* const dst,
int stride) {
vst4_lane_u8(dst + 0 * stride, v, 0);
vst4_lane_u8(dst + 1 * stride, v, 1);
vst4_lane_u8(dst + 2 * stride, v, 2);
@@ -260,12 +261,10 @@ static WEBP_INLINE void Store4x16_NEON(const uint8x16_t p1, const uint8x16_t p0,
const uint8x16_t q0, const uint8x16_t q1,
uint8_t* const dst, int stride) {
uint8x8x4_t lo, hi;
INIT_VECTOR4(lo,
vget_low_u8(p1), vget_low_u8(p0),
vget_low_u8(q0), vget_low_u8(q1));
INIT_VECTOR4(hi,
vget_high_u8(p1), vget_high_u8(p0),
vget_high_u8(q0), vget_high_u8(q1));
INIT_VECTOR4(lo, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0),
vget_low_u8(q1));
INIT_VECTOR4(hi, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0),
vget_high_u8(q1));
Store4x8_NEON(lo, dst - 2 + 0 * stride, stride);
Store4x8_NEON(hi, dst - 2 + 8 * stride, stride);
}
@@ -285,22 +284,20 @@ static WEBP_INLINE void Store16x4_NEON(const uint8x16_t p1, const uint8x16_t p0,
}
// Writes two 8-byte rows to each of the 'u' and 'v' planes. The low halves of
// p0/q0 go to 'u - stride' and 'u'; the high halves go to 'v - stride' and
// 'v'. Each destination row is stored exactly once.
static WEBP_INLINE void Store8x2x2_NEON(const uint8x16_t p0,
                                        const uint8x16_t q0, uint8_t* const u,
                                        uint8_t* const v, int stride) {
  // p0 and q0 contain the u+v samples packed in low/high halves.
  vst1_u8(u - stride, vget_low_u8(p0));
  vst1_u8(u, vget_low_u8(q0));
  vst1_u8(v - stride, vget_high_u8(p0));
  vst1_u8(v, vget_high_u8(q0));
}
static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,
const uint8x16_t p0,
const uint8x16_t q0,
const uint8x16_t q1,
uint8_t* const u, uint8_t* const v,
int stride) {
const uint8x16_t q1, uint8_t* const u,
uint8_t* const v, int stride) {
// The p1...q1 registers contain the u+v samples packed in low/high halves.
Store8x2x2_NEON(p1, p0, u - stride, v - stride, stride);
Store8x2x2_NEON(q0, q1, u + stride, v + stride, stride);
@@ -308,16 +305,17 @@ static WEBP_INLINE void Store8x4x2_NEON(const uint8x16_t p1,
#if !defined(WORK_AROUND_GCC)
// Stores lane LANE of the 3-vector VAL0 at 'DST - 3' and lane LANE of the
// 3-vector VAL1 at 'DST + 0' (vst3_lane_u8), then advances DST by 'stride'.
// 'stride' is taken from the scope in which the macro is expanded; DST must
// be a modifiable lvalue.
#define STORE6_LANE(DST, VAL0, VAL1, LANE)   \
  do {                                       \
    vst3_lane_u8((DST) - 3, (VAL0), (LANE)); \
    vst3_lane_u8((DST) + 0, (VAL1), (LANE)); \
    (DST) += stride;                         \
  } while (0)
static WEBP_INLINE void Store6x8x2_NEON(
const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
uint8_t* u, uint8_t* v, int stride) {
const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2, uint8_t* u,
uint8_t* v, int stride) {
uint8x8x3_t u0, u1, v0, v1;
INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
@@ -345,16 +343,13 @@ static WEBP_INLINE void Store6x8x2_NEON(
static WEBP_INLINE void Store4x8x2_NEON(const uint8x16_t p1,
const uint8x16_t p0,
const uint8x16_t q0,
const uint8x16_t q1,
uint8_t* const u, uint8_t* const v,
int stride) {
const uint8x16_t q1, uint8_t* const u,
uint8_t* const v, int stride) {
uint8x8x4_t u0, v0;
INIT_VECTOR4(u0,
vget_low_u8(p1), vget_low_u8(p0),
vget_low_u8(q0), vget_low_u8(q1));
INIT_VECTOR4(v0,
vget_high_u8(p1), vget_high_u8(p0),
vget_high_u8(q0), vget_high_u8(q1));
INIT_VECTOR4(u0, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0),
vget_low_u8(q1));
INIT_VECTOR4(v0, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0),
vget_high_u8(q1));
vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
@@ -397,8 +392,7 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
}
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
const int16x8_t row23,
uint8_t* const dst) {
const int16x8_t row23, uint8_t* const dst) {
uint32x2_t dst01 = vdup_n_u32(0);
uint32x2_t dst23 = vdup_n_u32(0);
@@ -449,26 +443,25 @@ static uint8x16_t FlipSignBack_NEON(const int8x16_t v) {
// Returns, per signed 8-bit lane, (p1 - q1) + 3 * (q0 - p0). The value is
// accumulated one step at a time with the saturating vqsubq_s8 / vqaddq_s8
// intrinsics, so every intermediate result is clamped to the int8 range.
static int8x16_t GetBaseDelta_NEON(const int8x16_t p1, const int8x16_t p0,
                                   const int8x16_t q0, const int8x16_t q1) {
  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);     // (q0-p0)
  const int8x16_t p1_q1 = vqsubq_s8(p1, q1);     // (p1-q1)
  const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0);  // (p1-q1) + 1 * (q0 - p0)
  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);     // (p1-q1) + 2 * (q0 - p0)
  const int8x16_t s3 = vqaddq_s8(q0_p0, s2);     // (p1-q1) + 3 * (q0 - p0)
  return s3;
}
// Returns, per signed 8-bit lane, 3 * (q0 - p0), built from one saturating
// subtraction followed by two saturating additions (vqsubq_s8 / vqaddq_s8).
static int8x16_t GetBaseDelta0_NEON(const int8x16_t p0, const int8x16_t q0) {
  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);     // (q0-p0)
  const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0);  // 2 * (q0 - p0)
  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);     // 3 * (q0 - p0)
  return s2;
}
//------------------------------------------------------------------------------
static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
const int8x16_t delta,
int8x16_t* const op0,
const int8x16_t delta, int8x16_t* const op0,
int8x16_t* const oq0) {
const int8x16_t kCst3 = vdupq_n_s8(0x03);
const int8x16_t kCst4 = vdupq_n_s8(0x04);
@@ -483,8 +476,8 @@ static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s,
#if defined(WEBP_USE_INTRINSICS)
static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,
const int8x16_t delta,
uint8x16_t* const op0, uint8x16_t* const oq0) {
const int8x16_t delta, uint8x16_t* const op0,
uint8x16_t* const oq0) {
const int8x16_t kCst3 = vdupq_n_s8(0x03);
const int8x16_t kCst4 = vdupq_n_s8(0x04);
const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
@@ -499,8 +492,8 @@ static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s,
static void DoFilter2_NEON(const uint8x16_t p1, const uint8x16_t p0,
const uint8x16_t q0, const uint8x16_t q1,
const uint8x16_t mask,
uint8x16_t* const op0, uint8x16_t* const oq0) {
const uint8x16_t mask, uint8x16_t* const op0,
uint8x16_t* const oq0) {
const int8x16_t p1s = FlipSign_NEON(p1);
const int8x16_t p0s = FlipSign_NEON(p0);
const int8x16_t q0s = FlipSign_NEON(q0);
@@ -533,6 +526,7 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
#else
// Load/Store vertical edge
// clang-format off
#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \
"vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
@@ -600,30 +594,30 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
"vand q9, q9, q11 \n" /* apply filter mask */ \
DO_SIMPLE_FILTER(p0, q0, q9) /* apply filter */ \
FLIP_SIGN_BIT2(p0, q0, q10)
// clang-format on
static void SimpleVFilter16_NEON(uint8_t* p, int stride, int thresh) {
__asm__ volatile (
"sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
__asm__ volatile(
"sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
"vld1.u8 {q1}, [%[p]], %[stride] \n" // p1
"vld1.u8 {q2}, [%[p]], %[stride] \n" // p0
"vld1.u8 {q3}, [%[p]], %[stride] \n" // q0
"vld1.u8 {q12}, [%[p]] \n" // q1
"vld1.u8 {q1}, [%[p]], %[stride] \n" // p1
"vld1.u8 {q2}, [%[p]], %[stride] \n" // p0
"vld1.u8 {q3}, [%[p]], %[stride] \n" // q0
"vld1.u8 {q12}, [%[p]] \n" // q1
DO_FILTER2(q1, q2, q3, q12, %[thresh])
DO_FILTER2(q1, q2, q3, q12, % [thresh]) //
"sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
"sub %[p], %[p], %[stride], lsl #1 \n" // p -= 2 * stride
"vst1.u8 {q2}, [%[p]], %[stride] \n" // store op0
"vst1.u8 {q3}, [%[p]] \n" // store oq0
: [p] "+r"(p)
: [stride] "r"(stride), [thresh] "r"(thresh)
: "memory", QRegs
);
"vst1.u8 {q2}, [%[p]], %[stride] \n" // store op0
"vst1.u8 {q3}, [%[p]] \n" // store oq0
: [p] "+r"(p)
: [stride] "r"(stride), [thresh] "r"(thresh)
: "memory", QRegs);
}
static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
__asm__ volatile (
__asm__ volatile(
"sub r4, %[p], #2 \n" // base1 = p - 2
"lsl r6, %[stride], #1 \n" // r6 = 2 * stride
"add r5, r4, %[stride] \n" // base2 = base1 + stride
@@ -651,7 +645,7 @@ static void SimpleHFilter16_NEON(uint8_t* p, int stride, int thresh) {
#undef LOAD8x4
#undef STORE8x2
#endif // WEBP_USE_INTRINSICS
#endif // WEBP_USE_INTRINSICS
static void SimpleVFilter16i_NEON(uint8_t* p, int stride, int thresh) {
uint32_t k;
@@ -708,31 +702,29 @@ static uint8x16_t NeedsFilter2_NEON(const uint8x16_t p3, const uint8x16_t p2,
// 4-points filter
// Applies the 4-point filter adjustment to sign-flipped samples p1, p0, q0,
// q1 given the base delta 'delta0', and writes the results through op1, op0,
// oq0 and oq1 after converting back with FlipSignBack_NEON().
//   a1 = (delta0 + 4) >> 3, a2 = (delta0 + 3) >> 3, a3 = (a1 + 1) >> 1
//   op0 = p0 + a2, oq0 = q0 - a1, op1 = p1 + a3, oq1 = q1 - a3
// All additions and subtractions use the saturating vqaddq_s8 / vqsubq_s8
// intrinsics.
static void ApplyFilter4_NEON(const int8x16_t p1, const int8x16_t p0,
                              const int8x16_t q0, const int8x16_t q1,
                              const int8x16_t delta0, uint8x16_t* const op1,
                              uint8x16_t* const op0, uint8x16_t* const oq0,
                              uint8x16_t* const oq1) {
  const int8x16_t kCst3 = vdupq_n_s8(0x03);
  const int8x16_t kCst4 = vdupq_n_s8(0x04);
  const int8x16_t delta1 = vqaddq_s8(delta0, kCst4);
  const int8x16_t delta2 = vqaddq_s8(delta0, kCst3);
  const int8x16_t a1 = vshrq_n_s8(delta1, 3);
  const int8x16_t a2 = vshrq_n_s8(delta2, 3);
  const int8x16_t a3 = vrshrq_n_s8(a1, 1);      // a3 = (a1 + 1) >> 1
  *op0 = FlipSignBack_NEON(vqaddq_s8(p0, a2));  // clip(p0 + a2)
  *oq0 = FlipSignBack_NEON(vqsubq_s8(q0, a1));  // clip(q0 - a1)
  *op1 = FlipSignBack_NEON(vqaddq_s8(p1, a3));  // clip(p1 + a3)
  *oq1 = FlipSignBack_NEON(vqsubq_s8(q1, a3));  // clip(q1 - a3)
}
static void DoFilter4_NEON(
const uint8x16_t p1, const uint8x16_t p0,
const uint8x16_t q0, const uint8x16_t q1,
const uint8x16_t mask, const uint8x16_t hev_mask,
uint8x16_t* const op1, uint8x16_t* const op0,
uint8x16_t* const oq0, uint8x16_t* const oq1) {
static void DoFilter4_NEON(const uint8x16_t p1, const uint8x16_t p0,
const uint8x16_t q0, const uint8x16_t q1,
const uint8x16_t mask, const uint8x16_t hev_mask,
uint8x16_t* const op1, uint8x16_t* const op0,
uint8x16_t* const oq0, uint8x16_t* const oq1) {
// This is a fused version of DoFilter2() calling ApplyFilter2 directly
const int8x16_t p1s = FlipSign_NEON(p1);
int8x16_t p0s = FlipSign_NEON(p0);
@@ -761,12 +753,13 @@ static void DoFilter4_NEON(
// 6-points filter
static void ApplyFilter6_NEON(
const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
const int8x16_t delta,
uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
static void ApplyFilter6_NEON(const int8x16_t p2, const int8x16_t p1,
const int8x16_t p0, const int8x16_t q0,
const int8x16_t q1, const int8x16_t q2,
const int8x16_t delta, uint8x16_t* const op2,
uint8x16_t* const op1, uint8x16_t* const op0,
uint8x16_t* const oq0, uint8x16_t* const oq1,
uint8x16_t* const oq2) {
// We have to compute: X = (9*a+63) >> 7, Y = (18*a+63)>>7, Z = (27*a+63) >> 7
// Turns out, there's a common sub-expression S=9 * a - 1 that can be used
// with the special vqrshrn_n_s16 rounding-shift-and-narrow instruction:
@@ -778,13 +771,13 @@ static void ApplyFilter6_NEON(
const int8x8_t kCst18 = vdup_n_s8(18);
const int16x8_t S_lo = vmlal_s8(kCstm1, kCst9, delta_lo); // S = 9 * a - 1
const int16x8_t S_hi = vmlal_s8(kCstm1, kCst9, delta_hi);
const int16x8_t Z_lo = vmlal_s8(S_lo, kCst18, delta_lo); // S + 18 * a
const int16x8_t Z_lo = vmlal_s8(S_lo, kCst18, delta_lo); // S + 18 * a
const int16x8_t Z_hi = vmlal_s8(S_hi, kCst18, delta_hi);
const int8x8_t a3_lo = vqrshrn_n_s16(S_lo, 7); // (9 * a + 63) >> 7
const int8x8_t a3_lo = vqrshrn_n_s16(S_lo, 7); // (9 * a + 63) >> 7
const int8x8_t a3_hi = vqrshrn_n_s16(S_hi, 7);
const int8x8_t a2_lo = vqrshrn_n_s16(S_lo, 6); // (9 * a + 31) >> 6
const int8x8_t a2_lo = vqrshrn_n_s16(S_lo, 6); // (9 * a + 31) >> 6
const int8x8_t a2_hi = vqrshrn_n_s16(S_hi, 6);
const int8x8_t a1_lo = vqrshrn_n_s16(Z_lo, 7); // (27 * a + 63) >> 7
const int8x8_t a1_lo = vqrshrn_n_s16(Z_lo, 7); // (27 * a + 63) >> 7
const int8x8_t a1_hi = vqrshrn_n_s16(Z_hi, 7);
const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi);
const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
@@ -798,12 +791,13 @@ static void ApplyFilter6_NEON(
*op2 = FlipSignBack_NEON(vqaddq_s8(p2, a3)); // clip(p2 + a3)
}
static void DoFilter6_NEON(
const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
const uint8x16_t mask, const uint8x16_t hev_mask,
uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
static void DoFilter6_NEON(const uint8x16_t p2, const uint8x16_t p1,
const uint8x16_t p0, const uint8x16_t q0,
const uint8x16_t q1, const uint8x16_t q2,
const uint8x16_t mask, const uint8x16_t hev_mask,
uint8x16_t* const op2, uint8x16_t* const op1,
uint8x16_t* const op0, uint8x16_t* const oq0,
uint8x16_t* const oq1, uint8x16_t* const oq2) {
// This is a fused version of DoFilter2() calling ApplyFilter2 directly
const int8x16_t p2s = FlipSign_NEON(p2);
const int8x16_t p1s = FlipSign_NEON(p1);
@@ -827,41 +821,41 @@ static void DoFilter6_NEON(
const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
const int8x16_t complex_lf_delta =
vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
op2, op1, op0, oq0, oq1, oq2);
ApplyFilter6_NEON(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta, op2, op1,
op0, oq0, oq1, oq2);
}
}
// on macroblock edges
// Filters a macroblock edge: loads p3..q3 around 'p' with Load16x8_NEON(),
// derives the filter mask (NeedsFilter2_NEON, using 'ithresh' and 'thresh')
// and the high-edge-variance mask (NeedsHev_NEON, using 'hev_thresh'), runs
// DoFilter6_NEON() once, and writes the six outputs back with three
// Store16x2_NEON() calls anchored at p - 2 * stride, p and p + 2 * stride.
static void VFilter16_NEON(uint8_t* p, int stride, int thresh, int ithresh,
                           int hev_thresh) {
  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
  Load16x8_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
  {
    const uint8x16_t mask =
        NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
    const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
    DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
                   &oq0, &oq1, &oq2);
    Store16x2_NEON(op2, op1, p - 2 * stride, stride);
    Store16x2_NEON(op0, oq0, p + 0 * stride, stride);
    Store16x2_NEON(oq1, oq2, p + 2 * stride, stride);
  }
}
static void HFilter16_NEON(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16_NEON(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
Load8x16_NEON(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
ithresh, thresh);
const uint8x16_t mask =
NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
uint8x16_t op2, op1, op0, oq0, oq1, oq2;
DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
&op2, &op1, &op0, &oq0, &oq1, &oq2);
DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
&oq0, &oq1, &oq2);
Store2x16_NEON(op2, op1, p - 2, stride);
Store2x16_NEON(op0, oq0, p + 0, stride);
Store2x16_NEON(oq1, oq2, p + 2, stride);
@@ -869,15 +863,15 @@ static void HFilter16_NEON(uint8_t* p, int stride,
}
// on three inner edges
static void VFilter16i_NEON(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16i_NEON(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
uint32_t k;
uint8x16_t p3, p2, p1, p0;
Load16x4_NEON(p + 2 * stride, stride, &p3, &p2, &p1, &p0);
Load16x4_NEON(p + 2 * stride, stride, &p3, &p2, &p1, &p0);
for (k = 3; k != 0; --k) {
uint8x16_t q0, q1, q2, q3;
p += 4 * stride;
Load16x4_NEON(p + 2 * stride, stride, &q0, &q1, &q2, &q3);
Load16x4_NEON(p + 2 * stride, stride, &q0, &q1, &q2, &q3);
{
const uint8x16_t mask =
NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
@@ -893,8 +887,8 @@ static void VFilter16i_NEON(uint8_t* p, int stride,
}
#if !defined(WORK_AROUND_GCC)
static void HFilter16i_NEON(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16i_NEON(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
uint32_t k;
uint8x16_t p3, p2, p1, p0;
Load4x16_NEON(p + 2, stride, &p3, &p2, &p1, &p0);
@@ -921,27 +915,27 @@ static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
ithresh, thresh);
const uint8x16_t mask =
NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
uint8x16_t op2, op1, op0, oq0, oq1, oq2;
DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
&op2, &op1, &op0, &oq0, &oq1, &oq2);
DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
&oq0, &oq1, &oq2);
Store8x2x2_NEON(op2, op1, u - 2 * stride, v - 2 * stride, stride);
Store8x2x2_NEON(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
}
}
static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
int stride, int thresh, int ithresh,
int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
u += 4 * stride;
v += 4 * stride;
Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
ithresh, thresh);
const uint8x16_t mask =
NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
uint8x16_t op1, op0, oq0, oq1;
DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
@@ -955,26 +949,26 @@ static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
ithresh, thresh);
const uint8x16_t mask =
NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
uint8x16_t op2, op1, op0, oq0, oq1, oq2;
DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask,
&op2, &op1, &op0, &oq0, &oq1, &oq2);
DoFilter6_NEON(p2, p1, p0, q0, q1, q2, mask, hev_mask, &op2, &op1, &op0,
&oq0, &oq1, &oq2);
Store6x8x2_NEON(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
}
}
static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
int stride, int thresh, int ithresh,
int hev_thresh) {
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
u += 4;
v += 4;
Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
{
const uint8x16_t mask = NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3,
ithresh, thresh);
const uint8x16_t mask =
NeedsFilter2_NEON(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
const uint8x16_t hev_mask = NeedsHev_NEON(p1, p0, q0, q1, hev_thresh);
uint8x16_t op1, op0, oq0, oq1;
DoFilter4_NEON(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
@@ -1012,8 +1006,8 @@ static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
int16x8x2_t* const out) {
// a0 a1 a2 a3 | b0 b1 b2 b3 => a0 b0 c0 d0 | a1 b1 c1 d1
// c0 c1 c2 c3 | d0 d1 d2 d3 a2 b2 c2 d2 | a3 b3 c3 d3
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
// b0 d0 b1 d1 b2 d2 ...
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
// b0 d0 b1 d1 b2 d2 ...
*out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
}
@@ -1028,17 +1022,17 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
vget_low_s16(rows->val[1])); // in0 + in8
vget_low_s16(rows->val[1])); // in0 + in8
const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
vget_low_s16(rows->val[1])); // in0 - in8
vget_low_s16(rows->val[1])); // in0 - in8
// c = kC2 * in4 - kC1 * in12
// d = kC1 * in4 + kC2 * in12
const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
const int16x8_t D0 = vcombine_s16(a, b); // D0 = a | b
const int16x8_t D1 = vcombine_s16(d, c); // D1 = d | c
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
const int16x8_t D0 = vcombine_s16(a, b); // D0 = a | b
const int16x8_t D1 = vcombine_s16(d, c); // D1 = d | c
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
Transpose8x2_NEON(E0, E1, rows);
}
@@ -1058,135 +1052,135 @@ static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int kBPS = BPS;
// kC1, kC2. Padded because vld1.16 loads 8 bytes
const int16_t constants[4] = { kC1, kC2, 0, 0 };
const int16_t constants[4] = {kC1, kC2, 0, 0};
/* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
__asm__ volatile (
"vld1.16 {q1, q2}, [%[in]] \n"
"vld1.16 {d0}, [%[constants]] \n"
__asm__ volatile(
"vld1.16 {q1, q2}, [%[in]] \n"
"vld1.16 {d0}, [%[constants]] \n"
/* d2: in[0]
* d3: in[8]
* d4: in[4]
* d5: in[12]
*/
"vswp d3, d4 \n"
/* d2: in[0]
* d3: in[8]
* d4: in[4]
* d5: in[12]
*/
"vswp d3, d4 \n"
/* q8 = {in[4], in[12]} * kC1 * 2 >> 16
* q9 = {in[4], in[12]} * kC2 >> 16
*/
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
/* q8 = {in[4], in[12]} * kC1 * 2 >> 16
* q9 = {in[4], in[12]} * kC2 >> 16
*/
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
/* d22 = a = in[0] + in[8]
* d23 = b = in[0] - in[8]
*/
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
/* d22 = a = in[0] + in[8]
* d23 = b = in[0] - in[8]
*/
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
/* The multiplication should be x * kC1 >> 16
* However, with vqdmulh we get x * kC1 * 2 >> 16
* (multiply, double, return high half)
* We avoided this in kC2 by pre-shifting the constant.
* q8 = in[4]/[12] * kC1 >> 16
*/
"vshr.s16 q8, q8, #1 \n"
/* The multiplication should be x * kC1 >> 16
* However, with vqdmulh we get x * kC1 * 2 >> 16
* (multiply, double, return high half)
* We avoided this in kC2 by pre-shifting the constant.
* q8 = in[4]/[12] * kC1 >> 16
*/
"vshr.s16 q8, q8, #1 \n"
/* Add {in[4], in[12]} back after the multiplication. This is handled by
* adding 1 << 16 to kC1 in the libwebp C code.
*/
"vqadd.s16 q8, q2, q8 \n"
/* Add {in[4], in[12]} back after the multiplication. This is handled by
* adding 1 << 16 to kC1 in the libwebp C code.
*/
"vqadd.s16 q8, q2, q8 \n"
/* d20 = c = in[4]*kC2 - in[12]*kC1
* d21 = d = in[4]*kC1 + in[12]*kC2
*/
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
/* d20 = c = in[4]*kC2 - in[12]*kC1
* d21 = d = in[4]*kC1 + in[12]*kC2
*/
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
/* d2 = tmp[0] = a + d
* d3 = tmp[1] = b + c
* d4 = tmp[2] = b - c
* d5 = tmp[3] = a - d
*/
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
/* d2 = tmp[0] = a + d
* d3 = tmp[1] = b + c
* d4 = tmp[2] = b - c
* d5 = tmp[3] = a - d
*/
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vswp d3, d4 \n"
"vswp d3, d4 \n"
/* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
* q9 = {tmp[4], tmp[12]} * kC2 >> 16
*/
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
/* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
* q9 = {tmp[4], tmp[12]} * kC2 >> 16
*/
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
/* d22 = a = tmp[0] + tmp[8]
* d23 = b = tmp[0] - tmp[8]
*/
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
/* d22 = a = tmp[0] + tmp[8]
* d23 = b = tmp[0] - tmp[8]
*/
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
/* See long winded explanations prior */
"vshr.s16 q8, q8, #1 \n"
"vqadd.s16 q8, q2, q8 \n"
/* See long winded explanations prior */
"vshr.s16 q8, q8, #1 \n"
"vqadd.s16 q8, q2, q8 \n"
/* d20 = c = in[4]*kC2 - in[12]*kC1
* d21 = d = in[4]*kC1 + in[12]*kC2
*/
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
/* d20 = c = in[4]*kC2 - in[12]*kC1
* d21 = d = in[4]*kC1 + in[12]*kC2
*/
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
/* d2 = tmp[0] = a + d
* d3 = tmp[1] = b + c
* d4 = tmp[2] = b - c
* d5 = tmp[3] = a - d
*/
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
/* d2 = tmp[0] = a + d
* d3 = tmp[1] = b + c
* d4 = tmp[2] = b - c
* d5 = tmp[3] = a - d
*/
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
"vld1.32 d6[0], [%[dst]], %[kBPS] \n"
"vld1.32 d6[1], [%[dst]], %[kBPS] \n"
"vld1.32 d7[0], [%[dst]], %[kBPS] \n"
"vld1.32 d7[1], [%[dst]], %[kBPS] \n"
"vld1.32 d6[0], [%[dst]], %[kBPS] \n"
"vld1.32 d6[1], [%[dst]], %[kBPS] \n"
"vld1.32 d7[0], [%[dst]], %[kBPS] \n"
"vld1.32 d7[1], [%[dst]], %[kBPS] \n"
"sub %[dst], %[dst], %[kBPS], lsl #2 \n"
"sub %[dst], %[dst], %[kBPS], lsl #2 \n"
/* (val) + 4 >> 3 */
"vrshr.s16 d2, d2, #3 \n"
"vrshr.s16 d3, d3, #3 \n"
"vrshr.s16 d4, d4, #3 \n"
"vrshr.s16 d5, d5, #3 \n"
/* (val) + 4 >> 3 */
"vrshr.s16 d2, d2, #3 \n"
"vrshr.s16 d3, d3, #3 \n"
"vrshr.s16 d4, d4, #3 \n"
"vrshr.s16 d5, d5, #3 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
/* Must accumulate before saturating */
"vmovl.u8 q8, d6 \n"
"vmovl.u8 q9, d7 \n"
/* Must accumulate before saturating */
"vmovl.u8 q8, d6 \n"
"vmovl.u8 q9, d7 \n"
"vqadd.s16 q1, q1, q8 \n"
"vqadd.s16 q2, q2, q9 \n"
"vqadd.s16 q1, q1, q8 \n"
"vqadd.s16 q2, q2, q9 \n"
"vqmovun.s16 d0, q1 \n"
"vqmovun.s16 d1, q2 \n"
"vqmovun.s16 d0, q1 \n"
"vqmovun.s16 d1, q2 \n"
"vst1.32 d0[0], [%[dst]], %[kBPS] \n"
"vst1.32 d0[1], [%[dst]], %[kBPS] \n"
"vst1.32 d1[0], [%[dst]], %[kBPS] \n"
"vst1.32 d1[1], [%[dst]] \n"
"vst1.32 d0[0], [%[dst]], %[kBPS] \n"
"vst1.32 d0[1], [%[dst]], %[kBPS] \n"
"vst1.32 d1[0], [%[dst]], %[kBPS] \n"
"vst1.32 d1[1], [%[dst]] \n"
: [in] "+r"(in), [dst] "+r"(dst) /* modified registers */
: [kBPS] "r"(kBPS), [constants] "r"(constants) /* constants */
: "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" /* clobbered */
: [in] "+r"(in), [dst] "+r"(dst) /* modified registers */
: [kBPS] "r"(kBPS), [constants] "r"(constants) /* constants */
: "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" /* clobbered */
);
}
#endif // WEBP_USE_INTRINSICS
#endif // WEBP_USE_INTRINSICS
static void TransformTwo_NEON(const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst, int do_two) {
@@ -1204,12 +1198,17 @@ static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
//------------------------------------------------------------------------------
#define STORE_WHT(dst, col, rows) do { \
*dst = vgetq_lane_s32(rows.val[0], col); (dst) += 16; \
*dst = vgetq_lane_s32(rows.val[1], col); (dst) += 16; \
*dst = vgetq_lane_s32(rows.val[2], col); (dst) += 16; \
*dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
} while (0)
#define STORE_WHT(dst, col, rows) \
do { \
*dst = vgetq_lane_s32(rows.val[0], col); \
(dst) += 16; \
*dst = vgetq_lane_s32(rows.val[1], col); \
(dst) += 16; \
*dst = vgetq_lane_s32(rows.val[2], col); \
(dst) += 16; \
*dst = vgetq_lane_s32(rows.val[3], col); \
(dst) += 16; \
} while (0)
static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
@@ -1270,10 +1269,9 @@ static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
const uint64_t cd = (uint64_t)( d1 & 0xffff) << 0 |
(uint64_t)( c1 & 0xffff) << 16 |
(uint64_t)(-c1 & 0xffff) << 32 |
(uint64_t)(-d1 & 0xffff) << 48;
const uint64_t cd =
(uint64_t)(d1 & 0xffff) << 0 | (uint64_t)(c1 & 0xffff) << 16 |
(uint64_t)(-c1 & 0xffff) << 32 | (uint64_t)(-d1 & 0xffff) << 48;
const int16x4_t CD = vcreate_s16(cd);
const int16x4_t B = vqadd_s16(A, CD);
const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4));
@@ -1284,9 +1282,9 @@ static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
//------------------------------------------------------------------------------
// 4x4
static void DC4_NEON(uint8_t* dst) { // DC
static void DC4_NEON(uint8_t* dst) { // DC
const uint8x8_t A = vld1_u8(dst - BPS); // top row
const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
const uint16x4_t p1 = vpadd_u16(p0, p0);
const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1);
const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1);
@@ -1307,8 +1305,8 @@ static void DC4_NEON(uint8_t* dst) { // DC
// TrueMotion (4x4 + 8x8)
static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]'
const uint16x8_t d = vsubl_u8(T, TL); // A[c] - A[-1]
const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]'
const uint16x8_t d = vsubl_u8(T, TL); // A[c] - A[-1]
int y;
for (y = 0; y < size; y += 4) {
// left edge
@@ -1343,7 +1341,7 @@ static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
static void TM4_NEON(uint8_t* dst) { TrueMotion_NEON(dst, 4); }
static void VE4_NEON(uint8_t* dst) { // vertical
static void VE4_NEON(uint8_t* dst) { // vertical
// NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1)); // top row
const uint64x1_t A1 = vshr_n_u64(A0, 8);
@@ -1359,7 +1357,7 @@ static void VE4_NEON(uint8_t* dst) { // vertical
}
}
static void RD4_NEON(uint8_t* dst) { // Down-right
static void RD4_NEON(uint8_t* dst) { // Down-right
const uint8x8_t XABCD_u8 = vld1_u8(dst - BPS - 1);
const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
@@ -1388,7 +1386,7 @@ static void RD4_NEON(uint8_t* dst) { // Down-right
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
}
static void LD4_NEON(uint8_t* dst) { // Down-left
static void LD4_NEON(uint8_t* dst) { // Down-left
// Note using the same shift trick as VE4() is slower here.
const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0);
const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1);
@@ -1410,7 +1408,7 @@ static void LD4_NEON(uint8_t* dst) { // Down-left
//------------------------------------------------------------------------------
// Chroma
static void VE8uv_NEON(uint8_t* dst) { // vertical
static void VE8uv_NEON(uint8_t* dst) { // vertical
const uint8x8_t top = vld1_u8(dst - BPS);
int j;
for (j = 0; j < 8; ++j) {
@@ -1418,7 +1416,7 @@ static void VE8uv_NEON(uint8_t* dst) { // vertical
}
}
static void HE8uv_NEON(uint8_t* dst) { // horizontal
static void HE8uv_NEON(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 8; ++j) {
const uint8x8_t left = vld1_dup_u8(dst - 1);
@@ -1493,7 +1491,7 @@ static void TM8uv_NEON(uint8_t* dst) { TrueMotion_NEON(dst, 8); }
//------------------------------------------------------------------------------
// 16x16
static void VE16_NEON(uint8_t* dst) { // vertical
static void VE16_NEON(uint8_t* dst) { // vertical
const uint8x16_t top = vld1q_u8(dst - BPS);
int j;
for (j = 0; j < 16; ++j) {
@@ -1501,7 +1499,7 @@ static void VE16_NEON(uint8_t* dst) { // vertical
}
}
static void HE16_NEON(uint8_t* dst) { // horizontal
static void HE16_NEON(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 16; ++j) {
const uint8x16_t left = vld1q_dup_u8(dst - 1);
@@ -1579,7 +1577,7 @@ static void DC16NoTopLeft_NEON(uint8_t* dst) { DC16_NEON(dst, 0, 0); }
static void TM16_NEON(uint8_t* dst) {
const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
const uint8x16_t T = vld1q_u8(dst - BPS); // top row 'A[0..15]'
const uint8x16_t T = vld1q_u8(dst - BPS); // top row 'A[0..15]'
// A[c] - A[-1]
const uint16x8_t d_lo = vsubl_u8(vget_low_u8(T), TL);
const uint16x8_t d_hi = vsubl_u8(vget_high_u8(T), TL);

View File

@@ -19,7 +19,7 @@
// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
// one it seems => disable it by default. Uncomment the following to enable:
#if !defined(USE_TRANSFORM_AC3)
#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE
#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE
#endif
#include <emmintrin.h>
@@ -119,8 +119,8 @@ static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
// multiplications.
const __m128i four = _mm_set1_epi16(4);
const __m128i dc = _mm_add_epi16(T0, four);
const __m128i a = _mm_add_epi16(dc, T2);
const __m128i b = _mm_sub_epi16(dc, T2);
const __m128i a = _mm_add_epi16(dc, T2);
const __m128i b = _mm_sub_epi16(dc, T2);
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
const __m128i c1 = _mm_mulhi_epi16(T1, k2);
const __m128i c2 = _mm_mulhi_epi16(T3, k1);
@@ -242,15 +242,14 @@ static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
}
#endif // USE_TRANSFORM_AC3
#endif // USE_TRANSFORM_AC3
//------------------------------------------------------------------------------
// Loop Filter (Paragraph 15)
// Compute abs(p - q) = subs(p - q) OR subs(q - p)
#define MM_ABS(p, q) _mm_or_si128( \
_mm_subs_epu8((q), (p)), \
_mm_subs_epu8((p), (q)))
#define MM_ABS(p, q) \
_mm_or_si128(_mm_subs_epu8((q), (p)), _mm_subs_epu8((p), (q)))
// Shift each byte of "x" by 3 bits while preserving by the sign bit.
static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
@@ -262,22 +261,24 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
*x = _mm_packs_epi16(lo_1, hi_1);
}
#define FLIP_SIGN_BIT2(a, b) do { \
(a) = _mm_xor_si128(a, sign_bit); \
(b) = _mm_xor_si128(b, sign_bit); \
} while (0)
#define FLIP_SIGN_BIT2(a, b) \
do { \
(a) = _mm_xor_si128(a, sign_bit); \
(b) = _mm_xor_si128(b, sign_bit); \
} while (0)
#define FLIP_SIGN_BIT4(a, b, c, d) do { \
FLIP_SIGN_BIT2(a, b); \
FLIP_SIGN_BIT2(c, d); \
} while (0)
#define FLIP_SIGN_BIT4(a, b, c, d) \
do { \
FLIP_SIGN_BIT2(a, b); \
FLIP_SIGN_BIT2(c, d); \
} while (0)
// input/output is uint8_t
static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int hev_thresh, __m128i* const not_hev) {
const __m128i* const q1, int hev_thresh,
__m128i* const not_hev) {
const __m128i zero = _mm_setzero_si128();
const __m128i t_1 = MM_ABS(*p1, *p0);
const __m128i t_2 = MM_ABS(*q1, *q0);
@@ -313,10 +314,10 @@ static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
__m128i v3 = _mm_adds_epi8(*fl, k3);
__m128i v4 = _mm_adds_epi8(*fl, k4);
SignedShift8b_SSE2(&v4); // v4 >> 3
SignedShift8b_SSE2(&v3); // v3 >> 3
*q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
*p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
SignedShift8b_SSE2(&v4); // v4 >> 3
SignedShift8b_SSE2(&v3); // v3 >> 3
*q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
*p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
}
// Updates values of 2 pixels at MB edge during complex filtering.
@@ -339,17 +340,17 @@ static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, __m128i* const mask) {
const __m128i* const q1, int thresh,
__m128i* const mask) {
const __m128i m_thresh = _mm_set1_epi8((char)thresh);
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
const __m128i kFE = _mm_set1_epi8((char)0xFE);
const __m128i t2 = _mm_and_si128(t1, kFE); // set lsb of each byte to zero
const __m128i t3 = _mm_srli_epi16(t2, 1); // abs(p1 - q1) / 2
const __m128i t4 = MM_ABS(*p0, *q0); // abs(p0 - q0)
const __m128i t5 = _mm_adds_epu8(t4, t4); // abs(p0 - q0) * 2
const __m128i t6 = _mm_adds_epu8(t5, t3); // abs(p0-q0)*2 + abs(p1-q1)/2
const __m128i t4 = MM_ABS(*p0, *q0); // abs(p0 - q0)
const __m128i t5 = _mm_adds_epu8(t4, t4); // abs(p0 - q0) * 2
const __m128i t6 = _mm_adds_epu8(t5, t3); // abs(p0-q0)*2 + abs(p1-q1)/2
const __m128i t7 = _mm_subs_epu8(t6, m_thresh); // mask <= m_thresh
*mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128());
@@ -372,7 +373,7 @@ static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
FLIP_SIGN_BIT2(*p0, *q0);
GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
a = _mm_and_si128(a, mask); // mask filter values we don't care about
a = _mm_and_si128(a, mask); // mask filter values we don't care about
DoSimpleFilter_SSE2(p0, q0, &a);
FLIP_SIGN_BIT2(*p0, *q0);
}
@@ -404,12 +405,12 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 3 * (q0 - p0)
t1 = _mm_and_si128(t1, *mask); // mask filter values we don't care about
t2 = _mm_adds_epi8(t1, k3); // 3 * (q0 - p0) + hev(p1 - q1) + 3
t3 = _mm_adds_epi8(t1, k4); // 3 * (q0 - p0) + hev(p1 - q1) + 4
SignedShift8b_SSE2(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
SignedShift8b_SSE2(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
*p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
*q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
t2 = _mm_adds_epi8(t1, k3); // 3 * (q0 - p0) + hev(p1 - q1) + 3
t3 = _mm_adds_epi8(t1, k4); // 3 * (q0 - p0) + hev(p1 - q1) + 4
SignedShift8b_SSE2(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
SignedShift8b_SSE2(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
*p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
*q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
FLIP_SIGN_BIT2(*p0, *q0);
// this is equivalent to signed (a + 1) >> 1 calculation
@@ -417,9 +418,9 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
t3 = _mm_avg_epu8(t2, zero);
t3 = _mm_sub_epi8(t3, k64);
t3 = _mm_and_si128(not_hev, t3); // if !hev
*q1 = _mm_subs_epi8(*q1, t3); // q1 -= t3
*p1 = _mm_adds_epi8(*p1, t3); // p1 += t3
t3 = _mm_and_si128(not_hev, t3); // if !hev
*q1 = _mm_subs_epi8(*q1, t3); // q1 -= t3
*p1 = _mm_adds_epi8(*p1, t3); // p1 += t3
FLIP_SIGN_BIT2(*p1, *q1);
}
@@ -440,13 +441,13 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
FLIP_SIGN_BIT2(*p2, *q2);
GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
{ // do simple filter on pixels with hev
{ // do simple filter on pixels with hev
const __m128i m = _mm_andnot_si128(not_hev, *mask);
const __m128i f = _mm_and_si128(a, m);
DoSimpleFilter_SSE2(p0, q0, &f);
}
{ // do strong filter on pixels with not hev
{ // do strong filter on pixels with not hev
const __m128i k9 = _mm_set1_epi16(0x0900);
const __m128i k63 = _mm_set1_epi16(63);
@@ -456,11 +457,11 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
const __m128i f_hi = _mm_unpackhi_epi8(zero, f);
const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9); // Filter (lo) * 9
const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9); // Filter (hi) * 9
const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9); // Filter (lo) * 9
const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9); // Filter (hi) * 9
const __m128i a2_lo = _mm_add_epi16(f9_lo, k63); // Filter * 9 + 63
const __m128i a2_hi = _mm_add_epi16(f9_hi, k63); // Filter * 9 + 63
const __m128i a2_lo = _mm_add_epi16(f9_lo, k63); // Filter * 9 + 63
const __m128i a2_hi = _mm_add_epi16(f9_hi, k63); // Filter * 9 + 63
const __m128i a1_lo = _mm_add_epi16(a2_lo, f9_lo); // Filter * 18 + 63
const __m128i a1_hi = _mm_add_epi16(a2_hi, f9_hi); // Filter * 18 + 63
@@ -503,8 +504,7 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
}
static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
const uint8_t* const r8,
int stride,
const uint8_t* const r8, int stride,
__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1) {
// Assume the pixels around the edge (|) are numbered as follows
@@ -539,8 +539,8 @@ static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
}
}
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
uint8_t* dst, int stride) {
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x, uint8_t* dst,
int stride) {
int i;
for (i = 0; i < 4; ++i, dst += stride) {
WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x));
@@ -552,9 +552,8 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
uint8_t* r0, uint8_t* r8,
int stride) {
const __m128i* const q1, uint8_t* r0,
uint8_t* r8, int stride) {
__m128i t1, p1_s, p0_s, q0_s, q1_s;
// p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@@ -636,50 +635,55 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
//------------------------------------------------------------------------------
// Complex In-loop filtering (Paragraph 15.3)
#define MAX_DIFF1(p3, p2, p1, p0, m) do { \
(m) = MM_ABS(p1, p0); \
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
} while (0)
#define MAX_DIFF1(p3, p2, p1, p0, m) \
do { \
(m) = MM_ABS(p1, p0); \
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
} while (0)
#define MAX_DIFF2(p3, p2, p1, p0, m) do { \
(m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
} while (0)
#define MAX_DIFF2(p3, p2, p1, p0, m) \
do { \
(m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
} while (0)
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) do { \
(e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
(e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
(e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
(e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
} while (0)
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) \
do { \
(e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
(e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
(e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
(e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
} while (0)
#define LOADUV_H_EDGE(p, u, v, stride) do { \
const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
(p) = _mm_unpacklo_epi64(U, V); \
} while (0)
#define LOADUV_H_EDGE(p, u, v, stride) \
do { \
const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
(p) = _mm_unpacklo_epi64(U, V); \
} while (0)
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) do { \
LOADUV_H_EDGE(e1, u, v, 0 * (stride)); \
LOADUV_H_EDGE(e2, u, v, 1 * (stride)); \
LOADUV_H_EDGE(e3, u, v, 2 * (stride)); \
LOADUV_H_EDGE(e4, u, v, 3 * (stride)); \
} while (0)
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) \
do { \
LOADUV_H_EDGE(e1, u, v, 0 * (stride)); \
LOADUV_H_EDGE(e2, u, v, 1 * (stride)); \
LOADUV_H_EDGE(e3, u, v, 2 * (stride)); \
LOADUV_H_EDGE(e4, u, v, 3 * (stride)); \
} while (0)
#define STOREUV(p, u, v, stride) do { \
_mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
(p) = _mm_srli_si128(p, 8); \
_mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
} while (0)
#define STOREUV(p, u, v, stride) \
do { \
_mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
(p) = _mm_srli_si128(p, 8); \
_mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
} while (0)
static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, int ithresh,
__m128i* const mask) {
const __m128i* const q1, int thresh,
int ithresh, __m128i* const mask) {
const __m128i it = _mm_set1_epi8(ithresh);
const __m128i diff = _mm_subs_epu8(*mask, it);
const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
@@ -689,8 +693,8 @@ static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
}
// on macroblock edges
static void VFilter16_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
__m128i t1;
__m128i mask;
__m128i p2, p1, p0, q0, q1, q2;
@@ -715,8 +719,8 @@ static void VFilter16_SSE2(uint8_t* p, int stride,
_mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
}
static void HFilter16_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
__m128i mask;
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
@@ -735,19 +739,19 @@ static void HFilter16_SSE2(uint8_t* p, int stride,
}
// on three inner edges
static void VFilter16i_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16i_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
int k;
__m128i p3, p2, p1, p0; // loop invariants
__m128i p3, p2, p1, p0; // loop invariants
LOAD_H_EDGES4(p, stride, p3, p2, p1, p0); // prologue
for (k = 3; k > 0; --k) {
__m128i mask, tmp1, tmp2;
uint8_t* const b = p + 2 * stride; // beginning of p1
uint8_t* const b = p + 2 * stride; // beginning of p1
p += 4 * stride;
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
LOAD_H_EDGES4(p, stride, p3, p2, tmp1, tmp2);
MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
@@ -768,20 +772,20 @@ static void VFilter16i_SSE2(uint8_t* p, int stride,
}
}
static void HFilter16i_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16i_SSE2(uint8_t* p, int stride, int thresh, int ithresh,
int hev_thresh) {
int k;
__m128i p3, p2, p1, p0; // loop invariants
__m128i p3, p2, p1, p0; // loop invariants
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
for (k = 3; k > 0; --k) {
__m128i mask, tmp1, tmp2;
uint8_t* const b = p + 2; // beginning of p1
uint8_t* const b = p + 2; // beginning of p1
p += 4; // beginning of q0 (and next span)
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
@@ -843,8 +847,8 @@ static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
}
static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
int stride, int thresh, int ithresh,
int hev_thresh) {
__m128i mask;
__m128i t1, t2, p1, p0, q0, q1;
@@ -870,11 +874,11 @@ static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
}
static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
int stride,
int thresh, int ithresh, int hev_thresh) {
int stride, int thresh, int ithresh,
int hev_thresh) {
__m128i mask;
__m128i t1, t2, p1, p0, q0, q1;
Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
MAX_DIFF1(t2, t1, p1, p0, mask);
u += 4; // beginning of q0
@@ -904,7 +908,7 @@ static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
static void VE4_SSE2(uint8_t* dst) { // vertical
static void VE4_SSE2(uint8_t* dst) { // vertical
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -920,7 +924,7 @@ static void VE4_SSE2(uint8_t* dst) { // vertical
}
}
static void LD4_SSE2(uint8_t* dst) { // Down-Left
static void LD4_SSE2(uint8_t* dst) { // Down-Left
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@@ -930,13 +934,13 @@ static void LD4_SSE2(uint8_t* dst) { // Down-Left
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
const __m128i one = _mm_set1_epi8(1);
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
@@ -951,8 +955,8 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
@@ -961,7 +965,7 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
DST(0, 3) = AVG3(K, J, I);
}
static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@@ -977,8 +981,8 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
const uint32_t extra_out =
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
@@ -987,7 +991,7 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
DST(3, 3) = (extra_out >> 8) & 0xff;
}
static void RD4_SSE2(uint8_t* dst) { // Down-right
static void RD4_SSE2(uint8_t* dst) { // Down-right
const __m128i one = _mm_set1_epi8(1);
const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
@@ -1004,7 +1008,7 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right
const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -1053,9 +1057,9 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
}
}
static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }
static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }
static void VE16_SSE2(uint8_t* dst) {
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
@@ -1065,7 +1069,7 @@ static void VE16_SSE2(uint8_t* dst) {
}
}
static void HE16_SSE2(uint8_t* dst) { // horizontal
static void HE16_SSE2(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
const __m128i values = _mm_set1_epi8((char)dst[-1]);
@@ -1125,7 +1129,7 @@ static void DC16NoTopLeft_SSE2(uint8_t* dst) { // DC with no top & left samples
//------------------------------------------------------------------------------
// Chroma
static void VE8uv_SSE2(uint8_t* dst) { // vertical
static void VE8uv_SSE2(uint8_t* dst) { // vertical
int j;
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
for (j = 0; j < 8; ++j) {
@@ -1142,7 +1146,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
}
}
static void DC8uv_SSE2(uint8_t* dst) { // DC
static void DC8uv_SSE2(uint8_t* dst) { // DC
const __m128i zero = _mm_setzero_si128();
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
const __m128i sum = _mm_sad_epu8(top, zero);
@@ -1157,7 +1161,7 @@ static void DC8uv_SSE2(uint8_t* dst) { // DC
}
}
static void DC8uvNoLeft_SSE2(uint8_t* dst) { // DC with no left samples
static void DC8uvNoLeft_SSE2(uint8_t* dst) { // DC with no left samples
const __m128i zero = _mm_setzero_si128();
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
const __m128i sum = _mm_sad_epu8(top, zero);
@@ -1174,7 +1178,7 @@ static void DC8uvNoTop_SSE2(uint8_t* dst) { // DC with no top samples
Put8x8uv_SSE2(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
Put8x8uv_SSE2(0x80, dst);
}

View File

@@ -17,12 +17,12 @@
#include <emmintrin.h>
#include <smmintrin.h>
#include "src/webp/types.h"
#include "src/dec/vp8i_dec.h"
#include "src/dsp/cpu.h"
#include "src/utils/utils.h"
#include "src/webp/types.h"
static void HE16_SSE41(uint8_t* dst) { // horizontal
static void HE16_SSE41(uint8_t* dst) { // horizontal
int j;
const __m128i kShuffle3 = _mm_set1_epi8(3);
for (j = 16; j > 0; --j) {

View File

@@ -25,7 +25,7 @@
extern "C" {
#endif
#define BPS 32 // this is the common stride for enc/dec
#define BPS 32 // this is the common stride for enc/dec
//------------------------------------------------------------------------------
// WEBP_RESTRICT
@@ -44,14 +44,13 @@ extern "C" {
#define WEBP_RESTRICT
#endif
//------------------------------------------------------------------------------
// Init stub generator
// Defines an init function stub to ensure each module exposes a symbol,
// avoiding a compiler warning.
#define WEBP_DSP_INIT_STUB(func) \
extern void func(void); \
extern void func(void); \
void func(void) {}
//------------------------------------------------------------------------------
@@ -70,7 +69,7 @@ typedef void (*VP8WHT)(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top and *left can be NULL.
@@ -95,8 +94,7 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
// Compute the average (DC) of four 4x4 blocks.
// Each sub-4x4 block #i sum is stored in dc[i].
typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
uint32_t dc[4]);
typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]);
extern VP8MeanMetric VP8Mean16x4;
typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
@@ -104,7 +102,7 @@ typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy16x8;
// Quantization
struct VP8Matrix; // forward declaration
struct VP8Matrix; // forward declaration
typedef int (*VP8QuantizeBlock)(
int16_t in[16], int16_t out[16],
const struct VP8Matrix* WEBP_RESTRICT const mtx);
@@ -125,15 +123,15 @@ extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
extern const int VP8DspScan[16 + 4 + 4];
// Collect histogram for susceptibility calculation.
#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram.
// Histogram summary handed to the VP8CHisto collectors through their 'histo'
// argument.
typedef struct {
// We only need to store max_value and last_non_zero, not the distribution.
int max_value;  // NOTE(review): presumably the largest bin count -- confirm
int last_non_zero;  // NOTE(review): presumably the last non-empty bin -- confirm
} VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
const uint8_t* WEBP_RESTRICT pred,
int start_block, int end_block,
const uint8_t* WEBP_RESTRICT pred, int start_block,
int end_block,
VP8Histogram* WEBP_RESTRICT const histo);
extern VP8CHisto VP8CollectHistogram;
// General-purpose util function to help VP8CollectHistogram().
@@ -146,7 +144,7 @@ void VP8EncDspInit(void);
//------------------------------------------------------------------------------
// cost functions (encoding)
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// approximate cost per level:
extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
extern const uint8_t VP8EncBands[16 + 1];
@@ -180,7 +178,7 @@ typedef struct {
double VP8SSIMFromStats(const VP8DistoStats* const stats);
double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats);
#define VP8_SSIM_KERNEL 3 // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
#define VP8_SSIM_KERNEL 3 // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int xo, int yo, // center position
@@ -192,8 +190,8 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2);
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
#endif
#if !defined(WEBP_DISABLE_STATS)
@@ -248,8 +246,8 @@ extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges
extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
// regular filter (on both macroblock edges and inner edges)
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
int thresh, int ithresh, int hev_t);
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride, int thresh,
int ithresh, int hev_t);
typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int stride,
int thresh, int ithresh, int hev_t);
@@ -260,7 +258,7 @@ extern VP8ChromaFilterFunc VP8VFilter8;
extern VP8ChromaFilterFunc VP8HFilter8;
// on inner edge
extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether
extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether
extern VP8LumaFilterFunc VP8HFilter16i;
extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether
extern VP8ChromaFilterFunc VP8HFilter8i;
@@ -280,7 +278,7 @@ void VP8DspInit(void);
//------------------------------------------------------------------------------
// WebP I/O
#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
@@ -295,7 +293,7 @@ typedef void (*WebPUpsampleLinePairFunc)(
// Fancy upsampling functions to convert YUV to RGB(A) modes
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
#endif // FANCY_UPSAMPLING
#endif // FANCY_UPSAMPLING
// Per-row point-sampling methods.
typedef void (*WebPSamplerRowFunc)(const uint8_t* WEBP_RESTRICT y,
@@ -344,8 +342,8 @@ extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
// the U/V one.
extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v,
int src_width, int do_store);
uint8_t* WEBP_RESTRICT v, int src_width,
int do_store);
// Convert a row of accumulated (four-values) of rgba32 toward U/V
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
@@ -361,8 +359,8 @@ extern void (*WebPConvertBGRToY)(const uint8_t* WEBP_RESTRICT bgr,
// used for plain-C fallback.
extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v,
int src_width, int do_store);
uint8_t* WEBP_RESTRICT v, int src_width,
int do_store);
extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
uint8_t* WEBP_RESTRICT u,
uint8_t* WEBP_RESTRICT v, int width);
@@ -403,9 +401,8 @@ extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
// Main entry calls:
extern void WebPRescalerImportRow(
struct WebPRescaler* WEBP_RESTRICT const wrk,
const uint8_t* WEBP_RESTRICT src);
extern void WebPRescalerImportRow(struct WebPRescaler* WEBP_RESTRICT const wrk,
const uint8_t* WEBP_RESTRICT src);
// Export one row (starting at x_out position) from rescaler.
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
@@ -417,12 +414,12 @@ void WebPRescalerDspInit(void);
// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
extern void (*WebPApplyAlphaMultiply)(
uint8_t* rgba, int alpha_first, int w, int h, int stride);
extern void (*WebPApplyAlphaMultiply)(uint8_t* rgba, int alpha_first, int w,
int h, int stride);
// Same, but specifically for RGBA4444 format
extern void (*WebPApplyAlphaMultiply4444)(
uint8_t* rgba4444, int w, int h, int stride);
extern void (*WebPApplyAlphaMultiply4444)(uint8_t* rgba4444, int w, int h,
int stride);
// Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
// Returns true if alpha[] plane has non-trivial values different from 0xff.
@@ -442,8 +439,7 @@ extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha,
// Returns true if there's only trivial 0xff alpha values.
extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb,
int argb_stride, int width, int height,
uint8_t* WEBP_RESTRICT alpha,
int alpha_stride);
uint8_t* WEBP_RESTRICT alpha, int alpha_stride);
// Extract the green values from 32b values in argb[] and pack them into alpha[]
// (this is the opposite of WebPDispatchAlphaToGreen).
@@ -462,8 +458,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
// Same for a row of single values, with side alpha values.
extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
const uint8_t* WEBP_RESTRICT const alpha,
int width, int inverse);
const uint8_t* WEBP_RESTRICT const alpha, int width,
int inverse);
// Same as WebPMultRow(), but for several 'num_rows' rows.
void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
@@ -472,8 +468,8 @@ void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
// Plain-C versions, used as fallback by some implementations.
void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
const uint8_t* WEBP_RESTRICT const alpha,
int width, int inverse);
const uint8_t* WEBP_RESTRICT const alpha, int width,
int inverse);
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
#ifdef WORDS_BIGENDIAN
@@ -481,15 +477,15 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT r,
const uint8_t* WEBP_RESTRICT g,
const uint8_t* WEBP_RESTRICT b,
int len, uint32_t* WEBP_RESTRICT out);
const uint8_t* WEBP_RESTRICT b, int len,
uint32_t* WEBP_RESTRICT out);
#endif
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
const uint8_t* WEBP_RESTRICT g,
const uint8_t* WEBP_RESTRICT b,
int len, int step, uint32_t* WEBP_RESTRICT out);
const uint8_t* WEBP_RESTRICT b, int len, int step,
uint32_t* WEBP_RESTRICT out);
// This function returns true if src[i] contains a value different from 0xff.
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
@@ -504,18 +500,18 @@ void WebPInitAlphaProcessing(void);
//------------------------------------------------------------------------------
// Filter functions
typedef enum { // Filter types.
typedef enum { // Filter types.
WEBP_FILTER_NONE = 0,
WEBP_FILTER_HORIZONTAL,
WEBP_FILTER_VERTICAL,
WEBP_FILTER_GRADIENT,
WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker
WEBP_FILTER_BEST, // meta-types
WEBP_FILTER_BEST, // meta-types
WEBP_FILTER_FAST
} WEBP_FILTER_TYPE;
typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
int width, int height, int stride,
typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT out);
// In-place un-filtering.
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
@@ -538,7 +534,7 @@ extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
void VP8FiltersInit(void);
#ifdef __cplusplus
} // extern "C"
} // extern "C"
#endif
#endif // WEBP_DSP_DSP_H_

View File

@@ -26,9 +26,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
}
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int clip_max(int v, int max) {
return (v > max) ? max : v;
}
static WEBP_INLINE int clip_max(int v, int max) { return (v > max) ? max : v; }
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
@@ -36,14 +34,14 @@ static WEBP_INLINE int clip_max(int v, int max) {
// the higher, the "easier" the macroblock is to compress.
const int VP8DspScan[16 + 4 + 4] = {
// Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
// Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
};
// general-purpose util function
@@ -68,7 +66,7 @@ static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
int start_block, int end_block,
VP8Histogram* WEBP_RESTRICT const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
int distribution[MAX_COEFF_THRESH + 1] = {0};
for (j = start_block; j < end_block; ++j) {
int k;
int16_t out[16];
@@ -89,7 +87,7 @@ static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
//------------------------------------------------------------------------------
// run-time tables (~4k)
static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
@@ -105,7 +103,6 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
}
}
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@@ -120,7 +117,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
int C[4 * 4], *tmp;
int i;
tmp = C;
for (i = 0; i < 4; ++i) { // vertical pass
for (i = 0; i < 4; ++i) { // vertical pass
const int a = in[0] + in[8];
const int b = in[0] - in[8];
const int c =
@@ -136,7 +133,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
}
tmp = C;
for (i = 0; i < 4; ++i) { // horizontal pass
for (i = 0; i < 4; ++i) { // horizontal pass
const int dc = tmp[0] + 4;
const int a = dc + tmp[8];
const int b = dc - tmp[8];
@@ -154,8 +151,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst,
int do_two) {
uint8_t* WEBP_RESTRICT dst, int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
@@ -168,28 +164,28 @@ static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
int i;
int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
const int d1 = src[1] - ref[1];
const int d2 = src[2] - ref[2];
const int d3 = src[3] - ref[3];
const int a0 = (d0 + d3); // 10b [-510,510]
const int a0 = (d0 + d3); // 10b [-510,510]
const int a1 = (d1 + d2);
const int a2 = (d1 - d2);
const int a3 = (d0 - d3);
tmp[0 + i * 4] = (a0 + a1) * 8; // 14b [-8160,8160]
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
tmp[0 + i * 4] = (a0 + a1) * 8; // 14b [-8160,8160]
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
tmp[2 + i * 4] = (a0 - a1) * 8;
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
}
for (i = 0; i < 4; ++i) {
const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b
const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
const int a1 = (tmp[4 + i] + tmp[8 + i]);
const int a2 = (tmp[4 + i] - tmp[8 + i]);
const int a3 = (tmp[0 + i] - tmp[12 + i]);
out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
out[8 + i] = (a0 - a1 + 7) >> 4;
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
out[12 + i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
@@ -212,23 +208,23 @@ static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
const int a1 = (in[1 * 16] + in[3 * 16]);
const int a2 = (in[1 * 16] - in[3 * 16]);
const int a3 = (in[0 * 16] - in[2 * 16]);
tmp[0 + i * 4] = a0 + a1; // 14b
tmp[0 + i * 4] = a0 + a1; // 14b
tmp[1 + i * 4] = a3 + a2;
tmp[2 + i * 4] = a3 - a2;
tmp[3 + i * 4] = a0 - a1;
}
for (i = 0; i < 4; ++i) {
const int a0 = (tmp[0 + i] + tmp[8 + i]); // 15b
const int a1 = (tmp[4 + i] + tmp[12+ i]);
const int a2 = (tmp[4 + i] - tmp[12+ i]);
const int a1 = (tmp[4 + i] + tmp[12 + i]);
const int a2 = (tmp[4 + i] - tmp[12 + i]);
const int a3 = (tmp[0 + i] - tmp[8 + i]);
const int b0 = a0 + a1; // 16b
const int b0 = a0 + a1; // 16b
const int b1 = a3 + a2;
const int b2 = a3 - a2;
const int b3 = a0 - a1;
out[ 0 + i] = b0 >> 1; // 15b
out[ 4 + i] = b1 >> 1;
out[ 8 + i] = b2 >> 1;
out[0 + i] = b0 >> 1; // 15b
out[4 + i] = b1 >> 1;
out[8 + i] = b2 >> 1;
out[12 + i] = b3 >> 1;
}
}
@@ -303,23 +299,23 @@ static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top,
int size, int round, int shift) {
const uint8_t* WEBP_RESTRICT top, int size,
int round, int shift) {
int DC = 0;
int j;
if (top != NULL) {
for (j = 0; j < size; ++j) DC += top[j];
if (left != NULL) { // top and left present
if (left != NULL) { // top and left present
for (j = 0; j < size; ++j) DC += left[j];
} else { // top, but no left
} else { // top, but no left
DC += DC;
}
DC = (DC + round) >> shift;
} else if (left != NULL) { // left but no top
} else if (left != NULL) { // left but no top
for (j = 0; j < size; ++j) DC += left[j];
DC += DC;
DC = (DC + round) >> shift;
} else { // no top, no left, nothing.
} else { // no top, no left, nothing.
DC = 0x80;
}
Fill(dst, DC, size);
@@ -372,10 +368,10 @@ static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
// vertical
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const uint8_t vals[4] = {
AVG3(top[-1], top[0], top[1]),
AVG3(top[ 0], top[1], top[2]),
AVG3(top[ 1], top[2], top[3]),
AVG3(top[ 2], top[3], top[4])
AVG3(top[-1], top[0], top[1]),
AVG3(top[0], top[1], top[2]),
AVG3(top[1], top[2], top[3]),
AVG3(top[2], top[3], top[4]),
};
int i;
for (i = 0; i < 4; ++i) {
@@ -413,13 +409,13 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int B = top[1];
const int C = top[2];
const int D = top[3];
DST(0, 3) = AVG3(J, K, L);
DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
DST(0, 3) = AVG3(J, K, L);
DST(0, 2) = DST(1, 3) = AVG3(I, J, K);
DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J);
DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X);
DST(2, 0) = DST(3, 1) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
}
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -431,13 +427,13 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int F = top[5];
const int G = top[6];
const int H = top[7];
DST(0, 0) = AVG3(A, B, C);
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(0, 0) = AVG3(A, B, C);
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
}
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -452,14 +448,14 @@ static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
DST(0, 0) = DST(1, 2) = AVG2(X, A);
DST(1, 0) = DST(2, 2) = AVG2(A, B);
DST(2, 0) = DST(3, 2) = AVG2(B, C);
DST(3, 0) = AVG2(C, D);
DST(3, 0) = AVG2(C, D);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
DST(3, 1) = AVG3(B, C, D);
DST(3, 1) = AVG3(B, C, D);
}
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -471,17 +467,17 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int F = top[5];
const int G = top[6];
const int H = top[7];
DST(0, 0) = AVG2(A, B);
DST(0, 0) = AVG2(A, B);
DST(1, 0) = DST(0, 2) = AVG2(B, C);
DST(2, 0) = DST(1, 2) = AVG2(C, D);
DST(3, 0) = DST(2, 2) = AVG2(D, E);
DST(0, 1) = AVG3(A, B, C);
DST(0, 1) = AVG3(A, B, C);
DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
}
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -489,14 +485,13 @@ static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
const int J = top[-3];
const int K = top[-4];
const int L = top[-5];
DST(0, 0) = AVG2(I, J);
DST(0, 0) = AVG2(I, J);
DST(2, 0) = DST(0, 1) = AVG2(J, K);
DST(2, 1) = DST(0, 2) = AVG2(K, L);
DST(1, 0) = AVG3(I, J, K);
DST(1, 0) = AVG3(I, J, K);
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
DST(3, 2) = DST(2, 2) =
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -512,14 +507,14 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
DST(0, 0) = DST(2, 1) = AVG2(I, X);
DST(0, 1) = DST(2, 2) = AVG2(J, I);
DST(0, 2) = DST(2, 3) = AVG2(K, J);
DST(0, 3) = AVG2(L, K);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
DST(1, 3) = AVG3(L, K, J);
DST(1, 3) = AVG3(L, K, J);
}
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
@@ -561,8 +556,7 @@ static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b,
int w, int h) {
const uint8_t* WEBP_RESTRICT b, int w, int h) {
int count = 0;
int y, x;
for (y = 0; y < h; ++y) {
@@ -604,7 +598,7 @@ static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
}
}
dc[k] = avg;
ref += 4; // go to next 4x4 block.
ref += 4; // go to next 4x4 block.
}
}
@@ -637,17 +631,17 @@ static int TTransform(const uint8_t* WEBP_RESTRICT in,
// vertical pass
for (i = 0; i < 4; ++i, ++w) {
const int a0 = tmp[0 + i] + tmp[8 + i];
const int a1 = tmp[4 + i] + tmp[12+ i];
const int a2 = tmp[4 + i] - tmp[12+ i];
const int a1 = tmp[4 + i] + tmp[12 + i];
const int a2 = tmp[4 + i] - tmp[12 + i];
const int a3 = tmp[0 + i] - tmp[8 + i];
const int b0 = a0 + a1;
const int b1 = a3 + a2;
const int b2 = a3 - a2;
const int b3 = a0 - a1;
sum += w[ 0] * abs(b0);
sum += w[ 4] * abs(b1);
sum += w[ 8] * abs(b2);
sum += w[0] * abs(b0);
sum += w[4] * abs(b1);
sum += w[8] * abs(b2);
sum += w[12] * abs(b3);
}
return sum;
@@ -680,9 +674,8 @@ static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
//
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
static const uint8_t kZigzag[16] = {0, 1, 4, 8, 5, 2, 3, 6,
9, 12, 13, 10, 7, 11, 14, 15};
// Simple quantization
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
@@ -714,7 +707,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}

View File

@@ -18,8 +18,8 @@
#if defined(WEBP_USE_MIPS32)
#include "src/dsp/mips_macro.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
static const int kC2 = WEBP_TRANSFORM_AC3_C2;
@@ -30,6 +30,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
// A..D - offsets in bytes to load from in buffer
// TEMP0..TEMP3 - registers for corresponding tmp elements
// TEMP4..TEMP5 - temporary registers
// clang-format off
#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
"lh %[temp16], " #A "(%[temp20]) \n\t" \
"lh %[temp18], " #B "(%[temp20]) \n\t" \
@@ -107,6 +108,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
"sb %[" #TEMP4 "], 1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP8 "], 2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
// clang-format on
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
@@ -118,27 +120,26 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};
__asm__ volatile(
"lw %[temp20], 4(%[args]) \n\t"
VERTICAL_PASS(0, 16, 8, 24, temp4, temp0, temp1, temp2, temp3)
VERTICAL_PASS(2, 18, 10, 26, temp8, temp4, temp5, temp6, temp7)
VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11)
VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
"lw %[temp20], 4(%[args]) \n\t" //
VERTICAL_PASS(0, 16, 8, 24, temp4, temp0, temp1, temp2, temp3) //
VERTICAL_PASS(2, 18, 10, 26, temp8, temp4, temp5, temp6, temp7) //
VERTICAL_PASS(4, 20, 12, 28, temp12, temp8, temp9, temp10, temp11) //
VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15) //
HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12)
HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13)
HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
HORIZONTAL_PASS(0, temp0, temp4, temp8, temp12) //
HORIZONTAL_PASS(1, temp1, temp5, temp9, temp13) //
HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14) //
HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15) //
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
[temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
: [args]"r"(args), [kC1]"r"(kC1), [kC2]"r"(kC2)
: "memory", "hi", "lo"
);
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
[temp9] "=&r"(temp9), [temp10] "=&r"(temp10), [temp11] "=&r"(temp11),
[temp12] "=&r"(temp12), [temp13] "=&r"(temp13), [temp14] "=&r"(temp14),
[temp15] "=&r"(temp15), [temp16] "=&r"(temp16), [temp17] "=&r"(temp17),
[temp18] "=&r"(temp18), [temp19] "=&r"(temp19), [temp20] "=&r"(temp20)
: [args] "r"(args), [kC1] "r"(kC1), [kC2] "r"(kC2)
: "memory", "hi", "lo");
}
static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
@@ -158,6 +159,7 @@ static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
// J - offset in bytes (kZigzag[n] * 2)
// K - offset in bytes (kZigzag[n] * 4)
// N - offset in bytes (n * 2)
// clang-format off
#define QUANTIZE_ONE(J, K, N) \
"lh %[temp0], " #J "(%[ppin]) \n\t" \
"lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
@@ -184,6 +186,7 @@ static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
"2: \n\t" \
"sh %[temp5], " #J "(%[ppin]) \n\t" \
"sh %[level], " #N "(%[pout]) \n\t"
// clang-format on
static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
@@ -191,43 +194,39 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
int sign, coeff, level, i;
int max_level = MAX_LEVEL;
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
const uint16_t* ppsharpen = &mtx->sharpen[0];
const uint32_t* ppzthresh = &mtx->zthresh[0];
const uint16_t* ppq = &mtx->q[0];
const uint16_t* ppiq = &mtx->iq[0];
const uint32_t* ppbias = &mtx->bias[0];
const uint16_t* ppq = &mtx->q[0];
const uint16_t* ppiq = &mtx->iq[0];
const uint32_t* ppbias = &mtx->bias[0];
__asm__ volatile(
QUANTIZE_ONE( 0, 0, 0)
QUANTIZE_ONE( 2, 4, 2)
QUANTIZE_ONE( 8, 16, 4)
QUANTIZE_ONE(16, 32, 6)
QUANTIZE_ONE(10, 20, 8)
QUANTIZE_ONE( 4, 8, 10)
QUANTIZE_ONE( 6, 12, 12)
QUANTIZE_ONE(12, 24, 14)
QUANTIZE_ONE(18, 36, 16)
QUANTIZE_ONE(24, 48, 18)
QUANTIZE_ONE(26, 52, 20)
QUANTIZE_ONE(20, 40, 22)
QUANTIZE_ONE(14, 28, 24)
QUANTIZE_ONE(22, 44, 26)
QUANTIZE_ONE(28, 56, 28)
QUANTIZE_ONE(30, 60, 30)
QUANTIZE_ONE(0, 0, 0) //
QUANTIZE_ONE(2, 4, 2) //
QUANTIZE_ONE(8, 16, 4) //
QUANTIZE_ONE(16, 32, 6) //
QUANTIZE_ONE(10, 20, 8) //
QUANTIZE_ONE(4, 8, 10) //
QUANTIZE_ONE(6, 12, 12) //
QUANTIZE_ONE(12, 24, 14) //
QUANTIZE_ONE(18, 36, 16) //
QUANTIZE_ONE(24, 48, 18) //
QUANTIZE_ONE(26, 52, 20) //
QUANTIZE_ONE(20, 40, 22) //
QUANTIZE_ONE(14, 28, 24) //
QUANTIZE_ONE(22, 44, 26) //
QUANTIZE_ONE(28, 56, 28) //
QUANTIZE_ONE(30, 60, 30) //
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[sign]"=&r"(sign), [coeff]"=&r"(coeff),
[level]"=&r"(level)
: [pout]"r"(pout), [ppin]"r"(ppin),
[ppiq]"r"(ppiq), [max_level]"r"(max_level),
[ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
[ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
: "memory", "hi", "lo"
);
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level)
: [pout] "r"(pout), [ppin] "r"(ppin), [ppiq] "r"(ppiq),
[max_level] "r"(max_level), [ppbias] "r"(ppbias),
[ppzthresh] "r"(ppzthresh), [ppsharpen] "r"(ppsharpen), [ppq] "r"(ppq)
: "memory", "hi", "lo");
// Moved out of the macro to increase the chance of breaking out early.
for (i = 15; i >= 0; i--) {
@@ -239,7 +238,7 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
@@ -251,6 +250,7 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
// A - offset in bytes to load from a and b buffers
// E..H - offsets in bytes to store first results to tmp buffer
// E1..H1 - offsets in bytes to store second results to tmp buffer
// clang-format off
#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
"lbu %[temp0], 0+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
"lbu %[temp1], 1+" XSTR(BPS) "*" #A "(%[a]) \n\t" \
@@ -358,6 +358,7 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
"msub %[temp5], %[temp8] \n\t" \
"msub %[temp6], %[temp0] \n\t" \
"msub %[temp7], %[temp1] \n\t"
// clang-format on
static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
@@ -366,28 +367,27 @@ static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
__asm__ volatile(
HORIZONTAL_PASS(0, 0, 4, 8, 12, 64, 68, 72, 76)
HORIZONTAL_PASS(1, 16, 20, 24, 28, 80, 84, 88, 92)
HORIZONTAL_PASS(2, 32, 36, 40, 44, 96, 100, 104, 108)
HORIZONTAL_PASS(3, 48, 52, 56, 60, 112, 116, 120, 124)
"mthi $zero \n\t"
"mtlo $zero \n\t"
VERTICAL_PASS( 0, 16, 32, 48, 64, 80, 96, 112, 0, 8, 16, 24)
VERTICAL_PASS( 4, 20, 36, 52, 68, 84, 100, 116, 2, 10, 18, 26)
VERTICAL_PASS( 8, 24, 40, 56, 72, 88, 104, 120, 4, 12, 20, 28)
VERTICAL_PASS(12, 28, 44, 60, 76, 92, 108, 124, 6, 14, 22, 30)
"mflo %[temp0] \n\t"
"sra %[temp1], %[temp0], 31 \n\t"
"xor %[temp0], %[temp0], %[temp1] \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"sra %[temp0], %[temp0], 5 \n\t"
HORIZONTAL_PASS(0, 0, 4, 8, 12, 64, 68, 72, 76) //
HORIZONTAL_PASS(1, 16, 20, 24, 28, 80, 84, 88, 92) //
HORIZONTAL_PASS(2, 32, 36, 40, 44, 96, 100, 104, 108) //
HORIZONTAL_PASS(3, 48, 52, 56, 60, 112, 116, 120, 124) //
"mthi $zero \n\t"
"mtlo $zero \n\t" //
VERTICAL_PASS(0, 16, 32, 48, 64, 80, 96, 112, 0, 8, 16, 24) //
VERTICAL_PASS(4, 20, 36, 52, 68, 84, 100, 116, 2, 10, 18, 26) //
VERTICAL_PASS(8, 24, 40, 56, 72, 88, 104, 120, 4, 12, 20, 28) //
VERTICAL_PASS(12, 28, 44, 60, 76, 92, 108, 124, 6, 14, 22, 30) //
"mflo %[temp0] \n\t"
"sra %[temp1], %[temp0], 31 \n\t"
"xor %[temp0], %[temp0], %[temp1] \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"sra %[temp0], %[temp0], 5 \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
: "memory", "hi", "lo"
);
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8)
: [a] "r"(a), [b] "r"(b), [w] "r"(w), [tmp] "r"(tmp)
: "memory", "hi", "lo");
return temp0;
}
@@ -412,6 +412,7 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
// temp0..temp15 hold tmp[0]..tmp[15]
// A - offset in bytes to load from src and ref buffers
// TEMP0..TEMP3 - registers for corresponding tmp elements
// clang-format off
#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %[" #TEMP1 "], 0(%[args]) \n\t" \
"lw %[" #TEMP2 "], 4(%[args]) \n\t" \
@@ -477,6 +478,7 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
"sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
// clang-format on
static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
@@ -486,8 +488,8 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
int temp17, temp18, temp19, temp20;
const int c2217 = 2217;
const int c5352 = 5352;
const int* const args[3] =
{ (const int*)src, (const int*)ref, (const int*)out };
const int* const args[3] = {(const int*)src, (const int*)ref,
(const int*)out};
__asm__ volatile(
HORIZONTAL_PASS(0, temp0, temp1, temp2, temp3)
@@ -517,6 +519,7 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
#if !defined(WORK_AROUND_GCC)
// clang-format off
#define GET_SSE_INNER(A, B, C, D) \
"lbu %[temp0], " #A "(%[a]) \n\t" \
"lbu %[temp1], " #A "(%[b]) \n\t" \
@@ -534,11 +537,12 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
"madd %[temp2], %[temp2] \n\t" \
"madd %[temp4], %[temp4] \n\t" \
"madd %[temp6], %[temp6] \n\t"
// clang-format on
#define GET_SSE(A, B, C, D) \
GET_SSE_INNER(A, A + 1, A + 2, A + 3) \
GET_SSE_INNER(B, B + 1, B + 2, B + 3) \
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
// Expands to four consecutive GET_SSE_INNER asm fragments, one per starting
// byte offset A, B, C and D. Each fragment is handed the four consecutive
// offsets X, X + 1, X + 2, X + 3, so one GET_SSE covers 16 byte positions of
// the 'a' and 'b' buffers. The arguments are stringified by GET_SSE_INNER, so
// they must be constant expressions the assembler can evaluate.
#define GET_SSE(A, B, C, D) \
GET_SSE_INNER(A, A + 1, A + 2, A + 3) \
GET_SSE_INNER(B, B + 1, B + 2, B + 3) \
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
@@ -547,32 +551,31 @@ static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
"mult $zero, $zero \n\t"
GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
GET_SSE( 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)
GET_SSE( 9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)
GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS) //
GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS) //
GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS) //
GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS) //
GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS) //
GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS) //
GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS) //
GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS) //
GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS) //
GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS) //
GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS) //
GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS) //
GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS) //
GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS) //
GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS) //
GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS) //
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -582,24 +585,23 @@ static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
"mult $zero, $zero \n\t"
GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS) //
GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS) //
GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS) //
GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS) //
GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS) //
GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS) //
GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS) //
GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS) //
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -609,20 +611,19 @@ static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
"mult $zero, $zero \n\t"
GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS) //
GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS) //
GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS) //
GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS) //
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -632,17 +633,16 @@ static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile(
"mult $zero, $zero \n\t"
"mult $zero, $zero \n\t"
GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS) //
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}

View File

@@ -25,6 +25,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
// O - output
// I - input (macro doesn't change it)
// clang-format off
#define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7, \
I0, I1, I2, I3, I4, I5, I6, I7) \
"addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
@@ -140,6 +141,7 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
"sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
// clang-format on
static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
@@ -149,10 +151,10 @@ static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
int temp17, temp18, temp19, temp20;
const int* const args[3] =
{ (const int*)src, (const int*)ref, (const int*)out };
const int* const args[3] = {(const int*)src, (const int*)ref,
(const int*)out};
__asm__ volatile (
__asm__ volatile(
HORIZONTAL_PASS(0, temp0, temp1, temp2, temp3)
HORIZONTAL_PASS(1, temp4, temp5, temp6, temp7)
HORIZONTAL_PASS(2, temp8, temp9, temp10, temp11)
@@ -178,7 +180,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
__asm__ volatile (
__asm__ volatile(
"ulw %[temp1], 0(%[in]) \n\t"
"ulw %[temp2], 16(%[in]) \n\t"
LOAD_IN_X2(temp5, temp6, 24, 26)
@@ -250,13 +252,14 @@ static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
}
}
// clang-format off
static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
const uint16_t* WEBP_RESTRICT const w) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
__asm__ volatile (
__asm__ volatile(
LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, a,
0, 0, 0, 0,
0, 1, 2, 3,
@@ -317,6 +320,7 @@ static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
);
return abs(temp3 - temp17) >> 5;
}
// clang-format on
static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
const uint8_t* WEBP_RESTRICT const b,
@@ -334,6 +338,7 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
//------------------------------------------------------------------------------
// Intra predictions
// clang-format off
#define FILL_PART(J, SIZE) \
"usw %[value], 0+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
"usw %[value], 4+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
@@ -342,118 +347,125 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
"usw %[value], 12+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \
".endif \n\t"
#define FILL_8_OR_16(DST, VALUE, SIZE) do { \
int value = (VALUE); \
__asm__ volatile ( \
"replv.qb %[value], %[value] \n\t" \
FILL_PART( 0, SIZE) \
FILL_PART( 1, SIZE) \
FILL_PART( 2, SIZE) \
FILL_PART( 3, SIZE) \
FILL_PART( 4, SIZE) \
FILL_PART( 5, SIZE) \
FILL_PART( 6, SIZE) \
FILL_PART( 7, SIZE) \
".if " #SIZE " == 16 \n\t" \
FILL_PART( 8, 16) \
FILL_PART( 9, 16) \
FILL_PART(10, 16) \
FILL_PART(11, 16) \
FILL_PART(12, 16) \
FILL_PART(13, 16) \
FILL_PART(14, 16) \
FILL_PART(15, 16) \
".endif \n\t" \
: [value]"+&r"(value) \
: [dst]"r"((DST)) \
: "memory" \
); \
} while (0)
// Fills a block at DST with the byte VALUE, one row every BPS bytes.
//   DST   - destination pointer (passed to the asm as a read-only register)
//   VALUE - fill value; copied into a local int that the asm overwrites
//   SIZE  - 8 or 16; must be a literal since it is stringified into ".if"
// "replv.qb" replicates the low byte of 'value' into all four bytes of the
// register, and each FILL_PART(J, SIZE) then stores that word into row J.
// Rows 0..7 are always written; rows 8..15 are only assembled when SIZE is 16
// (the selection is done by the assembler's .if/.endif, not by C code).
// NOTE(review): the per-row width is decided inside FILL_PART, which is only
// partly visible here -- presumably SIZE bytes per row.
#define FILL_8_OR_16(DST, VALUE, SIZE) \
do { \
int value = (VALUE); \
__asm__ volatile( \
"replv.qb %[value], %[value] \n\t" \
FILL_PART( 0, SIZE) \
FILL_PART( 1, SIZE) \
FILL_PART( 2, SIZE) \
FILL_PART( 3, SIZE) \
FILL_PART( 4, SIZE) \
FILL_PART( 5, SIZE) \
FILL_PART( 6, SIZE) \
FILL_PART( 7, SIZE) \
".if " #SIZE " == 16 \n\t" \
FILL_PART( 8, 16) \
FILL_PART( 9, 16) \
FILL_PART(10, 16) \
FILL_PART(11, 16) \
FILL_PART(12, 16) \
FILL_PART(13, 16) \
FILL_PART(14, 16) \
FILL_PART(15, 16) \
".endif \n\t" \
: [value]"+&r"(value) \
: [dst]"r"((DST)) \
: "memory" \
); \
} while (0)
// clang-format on
#define VERTICAL_PRED(DST, TOP, SIZE) \
static WEBP_INLINE void VerticalPred##SIZE( \
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) { \
int j; \
if ((TOP)) { \
for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
} else { \
FILL_8_OR_16((DST), 127, (SIZE)); \
} \
}
// Generates the function VerticalPred<SIZE>(DST, TOP).
// When TOP is non-NULL, the SIZE bytes at TOP are copied into each of the
// SIZE rows of DST (rows are BPS bytes apart). When TOP is NULL, the block is
// filled with the constant 127 through FILL_8_OR_16 instead.
// DST and TOP are the parameter names given to the generated function.
#define VERTICAL_PRED(DST, TOP, SIZE) \
static WEBP_INLINE void VerticalPred##SIZE( \
uint8_t* WEBP_RESTRICT(DST), const uint8_t* WEBP_RESTRICT(TOP)) { \
int j; \
if ((TOP)) { \
for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
} else { \
FILL_8_OR_16((DST), 127, (SIZE)); \
} \
}
VERTICAL_PRED(dst, top, 8)
VERTICAL_PRED(dst, top, 16)
#undef VERTICAL_PRED
#define HORIZONTAL_PRED(DST, LEFT, SIZE) \
static WEBP_INLINE void HorizontalPred##SIZE( \
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) { \
if (LEFT) { \
int j; \
for (j = 0; j < (SIZE); ++j) { \
memset((DST) + j * BPS, (LEFT)[j], (SIZE)); \
} \
} else { \
FILL_8_OR_16((DST), 129, (SIZE)); \
} \
}
// Generates the function HorizontalPred<SIZE>(DST, LEFT).
// When LEFT is non-NULL, row j of DST (rows are BPS bytes apart) is set to
// SIZE copies of LEFT[j], for j in [0, SIZE). When LEFT is NULL, the block is
// filled with the constant 129 through FILL_8_OR_16 instead.
// DST and LEFT are the parameter names given to the generated function.
#define HORIZONTAL_PRED(DST, LEFT, SIZE) \
static WEBP_INLINE void HorizontalPred##SIZE( \
uint8_t* WEBP_RESTRICT(DST), const uint8_t* WEBP_RESTRICT(LEFT)) { \
if (LEFT) { \
int j; \
for (j = 0; j < (SIZE); ++j) { \
memset((DST) + j * BPS, (LEFT)[j], (SIZE)); \
} \
} else { \
FILL_8_OR_16((DST), 129, (SIZE)); \
} \
}
HORIZONTAL_PRED(dst, left, 8)
HORIZONTAL_PRED(dst, left, 16)
#undef HORIZONTAL_PRED
#define CLIPPING() \
"preceu.ph.qbl %[temp2], %[temp0] \n\t" \
"preceu.ph.qbr %[temp0], %[temp0] \n\t" \
"preceu.ph.qbl %[temp3], %[temp1] \n\t" \
"preceu.ph.qbr %[temp1], %[temp1] \n\t" \
"addu.ph %[temp2], %[temp2], %[leftY_1] \n\t" \
"addu.ph %[temp0], %[temp0], %[leftY_1] \n\t" \
"addu.ph %[temp3], %[temp3], %[leftY_1] \n\t" \
"addu.ph %[temp1], %[temp1], %[leftY_1] \n\t" \
"shll_s.ph %[temp2], %[temp2], 7 \n\t" \
"shll_s.ph %[temp0], %[temp0], 7 \n\t" \
"shll_s.ph %[temp3], %[temp3], 7 \n\t" \
"shll_s.ph %[temp1], %[temp1], 7 \n\t" \
"precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
// Asm fragment operating on the eight bytes held in temp0 and temp1.
//  1. preceu.ph.qbl/qbr widen the bytes to unsigned halfwords; the left
//     halves go to temp2/temp3, the right halves stay in temp0/temp1.
//  2. addu.ph adds the packed halfword pair leftY_1 to every halfword.
//  3. shll_s.ph (saturating shift left by 7) followed by precrqu_s.qb.ph
//     (saturating narrowing to unsigned bytes) packs the results back into
//     temp0 and temp1.
// NOTE(review): steps 2-3 appear to clamp each sum to [0, 255] -- confirm
// against the MIPS DSP ASE description of shll_s.ph / precrqu_s.qb.ph.
// Expects the operands temp0..temp3 and leftY_1 to be bound by the enclosing
// asm statement; temp2 and temp3 are clobbered as scratch.
#define CLIPPING() \
"preceu.ph.qbl %[temp2], %[temp0] \n\t" \
"preceu.ph.qbr %[temp0], %[temp0] \n\t" \
"preceu.ph.qbl %[temp3], %[temp1] \n\t" \
"preceu.ph.qbr %[temp1], %[temp1] \n\t" \
"addu.ph %[temp2], %[temp2], %[leftY_1] \n\t" \
"addu.ph %[temp0], %[temp0], %[leftY_1] \n\t" \
"addu.ph %[temp3], %[temp3], %[leftY_1] \n\t" \
"addu.ph %[temp1], %[temp1], %[leftY_1] \n\t" \
"shll_s.ph %[temp2], %[temp2], 7 \n\t" \
"shll_s.ph %[temp0], %[temp0], 7 \n\t" \
"shll_s.ph %[temp3], %[temp3], 7 \n\t" \
"shll_s.ph %[temp1], %[temp1], 7 \n\t" \
"precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
"precrqu_s.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE) do { \
int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y]; \
int temp0, temp1, temp2, temp3; \
__asm__ volatile ( \
"replv.ph %[leftY_1], %[leftY_1] \n\t" \
"ulw %[temp0], 0(%[top]) \n\t" \
"ulw %[temp1], 4(%[top]) \n\t" \
"subu.ph %[leftY_1], %[leftY_1], %[left_1] \n\t" \
CLIPPING() \
"usw %[temp0], 0(%[dst]) \n\t" \
"usw %[temp1], 4(%[dst]) \n\t" \
".if " #SIZE " == 16 \n\t" \
"ulw %[temp0], 8(%[top]) \n\t" \
"ulw %[temp1], 12(%[top]) \n\t" \
CLIPPING() \
"usw %[temp0], 8(%[dst]) \n\t" \
"usw %[temp1], 12(%[dst]) \n\t" \
".endif \n\t" \
: [leftY_1]"+&r"(leftY_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), \
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3) \
: [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST)) \
: "memory" \
); \
} while (0)
// clang-format off
// Produces one row of output at DST from TOP and LEFT.
// For the current row index 'y', the asm loads TOP[0..7] (and TOP[8..15] when
// SIZE is 16), adds the per-row term (LEFT[y] - left_1) to each widened byte,
// narrows back to bytes with saturation through CLIPPING(), and stores the
// result with unaligned word stores at DST.
// This macro is not self-contained: it reads 'y' and 'left_1' from the
// enclosing scope (CLIP_TO_DST declares both). SIZE must be a literal, since
// it is stringified into an assembler-level ".if".
// leftY_1 holds LEFT[y] in both halfwords and is modified by the asm, hence
// the "+&r" constraint.
#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE) \
do { \
int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y]; \
int temp0, temp1, temp2, temp3; \
__asm__ volatile( \
"replv.ph %[leftY_1], %[leftY_1] \n\t" \
"ulw %[temp0], 0(%[top]) \n\t" \
"ulw %[temp1], 4(%[top]) \n\t" \
"subu.ph %[leftY_1], %[leftY_1], %[left_1] \n\t" \
CLIPPING() \
"usw %[temp0], 0(%[dst]) \n\t" \
"usw %[temp1], 4(%[dst]) \n\t" \
".if " #SIZE " == 16 \n\t" \
"ulw %[temp0], 8(%[top]) \n\t" \
"ulw %[temp1], 12(%[top]) \n\t" \
CLIPPING() \
"usw %[temp0], 8(%[dst]) \n\t" \
"usw %[temp1], 12(%[dst]) \n\t" \
".endif \n\t" \
: [leftY_1]"+&r"(leftY_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), \
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3) \
: [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST)) \
: "memory" \
); \
} while (0)
// clang-format on
#define CLIP_TO_DST(DST, LEFT, TOP, SIZE) do { \
int y; \
const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1]; \
for (y = 0; y < (SIZE); ++y) { \
CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE)); \
(DST) += BPS; \
} \
} while (0)
// Runs CLIP_8B_TO_DST once per row for SIZE rows.
// Declares the row counter 'y' and 'left_1' (LEFT[-1] packed into both
// halfwords of an int); CLIP_8B_TO_DST picks both up by name.
// Side effect: DST is advanced by BPS after every row, so it must be a
// modifiable lvalue and ends up SIZE * BPS bytes past its starting value.
// LEFT[-1] is read, so LEFT must have a valid element before index 0.
#define CLIP_TO_DST(DST, LEFT, TOP, SIZE) \
do { \
int y; \
const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1]; \
for (y = 0; y < (SIZE); ++y) { \
CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE)); \
(DST) += BPS; \
} \
} while (0)
// clang-format off
#define TRUE_MOTION(DST, LEFT, TOP, SIZE) \
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST), \
const uint8_t* WEBP_RESTRICT (LEFT), \
@@ -476,6 +488,7 @@ static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST), \
} \
} \
}
// clang-format on
TRUE_MOTION(dst, left, top, 8)
TRUE_MOTION(dst, left, top, 16)
@@ -556,41 +569,40 @@ static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
int temp0, temp1, temp2, temp3;
__asm__ volatile(
"beqz %[top], 2f \n\t"
"ulw %[temp0], 0(%[top]) \n\t"
"ulw %[temp1], 4(%[top]) \n\t"
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[DC], %[temp0], %[temp1] \n\t"
"move %[DC1], %[DC] \n\t"
"beqz %[left], 1f \n\t"
"ulw %[temp2], 0(%[left]) \n\t"
"ulw %[temp3], 4(%[left]) \n\t"
"raddu.w.qb %[temp2], %[temp2] \n\t"
"raddu.w.qb %[temp3], %[temp3] \n\t"
"addu %[DC1], %[temp2], %[temp3] \n\t"
"1: \n\t"
"addu %[DC], %[DC], %[DC1] \n\t"
"j 3f \n\t"
"2: \n\t"
"beqz %[left], 4f \n\t"
"ulw %[temp2], 0(%[left]) \n\t"
"ulw %[temp3], 4(%[left]) \n\t"
"raddu.w.qb %[temp2], %[temp2] \n\t"
"raddu.w.qb %[temp3], %[temp3] \n\t"
"addu %[DC], %[temp2], %[temp3] \n\t"
"addu %[DC], %[DC], %[DC] \n\t"
"3: \n\t"
"shra_r.w %[DC], %[DC], 4 \n\t"
"j 5f \n\t"
"4: \n\t"
"li %[DC], 0x80 \n\t"
"5: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [DC]"=&r"(DC),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [DC1]"=&r"(DC1)
: [left]"r"(left), [top]"r"(top)
: "memory"
);
"beqz %[top], 2f \n\t"
"ulw %[temp0], 0(%[top]) \n\t"
"ulw %[temp1], 4(%[top]) \n\t"
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[DC], %[temp0], %[temp1] \n\t"
"move %[DC1], %[DC] \n\t"
"beqz %[left], 1f \n\t"
"ulw %[temp2], 0(%[left]) \n\t"
"ulw %[temp3], 4(%[left]) \n\t"
"raddu.w.qb %[temp2], %[temp2] \n\t"
"raddu.w.qb %[temp3], %[temp3] \n\t"
"addu %[DC1], %[temp2], %[temp3] \n\t"
"1: \n\t"
"addu %[DC], %[DC], %[DC1] \n\t"
"j 3f \n\t"
"2: \n\t"
"beqz %[left], 4f \n\t"
"ulw %[temp2], 0(%[left]) \n\t"
"ulw %[temp3], 4(%[left]) \n\t"
"raddu.w.qb %[temp2], %[temp2] \n\t"
"raddu.w.qb %[temp3], %[temp3] \n\t"
"addu %[DC], %[temp2], %[temp3] \n\t"
"addu %[DC], %[DC], %[DC] \n\t"
"3: \n\t"
"shra_r.w %[DC], %[DC], 4 \n\t"
"j 5f \n\t"
"4: \n\t"
"li %[DC], 0x80 \n\t"
"5: \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [DC] "=&r"(DC),
[temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [DC1] "=&r"(DC1)
: [left] "r"(left), [top] "r"(top)
: "memory");
FILL_8_OR_16(dst, DC, 8);
}
@@ -619,7 +631,7 @@ static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
const int c35 = 0xff00ff;
__asm__ volatile (
__asm__ volatile(
"lbu %[temp1], 0(%[top]) \n\t"
"lbu %[a10], 1(%[top]) \n\t"
"lbu %[temp2], 2(%[top]) \n\t"
@@ -790,7 +802,7 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
__asm__ volatile(
"ulw %[temp0], -4(%[top]) \n\t"
"ulw %[temp1], 0(%[top]) \n\t"
"preceu.ph.qbl %[temp2], %[temp0] \n\t"
@@ -887,7 +899,7 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
__asm__ volatile(
"ulw %[temp0], 0(%[top]) \n\t"
"ulw %[temp1], 4(%[top]) \n\t"
"preceu.ph.qbla %[temp2], %[temp0] \n\t"
@@ -936,7 +948,7 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
__asm__ volatile(
"ulw %[temp0], -5(%[top]) \n\t"
"ulw %[temp1], -1(%[top]) \n\t"
"preceu.ph.qbla %[temp2], %[temp0] \n\t"
@@ -983,7 +995,7 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
__asm__ volatile (
__asm__ volatile(
"ulw %[temp0], -5(%[top]) \n\t"
"preceu.ph.qbl %[temp1], %[temp0] \n\t"
"preceu.ph.qbr %[temp2], %[temp0] \n\t"
@@ -1071,6 +1083,7 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
#if !defined(WORK_AROUND_GCC)
// clang-format off
#define GET_SSE_INNER(A) \
"lw %[temp0], " #A "(%[a]) \n\t" \
"lw %[temp1], " #A "(%[b]) \n\t" \
@@ -1082,41 +1095,41 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
"subq.ph %[temp0], %[temp0], %[temp1] \n\t" \
"dpa.w.ph $ac0, %[temp2], %[temp2] \n\t" \
"dpa.w.ph $ac0, %[temp0], %[temp0] \n\t"
// clang-format on
#define GET_SSE(A, B, C, D) \
GET_SSE_INNER(A) \
GET_SSE_INNER(B) \
GET_SSE_INNER(C) \
// Expands to four consecutive GET_SSE_INNER asm fragments, one per byte offset
// A, B, C and D. Each fragment starts by loading one 32-bit word from 'a' and
// one from 'b' at that offset. The arguments are stringified by
// GET_SSE_INNER, so they must be constant expressions the assembler can
// evaluate.
#define GET_SSE(A, B, C, D) \
GET_SSE_INNER(A) \
GET_SSE_INNER(B) \
GET_SSE_INNER(C) \
GET_SSE_INNER(D)
static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
"mult $zero, $zero \n\t"
GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
GET_SSE( 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS)
GET_SSE( 9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS)
GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
__asm__ volatile(
"mult $zero, $zero \n\t" //
GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS) //
GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS) //
GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS) //
GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS) //
GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS) //
GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS) //
GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS) //
GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS) //
GET_SSE(8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS) //
GET_SSE(9 * BPS, 4 + 9 * BPS, 8 + 9 * BPS, 12 + 9 * BPS) //
GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS) //
GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS) //
GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS) //
GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS) //
GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS) //
GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS) //
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -1124,22 +1137,21 @@ static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
"mult $zero, $zero \n\t"
GET_SSE( 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS)
GET_SSE( 1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS)
GET_SSE( 2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS)
GET_SSE( 3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS)
GET_SSE( 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS)
GET_SSE( 5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS)
GET_SSE( 6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS)
GET_SSE( 7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
__asm__ volatile(
"mult $zero, $zero \n\t" //
GET_SSE(0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS) //
GET_SSE(1 * BPS, 4 + 1 * BPS, 8 + 1 * BPS, 12 + 1 * BPS) //
GET_SSE(2 * BPS, 4 + 2 * BPS, 8 + 2 * BPS, 12 + 2 * BPS) //
GET_SSE(3 * BPS, 4 + 3 * BPS, 8 + 3 * BPS, 12 + 3 * BPS) //
GET_SSE(4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS) //
GET_SSE(5 * BPS, 4 + 5 * BPS, 8 + 5 * BPS, 12 + 5 * BPS) //
GET_SSE(6 * BPS, 4 + 6 * BPS, 8 + 6 * BPS, 12 + 6 * BPS) //
GET_SSE(7 * BPS, 4 + 7 * BPS, 8 + 7 * BPS, 12 + 7 * BPS) //
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -1147,18 +1159,17 @@ static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
"mult $zero, $zero \n\t"
GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
__asm__ volatile(
"mult $zero, $zero \n\t" //
GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS) //
GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS) //
GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS) //
GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS) //
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -1166,15 +1177,14 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
"mult $zero, $zero \n\t"
GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
"mflo %[count] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [count]"=&r"(count)
: [a]"r"(a), [b]"r"(b)
: "memory", "hi", "lo"
);
__asm__ volatile(
"mult $zero, $zero \n\t" //
GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS) //
"mflo %[count] \n\t"
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [count] "=&r"(count)
: [a] "r"(a), [b] "r"(b)
: "memory", "hi", "lo");
return count;
}
@@ -1200,6 +1210,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
// K - offset in bytes (kZigzag[n] * 4)
// N - offset in bytes (n * 2)
// N1 - offset in bytes ((n + 1) * 2)
// clang-format off
#define QUANTIZE_ONE(J, K, N, N1) \
"ulw %[temp1], " #J "(%[ppin]) \n\t" \
"ulw %[temp2], " #J "(%[ppsharpen]) \n\t" \
@@ -1285,44 +1296,42 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
"sh $0, " #N1 "(%[pout]) \n\t" \
"usw $0, " #J "(%[ppin]) \n\t" \
"3: \n\t"
// clang-format on
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
int sign, coeff, level;
int max_level = MAX_LEVEL;
int max_level1 = max_level << 16 | max_level;
int ret = 0;
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
int16_t* ppin = &in[0];
int16_t* pout = &out[0];
const uint16_t* ppsharpen = &mtx->sharpen[0];
const uint32_t* ppzthresh = &mtx->zthresh[0];
const uint16_t* ppq = &mtx->q[0];
const uint16_t* ppiq = &mtx->iq[0];
const uint32_t* ppbias = &mtx->bias[0];
const uint16_t* ppq = &mtx->q[0];
const uint16_t* ppiq = &mtx->iq[0];
const uint32_t* ppbias = &mtx->bias[0];
__asm__ volatile (
QUANTIZE_ONE( 0, 0, 0, 2)
QUANTIZE_ONE( 4, 8, 10, 12)
QUANTIZE_ONE( 8, 16, 4, 8)
QUANTIZE_ONE(12, 24, 14, 24)
QUANTIZE_ONE(16, 32, 6, 16)
QUANTIZE_ONE(20, 40, 22, 26)
QUANTIZE_ONE(24, 48, 18, 20)
QUANTIZE_ONE(28, 56, 28, 30)
__asm__ volatile(
QUANTIZE_ONE(0, 0, 0, 2) //
QUANTIZE_ONE(4, 8, 10, 12) //
QUANTIZE_ONE(8, 16, 4, 8) //
QUANTIZE_ONE(12, 24, 14, 24) //
QUANTIZE_ONE(16, 32, 6, 16) //
QUANTIZE_ONE(20, 40, 22, 26) //
QUANTIZE_ONE(24, 48, 18, 20) //
QUANTIZE_ONE(28, 56, 28, 30) //
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[sign]"=&r"(sign), [coeff]"=&r"(coeff),
[level]"=&r"(level), [temp6]"=&r"(temp6), [ret]"+&r"(ret)
: [ppin]"r"(ppin), [pout]"r"(pout), [max_level1]"r"(max_level1),
[ppiq]"r"(ppiq), [max_level]"r"(max_level),
[ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
[ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
: "memory", "hi", "lo"
);
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[sign] "=&r"(sign), [coeff] "=&r"(coeff), [level] "=&r"(level),
[temp6] "=&r"(temp6), [ret] "+&r"(ret)
: [ppin] "r"(ppin), [pout] "r"(pout), [max_level1] "r"(max_level1),
[ppiq] "r"(ppiq), [max_level] "r"(max_level), [ppbias] "r"(ppbias),
[ppzthresh] "r"(ppzthresh), [ppsharpen] "r"(ppsharpen), [ppq] "r"(ppq)
: "memory", "hi", "lo");
return (ret != 0);
}
@@ -1330,7 +1339,7 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
@@ -1341,6 +1350,7 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
// temp0..temp7 holds tmp[0]..tmp[15]
// A, B, C, D - offset in bytes to load from in buffer
// TEMP0, TEMP1 - registers for corresponding tmp elements
// clang-format off
#define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1) \
"lh %[" #TEMP0 "], " #A "(%[in]) \n\t" \
"lh %[" #TEMP1 "], " #B "(%[in]) \n\t" \
@@ -1373,26 +1383,26 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
"usw %[" #TEMP2 "], " #B "(%[out]) \n\t" \
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
// clang-format on
static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
HORIZONTAL_PASS_WHT( 0, 32, 64, 96, temp0, temp1)
HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)
HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)
HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)
VERTICAL_PASS_WHT(0, 8, 16, 24, temp0, temp2, temp4, temp6)
VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9)
: [in]"r"(in), [out]"r"(out)
: "memory"
);
__asm__ volatile(
HORIZONTAL_PASS_WHT(0, 32, 64, 96, temp0, temp1) //
HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3) //
HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5) //
HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7) //
VERTICAL_PASS_WHT(0, 8, 16, 24, temp0, temp2, temp4, temp6) //
VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7) //
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8),
[temp9] "=&r"(temp9)
: [in] "r"(in), [out] "r"(out)
: "memory");
}
#undef VERTICAL_PASS_WHT
@@ -1401,6 +1411,7 @@ static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
// macro for converting coefficients to bin
// convert 8 coeffs at time
// A, B, C, D - offsets in bytes to load from out buffer
// clang-format off
#define CONVERT_COEFFS_TO_BIN(A, B, C, D) \
"ulw %[temp0], " #A "(%[out]) \n\t" \
"ulw %[temp1], " #B "(%[out]) \n\t" \
@@ -1466,12 +1477,13 @@ static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
"lw %[temp8], 0(%[temp3]) \n\t" \
"addiu %[temp8], %[temp8], 1 \n\t" \
"sw %[temp8], 0(%[temp3]) \n\t"
// clang-format on
static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
int distribution[MAX_COEFF_THRESH + 1] = {0};
const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
@@ -1480,15 +1492,14 @@ static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin.
__asm__ volatile (
CONVERT_COEFFS_TO_BIN( 0, 4, 8, 12)
CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [dist]"r"(distribution), [out]"r"(out), [max_coeff]"r"(max_coeff)
: "memory"
);
__asm__ volatile(
CONVERT_COEFFS_TO_BIN(0, 4, 8, 12) //
CONVERT_COEFFS_TO_BIN(16, 20, 24, 28) //
: [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
[temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5),
[temp6] "=&r"(temp6), [temp7] "=&r"(temp7), [temp8] "=&r"(temp8)
: [dist] "r"(distribution), [out] "r"(out), [max_coeff] "r"(max_coeff)
: "memory");
}
VP8SetHistogramData(distribution, histo);
}

View File

@@ -16,30 +16,32 @@
#if defined(WEBP_USE_MSA)
#include <stdlib.h>
#include "src/dsp/msa_macro.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Transforms
#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) do { \
v4i32 a1_m, b1_m, c1_m, d1_m; \
const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091); \
const v4i32 sinpi8sqrt2 = __msa_fill_w(35468); \
v4i32 c_tmp1_m = in1 * sinpi8sqrt2; \
v4i32 c_tmp2_m = in3 * cospi8sqrt2minus1; \
v4i32 d_tmp1_m = in1 * cospi8sqrt2minus1; \
v4i32 d_tmp2_m = in3 * sinpi8sqrt2; \
\
ADDSUB2(in0, in2, a1_m, b1_m); \
SRAI_W2_SW(c_tmp1_m, c_tmp2_m, 16); \
c_tmp2_m = c_tmp2_m + in3; \
c1_m = c_tmp1_m - c_tmp2_m; \
SRAI_W2_SW(d_tmp1_m, d_tmp2_m, 16); \
d_tmp1_m = d_tmp1_m + in1; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
} while (0)
#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
do { \
v4i32 a1_m, b1_m, c1_m, d1_m; \
const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091); \
const v4i32 sinpi8sqrt2 = __msa_fill_w(35468); \
v4i32 c_tmp1_m = in1 * sinpi8sqrt2; \
v4i32 c_tmp2_m = in3 * cospi8sqrt2minus1; \
v4i32 d_tmp1_m = in1 * cospi8sqrt2minus1; \
v4i32 d_tmp2_m = in3 * sinpi8sqrt2; \
\
ADDSUB2(in0, in2, a1_m, b1_m); \
SRAI_W2_SW(c_tmp1_m, c_tmp2_m, 16); \
c_tmp2_m = c_tmp2_m + in3; \
c1_m = c_tmp1_m - c_tmp2_m; \
SRAI_W2_SW(d_tmp1_m, d_tmp2_m, 16); \
d_tmp1_m = d_tmp1_m + in1; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
} while (0)
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
@@ -48,7 +50,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
v16i8 dest0, dest1, dest2, dest3;
const v16i8 zero = { 0 };
const v16i8 zero = {0};
LD_SH2(in, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
@@ -59,10 +61,10 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(ref, BPS, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
res0, res1, res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
res0, res1, res2, res3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SW4_0_255(res0, res1, res2, res3);
PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
@@ -86,13 +88,13 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
uint32_t in0, in1, in2, in3;
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
v8i16 t0, t1, t2, t3;
v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 };
const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 };
const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 };
const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 };
const v8i16 mask3 = { 0, 4, 1, 5, 2, 6, 3, 7 };
const v8i16 cnst0 = { 2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352 };
const v8i16 cnst1 = { 5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217 };
v16u8 srcl0, srcl1, src0 = {0}, src1 = {0};
const v8i16 mask0 = {0, 4, 8, 12, 1, 5, 9, 13};
const v8i16 mask1 = {3, 7, 11, 15, 2, 6, 10, 14};
const v8i16 mask2 = {4, 0, 5, 1, 6, 2, 7, 3};
const v8i16 mask3 = {0, 4, 1, 5, 2, 6, 3, 7};
const v8i16 cnst0 = {2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352};
const v8i16 cnst1 = {5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217};
LW4(src, BPS, in0, in1, in2, in3);
INSERT_W4_UB(in0, in1, in2, in3, src0);
@@ -136,29 +138,29 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
int16_t* WEBP_RESTRICT out) {
v8i16 in0 = { 0 };
v8i16 in1 = { 0 };
v8i16 in0 = {0};
v8i16 in1 = {0};
v8i16 tmp0, tmp1, tmp2, tmp3;
v8i16 out0, out1;
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
in0 = __msa_insert_h(in0, 0, in[ 0]);
in0 = __msa_insert_h(in0, 1, in[ 64]);
in0 = __msa_insert_h(in0, 0, in[0]);
in0 = __msa_insert_h(in0, 1, in[64]);
in0 = __msa_insert_h(in0, 2, in[128]);
in0 = __msa_insert_h(in0, 3, in[192]);
in0 = __msa_insert_h(in0, 4, in[ 16]);
in0 = __msa_insert_h(in0, 5, in[ 80]);
in0 = __msa_insert_h(in0, 4, in[16]);
in0 = __msa_insert_h(in0, 5, in[80]);
in0 = __msa_insert_h(in0, 6, in[144]);
in0 = __msa_insert_h(in0, 7, in[208]);
in1 = __msa_insert_h(in1, 0, in[ 48]);
in1 = __msa_insert_h(in1, 0, in[48]);
in1 = __msa_insert_h(in1, 1, in[112]);
in1 = __msa_insert_h(in1, 2, in[176]);
in1 = __msa_insert_h(in1, 3, in[240]);
in1 = __msa_insert_h(in1, 4, in[ 32]);
in1 = __msa_insert_h(in1, 5, in[ 96]);
in1 = __msa_insert_h(in1, 4, in[32]);
in1 = __msa_insert_h(in1, 5, in[96]);
in1 = __msa_insert_h(in1, 6, in[160]);
in1 = __msa_insert_h(in1, 7, in[224]);
ADDSUB2(in0, in1, tmp0, tmp1);
@@ -176,14 +178,14 @@ static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
const uint16_t* WEBP_RESTRICT w) {
int sum;
uint32_t in0_m, in1_m, in2_m, in3_m;
v16i8 src0 = { 0 };
v16i8 src0 = {0};
v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
v4i32 dst0, dst1;
const v16i8 zero = { 0 };
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
const v16i8 zero = {0};
const v8i16 mask0 = {0, 1, 2, 3, 8, 9, 10, 11};
const v8i16 mask1 = {4, 5, 6, 7, 12, 13, 14, 15};
const v8i16 mask2 = {0, 4, 8, 12, 1, 5, 9, 13};
const v8i16 mask3 = {3, 7, 11, 15, 2, 6, 10, 14};
LW4(in, BPS, in0_m, in1_m, in2_m, in3_m);
INSERT_W4_SB(in0_m, in1_m, in2_m, in3_m, src0);
@@ -233,14 +235,14 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
int distribution[MAX_COEFF_THRESH + 1] = {0};
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
{
int k;
v8i16 coeff0, coeff1;
const v8i16 zero = { 0 };
const v8i16 zero = {0};
const v8i16 max_coeff_thr = __msa_ldi_h(MAX_COEFF_THRESH);
LD_SH2(&out[0], 8, coeff0, coeff1);
coeff0 = __msa_add_a_h(coeff0, zero);
@@ -269,7 +271,7 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
// vertical
static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
const v16u8 A1 = { 0 };
const v16u8 A1 = {0};
const uint64_t val_m = LD(top - 1);
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
const v16u8 B = SLDI_UB(A, A, 1);
@@ -307,7 +309,7 @@ static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
const v16u8 A2 = { 0 };
const v16u8 A2 = {0};
const uint64_t val_m = LD(top - 5);
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]);
@@ -328,7 +330,7 @@ static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
const v16u8 A1 = { 0 };
const v16u8 A1 = {0};
const uint64_t val_m = LD(top);
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
const v16u8 B = SLDI_UB(A, A, 1);
@@ -360,13 +362,13 @@ static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
DST(0, 0) = DST(1, 2) = AVG2(X, A);
DST(1, 0) = DST(2, 2) = AVG2(A, B);
DST(2, 0) = DST(3, 2) = AVG2(B, C);
DST(3, 0) = AVG2(C, D);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(3, 0) = AVG2(C, D);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
DST(3, 1) = AVG3(B, C, D);
DST(3, 1) = AVG3(B, C, D);
}
static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
@@ -379,16 +381,16 @@ static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
const int F = top[5];
const int G = top[6];
const int H = top[7];
DST(0, 0) = AVG2(A, B);
DST(0, 0) = AVG2(A, B);
DST(1, 0) = DST(0, 2) = AVG2(B, C);
DST(2, 0) = DST(1, 2) = AVG2(C, D);
DST(3, 0) = DST(2, 2) = AVG2(D, E);
DST(0, 1) = AVG3(A, B, C);
DST(0, 1) = AVG3(A, B, C);
DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
}
static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
@@ -397,14 +399,13 @@ static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
const int J = top[-3];
const int K = top[-4];
const int L = top[-5];
DST(0, 0) = AVG2(I, J);
DST(0, 0) = AVG2(I, J);
DST(2, 0) = DST(0, 1) = AVG2(J, K);
DST(2, 1) = DST(0, 2) = AVG2(K, L);
DST(1, 0) = AVG3(I, J, K);
DST(1, 0) = AVG3(I, J, K);
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
DST(3, 2) = DST(2, 2) =
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
@@ -420,25 +421,25 @@ static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
DST(0, 0) = DST(2, 1) = AVG2(I, X);
DST(0, 1) = DST(2, 2) = AVG2(J, I);
DST(0, 2) = DST(2, 3) = AVG2(K, J);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
DST(1, 3) = AVG3(L, K, J);
DST(1, 3) = AVG3(L, K, J);
}
static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
const v16i8 zero = { 0 };
const v16i8 zero = {0};
const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
const v8i16 L1 = (v8i16)__msa_fill_h(top[-3]);
const v8i16 L2 = (v8i16)__msa_fill_h(top[-4]);
const v8i16 L3 = (v8i16)__msa_fill_h(top[-5]);
const v16u8 T1 = LD_UB(top);
const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
const v8i16 d = T - TL;
v8i16 r0, r1, r2, r3;
ADD4(d, L0, d, L1, d, L2, d, L3, r0, r1, r2, r3);
@@ -466,10 +467,11 @@ static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
// luma 16x16 prediction
#define STORE16x16(out, dst) do { \
ST_UB8(out, out, out, out, out, out, out, out, dst + 0 * BPS, BPS); \
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
} while (0)
#define STORE16x16(out, dst) \
do { \
ST_UB8(out, out, out, out, out, out, out, out, dst + 0 * BPS, BPS); \
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
} while (0)
static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
@@ -508,7 +510,7 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
if (top != NULL) {
int j;
v8i16 d1, d2;
const v16i8 zero = { 0 };
const v16i8 zero = {0};
const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
const v16u8 T = LD_UB(top);
ILVRL_B2_SH(zero, T, d1, d2);
@@ -554,17 +556,17 @@ static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
const v8u16 dctemp = dctop + dcleft;
DC = HADD_UH_U32(dctemp);
DC = (DC + 16) >> 5;
} else if (left != NULL) { // left but no top
} else if (left != NULL) { // left but no top
const v16u8 rleft = LD_UB(left);
const v8u16 dcleft = __msa_hadd_u_h(rleft, rleft);
DC = HADD_UH_U32(dcleft);
DC = (DC + DC + 16) >> 5;
} else if (top != NULL) { // top but no left
} else if (top != NULL) { // top but no left
const v16u8 rtop = LD_UB(top);
const v8u16 dctop = __msa_hadd_u_h(rtop, rtop);
DC = HADD_UH_U32(dctop);
DC = (DC + DC + 16) >> 5;
} else { // no top, no left, nothing.
} else { // no top, no left, nothing.
DC = 0x80;
}
out = (v16u8)__msa_fill_b(DC);
@@ -582,21 +584,23 @@ static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
// Chroma 8x8 prediction
#define CALC_DC8(in, out) do { \
const v8u16 temp0 = __msa_hadd_u_h(in, in); \
const v4u32 temp1 = __msa_hadd_u_w(temp0, temp0); \
const v2i64 temp2 = (v2i64)__msa_hadd_u_d(temp1, temp1); \
const v2i64 temp3 = __msa_splati_d(temp2, 1); \
const v2i64 temp4 = temp3 + temp2; \
const v16i8 temp5 = (v16i8)__msa_srari_d(temp4, 4); \
const v2i64 temp6 = (v2i64)__msa_splati_b(temp5, 0); \
out = __msa_copy_s_d(temp6, 0); \
} while (0)
#define CALC_DC8(in, out) \
do { \
const v8u16 temp0 = __msa_hadd_u_h(in, in); \
const v4u32 temp1 = __msa_hadd_u_w(temp0, temp0); \
const v2i64 temp2 = (v2i64)__msa_hadd_u_d(temp1, temp1); \
const v2i64 temp3 = __msa_splati_d(temp2, 1); \
const v2i64 temp4 = temp3 + temp2; \
const v16i8 temp5 = (v16i8)__msa_srari_d(temp4, 4); \
const v2i64 temp6 = (v2i64)__msa_splati_b(temp5, 0); \
out = __msa_copy_s_d(temp6, 0); \
} while (0)
#define STORE8x8(out, dst) do { \
SD4(out, out, out, out, dst + 0 * BPS, BPS); \
SD4(out, out, out, out, dst + 4 * BPS, BPS); \
} while (0)
#define STORE8x8(out, dst) \
do { \
SD4(out, out, out, out, dst + 0 * BPS, BPS); \
SD4(out, out, out, out, dst + 4 * BPS, BPS); \
} while (0)
static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT top) {
@@ -640,8 +644,8 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
int j;
const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
const v16u8 T1 = LD_UB(top);
const v16i8 zero = { 0 };
const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
const v16i8 zero = {0};
const v8i16 T = (v8i16)__msa_ilvr_b(zero, (v16i8)T1);
const v8i16 d = T - TL;
for (j = 0; j < 8; j += 4) {
uint64_t out0, out1, out2, out3;
@@ -677,21 +681,21 @@ static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
const uint8_t* WEBP_RESTRICT left,
const uint8_t* WEBP_RESTRICT top) {
uint64_t out;
v16u8 src = { 0 };
v16u8 src = {0};
if (top != NULL && left != NULL) {
const uint64_t left_m = LD(left);
const uint64_t top_m = LD(top);
INSERT_D2_UB(left_m, top_m, src);
CALC_DC8(src, out);
} else if (left != NULL) { // left but no top
} else if (left != NULL) { // left but no top
const uint64_t left_m = LD(left);
INSERT_D2_UB(left_m, left_m, src);
CALC_DC8(src, out);
} else if (top != NULL) { // top but no left
} else if (top != NULL) { // top but no left
const uint64_t top_m = LD(top);
INSERT_D2_UB(top_m, top_m, src);
CALC_DC8(src, out);
} else { // no top, no left, nothing.
} else { // no top, no left, nothing.
src = (v16u8)__msa_fill_b(0x80);
out = __msa_copy_s_d((v2i64)src, 0);
}
@@ -719,27 +723,29 @@ static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
//------------------------------------------------------------------------------
// Metric
#define PACK_DOTP_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) do { \
v16u8 tmp0, tmp1; \
v8i16 tmp2, tmp3; \
ILVRL_B2_UB(in0, in1, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1); \
ILVRL_B2_UB(in2, in3, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
} while (0)
#define PACK_DOTP_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
do { \
v16u8 tmp0, tmp1; \
v8i16 tmp2, tmp3; \
ILVRL_B2_UB(in0, in1, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1); \
ILVRL_B2_UB(in2, in3, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DOTP_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
} while (0)
#define PACK_DPADD_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) do { \
v16u8 tmp0, tmp1; \
v8i16 tmp2, tmp3; \
ILVRL_B2_UB(in0, in1, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1); \
ILVRL_B2_UB(in2, in3, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
} while (0)
#define PACK_DPADD_UB4_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
do { \
v16u8 tmp0, tmp1; \
v8i16 tmp2, tmp3; \
ILVRL_B2_UB(in0, in1, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out0, out1); \
ILVRL_B2_UB(in2, in3, tmp0, tmp1); \
HSUB_UB2_SH(tmp0, tmp1, tmp2, tmp3); \
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
} while (0)
static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
@@ -814,7 +820,7 @@ static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
const uint8_t* WEBP_RESTRICT b) {
uint32_t sum = 0;
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
v16u8 src = {0}, ref = {0}, tmp0, tmp1;
v8i16 diff0, diff1;
v4i32 out0, out1;
@@ -839,9 +845,9 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
v8i16 in0, in1, sh0, sh1, out0, out1;
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
v4i32 s0, s1, s2, s3, b0, b1, b2, b3, t0, t1, t2, t3;
const v8i16 zero = { 0 };
const v8i16 zigzag0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
const v8i16 zigzag1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
const v8i16 zero = {0};
const v8i16 zigzag0 = {0, 1, 4, 8, 5, 2, 3, 6};
const v8i16 zigzag1 = {9, 12, 13, 10, 7, 11, 14, 15};
const v8i16 maxlevel = __msa_fill_h(MAX_LEVEL);
LD_SH2(&in[0], 8, in0, in1);
@@ -852,11 +858,11 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
ILVRL_H2_SH(sh1, tmp5, tmp2, tmp3);
HADD_SH4_SW(tmp0, tmp1, tmp2, tmp3, s0, s1, s2, s3);
sign0 = (in0 < zero);
sign1 = (in1 < zero); // sign
LD_SH2(&mtx->iq[0], 8, tmp0, tmp1); // iq
sign1 = (in1 < zero); // sign
LD_SH2(&mtx->iq[0], 8, tmp0, tmp1); // iq
ILVRL_H2_SW(zero, tmp0, t0, t1);
ILVRL_H2_SW(zero, tmp1, t2, t3);
LD_SW4(&mtx->bias[0], 4, b0, b1, b2, b3); // bias
LD_SW4(&mtx->bias[0], 4, b0, b1, b2, b3); // bias
MUL4(t0, s0, t1, s1, t2, s2, t3, s3, t0, t1, t2, t3);
ADD4(b0, t0, b1, t1, b2, t2, b3, t3, b0, b1, b2, b3);
SRAI_W4_SW(b0, b1, b2, b3, 17);
@@ -868,7 +874,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
LD_SW4(&mtx->zthresh[0], 4, t0, t1, t2, t3); // zthresh
LD_SW4(&mtx->zthresh[0], 4, t0, t1, t2, t3); // zthresh
t0 = (s0 > t0);
t1 = (s1 > t1);
t2 = (s2 > t2);
@@ -889,7 +895,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}

View File

@@ -89,8 +89,8 @@ static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
int16x8x2_t* const out) {
// a0 a1 a2 a3 | b0 b1 b2 b3 => a0 b0 c0 d0 | a1 b1 c1 d1
// c0 c1 c2 c3 | d0 d1 d2 d3 a2 b2 c2 d2 | a3 b3 c3 d3
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
// b0 d0 b1 d1 b2 d2 ...
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
// b0 d0 b1 d1 b2 d2 ...
*out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
}
@@ -105,17 +105,17 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
vget_low_s16(rows->val[1])); // in0 + in8
vget_low_s16(rows->val[1])); // in0 + in8
const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
vget_low_s16(rows->val[1])); // in0 - in8
vget_low_s16(rows->val[1])); // in0 - in8
// c = kC2 * in4 - kC1 * in12
// d = kC1 * in4 + kC2 * in12
const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
const int16x8_t D0 = vcombine_s16(a, b); // D0 = a | b
const int16x8_t D1 = vcombine_s16(d, c); // D1 = d | c
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
const int16x8_t D0 = vcombine_s16(a, b); // D0 = a | b
const int16x8_t D1 = vcombine_s16(d, c); // D1 = d | c
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
Transpose8x2_NEON(E0, E1, rows);
}
@@ -136,118 +136,118 @@ static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst) {
const int kBPS = BPS;
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
const int16_t kC1C2[] = {kC1, kC2, 0, 0};
__asm__ volatile (
"vld1.16 {q1, q2}, [%[in]] \n"
"vld1.16 {d0}, [%[kC1C2]] \n"
__asm__ volatile(
"vld1.16 {q1, q2}, [%[in]] \n"
"vld1.16 {d0}, [%[kC1C2]] \n"
// d2: in[0]
// d3: in[8]
// d4: in[4]
// d5: in[12]
"vswp d3, d4 \n"
// d2: in[0]
// d3: in[8]
// d4: in[4]
// d5: in[12]
"vswp d3, d4 \n"
// q8 = {in[4], in[12]} * kC1 * 2 >> 16
// q9 = {in[4], in[12]} * kC2 >> 16
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
// q8 = {in[4], in[12]} * kC1 * 2 >> 16
// q9 = {in[4], in[12]} * kC2 >> 16
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
// d22 = a = in[0] + in[8]
// d23 = b = in[0] - in[8]
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
// d22 = a = in[0] + in[8]
// d23 = b = in[0] - in[8]
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
// q8 = in[4]/[12] * kC1 >> 16
"vshr.s16 q8, q8, #1 \n"
// q8 = in[4]/[12] * kC1 >> 16
"vshr.s16 q8, q8, #1 \n"
// Add {in[4], in[12]} back after the multiplication.
"vqadd.s16 q8, q2, q8 \n"
// Add {in[4], in[12]} back after the multiplication.
"vqadd.s16 q8, q2, q8 \n"
// d20 = c = in[4]*kC2 - in[12]*kC1
// d21 = d = in[4]*kC1 + in[12]*kC2
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
// d20 = c = in[4]*kC2 - in[12]*kC1
// d21 = d = in[4]*kC1 + in[12]*kC2
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
// d2 = tmp[0] = a + d
// d3 = tmp[1] = b + c
// d4 = tmp[2] = b - c
// d5 = tmp[3] = a - d
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
// d2 = tmp[0] = a + d
// d3 = tmp[1] = b + c
// d4 = tmp[2] = b - c
// d5 = tmp[3] = a - d
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vswp d3, d4 \n"
"vswp d3, d4 \n"
// q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
// q9 = {tmp[4], tmp[12]} * kC2 >> 16
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
// q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
// q9 = {tmp[4], tmp[12]} * kC2 >> 16
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
// d22 = a = tmp[0] + tmp[8]
// d23 = b = tmp[0] - tmp[8]
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
// d22 = a = tmp[0] + tmp[8]
// d23 = b = tmp[0] - tmp[8]
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
"vshr.s16 q8, q8, #1 \n"
"vqadd.s16 q8, q2, q8 \n"
"vshr.s16 q8, q8, #1 \n"
"vqadd.s16 q8, q2, q8 \n"
// d20 = c = in[4]*kC2 - in[12]*kC1
// d21 = d = in[4]*kC1 + in[12]*kC2
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
// d20 = c = in[4]*kC2 - in[12]*kC1
// d21 = d = in[4]*kC1 + in[12]*kC2
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
// d2 = tmp[0] = a + d
// d3 = tmp[1] = b + c
// d4 = tmp[2] = b - c
// d5 = tmp[3] = a - d
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
// d2 = tmp[0] = a + d
// d3 = tmp[1] = b + c
// d4 = tmp[2] = b - c
// d5 = tmp[3] = a - d
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
"vld1.32 d6[0], [%[ref]], %[kBPS] \n"
"vld1.32 d6[1], [%[ref]], %[kBPS] \n"
"vld1.32 d7[0], [%[ref]], %[kBPS] \n"
"vld1.32 d7[1], [%[ref]], %[kBPS] \n"
"vld1.32 d6[0], [%[ref]], %[kBPS] \n"
"vld1.32 d6[1], [%[ref]], %[kBPS] \n"
"vld1.32 d7[0], [%[ref]], %[kBPS] \n"
"vld1.32 d7[1], [%[ref]], %[kBPS] \n"
"sub %[ref], %[ref], %[kBPS], lsl #2 \n"
"sub %[ref], %[ref], %[kBPS], lsl #2 \n"
// (val) + 4 >> 3
"vrshr.s16 d2, d2, #3 \n"
"vrshr.s16 d3, d3, #3 \n"
"vrshr.s16 d4, d4, #3 \n"
"vrshr.s16 d5, d5, #3 \n"
// (val) + 4 >> 3
"vrshr.s16 d2, d2, #3 \n"
"vrshr.s16 d3, d3, #3 \n"
"vrshr.s16 d4, d4, #3 \n"
"vrshr.s16 d5, d5, #3 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
// Must accumulate before saturating
"vmovl.u8 q8, d6 \n"
"vmovl.u8 q9, d7 \n"
// Must accumulate before saturating
"vmovl.u8 q8, d6 \n"
"vmovl.u8 q9, d7 \n"
"vqadd.s16 q1, q1, q8 \n"
"vqadd.s16 q2, q2, q9 \n"
"vqadd.s16 q1, q1, q8 \n"
"vqadd.s16 q2, q2, q9 \n"
"vqmovun.s16 d0, q1 \n"
"vqmovun.s16 d1, q2 \n"
"vqmovun.s16 d0, q1 \n"
"vqmovun.s16 d1, q2 \n"
"vst1.32 d0[0], [%[dst]], %[kBPS] \n"
"vst1.32 d0[1], [%[dst]], %[kBPS] \n"
"vst1.32 d1[0], [%[dst]], %[kBPS] \n"
"vst1.32 d1[1], [%[dst]] \n"
"vst1.32 d0[0], [%[dst]], %[kBPS] \n"
"vst1.32 d0[1], [%[dst]], %[kBPS] \n"
"vst1.32 d1[0], [%[dst]], %[kBPS] \n"
"vst1.32 d1[1], [%[dst]] \n"
: [in] "+r"(in), [dst] "+r"(dst) // modified registers
: [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref) // constants
: "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" // clobbered
: [in] "+r"(in), [dst] "+r"(dst) // modified registers
: [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref) // constants
: "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" // clobbered
);
}
#endif // WEBP_USE_INTRINSICS
#endif // WEBP_USE_INTRINSICS
static void ITransform_NEON(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
@@ -272,24 +272,19 @@ static uint8x16_t Load4x4_NEON(const uint8_t* src) {
#if defined(WEBP_USE_INTRINSICS)
static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
const int16x4_t B,
const int16x4_t C,
const int16x4_t D,
int16x8_t* const out01,
int16x8_t* const out32) {
static WEBP_INLINE void Transpose4x4_S16_NEON(
const int16x4_t A, const int16x4_t B, const int16x4_t C, const int16x4_t D,
int16x8_t* const out01, int16x8_t* const out32) {
const int16x4x2_t AB = vtrn_s16(A, B);
const int16x4x2_t CD = vtrn_s16(C, D);
const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
vreinterpret_s32_s16(CD.val[0]));
const int32x2x2_t tmp13 = vtrn_s32(vreinterpret_s32_s16(AB.val[1]),
vreinterpret_s32_s16(CD.val[1]));
*out01 = vreinterpretq_s16_s64(
vcombine_s64(vreinterpret_s64_s32(tmp02.val[0]),
vreinterpret_s64_s32(tmp13.val[0])));
*out32 = vreinterpretq_s16_s64(
vcombine_s64(vreinterpret_s64_s32(tmp13.val[1]),
vreinterpret_s64_s32(tmp02.val[1])));
*out01 = vreinterpretq_s16_s64(vcombine_s64(
vreinterpret_s64_s32(tmp02.val[0]), vreinterpret_s64_s32(tmp13.val[0])));
*out32 = vreinterpretq_s16_s64(vcombine_s64(
vreinterpret_s64_s32(tmp13.val[1]), vreinterpret_s64_s32(tmp02.val[1])));
}
static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
@@ -300,7 +295,7 @@ static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
int16_t* WEBP_RESTRICT out) {
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
{
const uint8x16_t S0 = Load4x4_NEON(src);
const uint8x16_t R0 = Load4x4_NEON(ref);
@@ -312,16 +307,16 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
const int16x4_t D3 = vget_high_s16(D2D3);
Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
}
{ // 1rst pass
{ // 1rst pass
const int32x4_t kCst937 = vdupq_n_s32(937);
const int32x4_t kCst1812 = vdupq_n_s32(1812);
const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2 (=a0|a1)
const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2 (=a3|a2)
const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2 (=a0|a1)
const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2 (=a3|a2)
const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3);
const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2),
vget_high_s16(a0a1_2));
const int16x4_t tmp2 = vsub_s16(vget_low_s16(a0a1_2),
vget_high_s16(a0a1_2));
const int16x4_t tmp0 =
vadd_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2));
const int16x4_t tmp2 =
vsub_s16(vget_low_s16(a0a1_2), vget_high_s16(a0a1_2));
const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
@@ -330,12 +325,12 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
}
{ // 2nd pass
{ // 2nd pass
// the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0)
const int32x4_t kCst12000 = vdupq_n_s32(12000 + (1 << 16));
const int32x4_t kCst51000 = vdupq_n_s32(51000);
const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2 (=a0|a1)
const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2 (=a3|a2)
const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2 (=a0|a1)
const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2 (=a3|a2)
const int16x4_t a0_k7 = vadd_s16(vget_low_s16(a0a1), vdup_n_s16(7));
const int16x4_t out0 = vshr_n_s16(vadd_s16(a0_k7, vget_high_s16(a0a1)), 4);
const int16x4_t out2 = vshr_n_s16(vsub_s16(a0_k7, vget_high_s16(a0a1)), 4);
@@ -348,9 +343,9 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
const int16x4_t a3_eq_0 =
vreinterpret_s16_u16(vceq_s16(vget_low_s16(a3a2), vdup_n_s16(0)));
const int16x4_t out1 = vadd_s16(tmp1, a3_eq_0);
vst1_s16(out + 0, out0);
vst1_s16(out + 4, out1);
vst1_s16(out + 8, out2);
vst1_s16(out + 0, out0);
vst1_s16(out + 4, out1);
vst1_s16(out + 8, out2);
vst1_s16(out + 12, out3);
}
}
@@ -358,15 +353,11 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
#else
// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
static const int16_t kCoeff16[] = {
5352, 5352, 5352, 5352, 2217, 2217, 2217, 2217
};
static const int32_t kCoeff32[] = {
1812, 1812, 1812, 1812,
937, 937, 937, 937,
12000, 12000, 12000, 12000,
51000, 51000, 51000, 51000
};
static const int16_t kCoeff16[] = {5352, 5352, 5352, 5352,
2217, 2217, 2217, 2217};
static const int32_t kCoeff32[] = {1812, 1812, 1812, 1812, 937, 937,
937, 937, 12000, 12000, 12000, 12000,
51000, 51000, 51000, 51000};
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
const uint8_t* WEBP_RESTRICT ref,
@@ -377,117 +368,122 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
const int16_t* coeff16 = kCoeff16;
const int32_t* coeff32 = kCoeff32;
__asm__ volatile (
// load src into q4, q5 in high half
"vld1.8 {d8}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d10}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d9}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d11}, [%[src_ptr]] \n"
__asm__ volatile(
// load src into q4, q5 in high half
"vld1.8 {d8}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d10}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d9}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d11}, [%[src_ptr]] \n"
// load ref into q6, q7 in high half
"vld1.8 {d12}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d14}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d13}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d15}, [%[ref_ptr]] \n"
// load ref into q6, q7 in high half
"vld1.8 {d12}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d14}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d13}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d15}, [%[ref_ptr]] \n"
// Pack the high values in to q4 and q6
"vtrn.32 q4, q5 \n"
"vtrn.32 q6, q7 \n"
// Pack the high values in to q4 and q6
"vtrn.32 q4, q5 \n"
"vtrn.32 q6, q7 \n"
// d[0-3] = src - ref
"vsubl.u8 q0, d8, d12 \n"
"vsubl.u8 q1, d9, d13 \n"
// d[0-3] = src - ref
"vsubl.u8 q0, d8, d12 \n"
"vsubl.u8 q1, d9, d13 \n"
// load coeff16 into q8(d16=5352, d17=2217)
"vld1.16 {q8}, [%[coeff16]] \n"
// load coeff16 into q8(d16=5352, d17=2217)
"vld1.16 {q8}, [%[coeff16]] \n"
// load coeff32 high half into q9 = 1812, q10 = 937
"vld1.32 {q9, q10}, [%[coeff32]]! \n"
// load coeff32 high half into q9 = 1812, q10 = 937
"vld1.32 {q9, q10}, [%[coeff32]]! \n"
// load coeff32 low half into q11=12000, q12=51000
"vld1.32 {q11,q12}, [%[coeff32]] \n"
// load coeff32 low half into q11=12000, q12=51000
"vld1.32 {q11,q12}, [%[coeff32]] \n"
// part 1
// Transpose. Register dN is the same as dN in C
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
// part 1
// Transpose. Register dN is the same as dN in C
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
"vadd.s16 d4, d0, d3 \n" // a0 = d0 + d3
"vadd.s16 d5, d1, d2 \n" // a1 = d1 + d2
"vsub.s16 d6, d1, d2 \n" // a2 = d1 - d2
"vsub.s16 d7, d0, d3 \n" // a3 = d0 - d3
"vadd.s16 d4, d0, d3 \n" // a0 = d0 + d3
"vadd.s16 d5, d1, d2 \n" // a1 = d1 + d2
"vsub.s16 d6, d1, d2 \n" // a2 = d1 - d2
"vsub.s16 d7, d0, d3 \n" // a3 = d0 - d3
"vadd.s16 d0, d4, d5 \n" // a0 + a1
"vshl.s16 d0, d0, #3 \n" // temp[0+i*4] = (a0+a1) << 3
"vsub.s16 d2, d4, d5 \n" // a0 - a1
"vshl.s16 d2, d2, #3 \n" // (temp[2+i*4] = (a0-a1) << 3
"vadd.s16 d0, d4, d5 \n" // a0 + a1
"vshl.s16 d0, d0, #3 \n" // temp[0+i*4] = (a0+a1) <<
// 3
"vsub.s16 d2, d4, d5 \n" // a0 - a1
"vshl.s16 d2, d2, #3 \n" // (temp[2+i*4] = (a0-a1)
// << 3
"vmlal.s16 q9, d7, d16 \n" // a3*5352 + 1812
"vmlal.s16 q10, d7, d17 \n" // a3*2217 + 937
"vmlal.s16 q9, d6, d17 \n" // a2*2217 + a3*5352 + 1812
"vmlsl.s16 q10, d6, d16 \n" // a3*2217 + 937 - a2*5352
"vmlal.s16 q9, d7, d16 \n" // a3*5352 + 1812
"vmlal.s16 q10, d7, d17 \n" // a3*2217 + 937
"vmlal.s16 q9, d6, d17 \n" // a2*2217 + a3*5352 + 1812
"vmlsl.s16 q10, d6, d16 \n" // a3*2217 + 937 - a2*5352
// temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
// temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
"vshrn.s32 d1, q9, #9 \n"
"vshrn.s32 d3, q10, #9 \n"
// temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
// temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
"vshrn.s32 d1, q9, #9 \n"
"vshrn.s32 d3, q10, #9 \n"
// part 2
// transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
// part 2
// transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
"vmov.s16 d26, #7 \n"
"vmov.s16 d26, #7 \n"
"vadd.s16 d4, d0, d3 \n" // a1 = ip[0] + ip[12]
"vadd.s16 d5, d1, d2 \n" // b1 = ip[4] + ip[8]
"vsub.s16 d6, d1, d2 \n" // c1 = ip[4] - ip[8]
"vadd.s16 d4, d4, d26 \n" // a1 + 7
"vsub.s16 d7, d0, d3 \n" // d1 = ip[0] - ip[12]
"vadd.s16 d4, d0, d3 \n" // a1 = ip[0] + ip[12]
"vadd.s16 d5, d1, d2 \n" // b1 = ip[4] + ip[8]
"vsub.s16 d6, d1, d2 \n" // c1 = ip[4] - ip[8]
"vadd.s16 d4, d4, d26 \n" // a1 + 7
"vsub.s16 d7, d0, d3 \n" // d1 = ip[0] - ip[12]
"vadd.s16 d0, d4, d5 \n" // op[0] = a1 + b1 + 7
"vsub.s16 d2, d4, d5 \n" // op[8] = a1 - b1 + 7
"vadd.s16 d0, d4, d5 \n" // op[0] = a1 + b1 + 7
"vsub.s16 d2, d4, d5 \n" // op[8] = a1 - b1 + 7
"vmlal.s16 q11, d7, d16 \n" // d1*5352 + 12000
"vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000
"vmlal.s16 q11, d7, d16 \n" // d1*5352 + 12000
"vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000
"vceq.s16 d4, d7, #0 \n"
"vceq.s16 d4, d7, #0 \n"
"vshr.s16 d0, d0, #4 \n"
"vshr.s16 d2, d2, #4 \n"
"vshr.s16 d0, d0, #4 \n"
"vshr.s16 d2, d2, #4 \n"
"vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000
"vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000
"vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 +
// 12000
"vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 +
// 51000
"vmvn d4, d4 \n" // !(d1 == 0)
// op[4] = (c1*2217 + d1*5352 + 12000)>>16
"vshrn.s32 d1, q11, #16 \n"
// op[4] += (d1!=0)
"vsub.s16 d1, d1, d4 \n"
// op[12]= (d1*2217 - c1*5352 + 51000)>>16
"vshrn.s32 d3, q12, #16 \n"
"vmvn d4, d4 \n" // !(d1 == 0)
// op[4] = (c1*2217 + d1*5352 + 12000)>>16
"vshrn.s32 d1, q11, #16 \n"
// op[4] += (d1!=0)
"vsub.s16 d1, d1, d4 \n"
// op[12]= (d1*2217 - c1*5352 + 51000)>>16
"vshrn.s32 d3, q12, #16 \n"
// set result to out array
"vst1.16 {q0, q1}, [%[out]] \n"
: [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
[coeff32] "+r"(coeff32) // modified registers
: [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
[out] "r"(out) // constants
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13" // clobbered
// set result to out array
"vst1.16 {q0, q1}, [%[out]] \n"
: [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
[coeff32] "+r"(coeff32) // modified registers
: [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
[out] "r"(out) // constants
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13" // clobbered
);
}
#endif
#define LOAD_LANE_16b(VALUE, LANE) do { \
(VALUE) = vld1_lane_s16(src, (VALUE), (LANE)); \
src += stride; \
} while (0)
#define LOAD_LANE_16b(VALUE, LANE) \
do { \
(VALUE) = vld1_lane_s16(src, (VALUE), (LANE)); \
src += stride; \
} while (0)
static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
int16_t* WEBP_RESTRICT out) {
@@ -546,9 +542,9 @@ static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
const int16x4_t out2 = vmovn_s32(b2);
const int16x4_t out3 = vmovn_s32(b3);
vst1_s16(out + 0, out0);
vst1_s16(out + 4, out1);
vst1_s16(out + 8, out2);
vst1_s16(out + 0, out0);
vst1_s16(out + 4, out1);
vst1_s16(out + 8, out2);
vst1_s16(out + 12, out3);
}
}
@@ -586,8 +582,8 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
return q4_in;
}
static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
const int16x8x4_t q4_in) {
static WEBP_INLINE int16x8x4_t
DistoHorizontalPass_NEON(const int16x8x4_t q4_in) {
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
@@ -599,26 +595,24 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
// tmp[1] = a3 + a2
// tmp[2] = a3 - a2
// tmp[3] = a0 - a1
INIT_VECTOR4(q4_out,
vabsq_s16(vaddq_s16(q_a0, q_a1)),
vabsq_s16(vaddq_s16(q_a3, q_a2)),
vabdq_s16(q_a3, q_a2), vabdq_s16(q_a0, q_a1));
INIT_VECTOR4(q4_out, vabsq_s16(vaddq_s16(q_a0, q_a1)),
vabsq_s16(vaddq_s16(q_a3, q_a2)), vabdq_s16(q_a3, q_a2),
vabdq_s16(q_a0, q_a1));
return q4_out;
}
static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
q4_in.val[2]));
const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
q4_in.val[3]));
const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1],
q4_in.val[3]));
const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0],
q4_in.val[2]));
const int16x8_t q_a0 =
vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0], q4_in.val[2]));
const int16x8_t q_a1 =
vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1], q4_in.val[3]));
const int16x8_t q_a2 =
vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1], q4_in.val[3]));
const int16x8_t q_a3 =
vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0], q4_in.val[2]));
int16x8x4_t q4_out;
INIT_VECTOR4(q4_out,
vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
INIT_VECTOR4(q4_out, vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
return q4_out;
}
@@ -627,8 +621,7 @@ static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
const uint16x8_t q_w07 = vld1q_u16(&w[0]);
const uint16x8_t q_w8f = vld1q_u16(&w[8]);
int16x4x4_t d4_w;
INIT_VECTOR4(d4_w,
vget_low_s16(vreinterpretq_s16_u16(q_w07)),
INIT_VECTOR4(d4_w, vget_low_s16(vreinterpretq_s16_u16(q_w07)),
vget_high_s16(vreinterpretq_s16_u16(q_w07)),
vget_low_s16(vreinterpretq_s16_u16(q_w8f)),
vget_high_s16(vreinterpretq_s16_u16(q_w8f)));
@@ -660,7 +653,7 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
}
#define LOAD_LANE_32b(src, VALUE, LANE) \
(VALUE) = vld1_lane_u32((const uint32_t*)(src), (VALUE), (LANE))
(VALUE) = vld1_lane_u32((const uint32_t*)(src), (VALUE), (LANE))
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
@@ -683,8 +676,7 @@ static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
LOAD_LANE_32b(b + 1 * BPS, d_in_ab_4567, 1);
LOAD_LANE_32b(b + 2 * BPS, d_in_ab_89ab, 1);
LOAD_LANE_32b(b + 3 * BPS, d_in_ab_cdef, 1);
INIT_VECTOR4(d4_in,
vreinterpret_u8_u32(d_in_ab_0123),
INIT_VECTOR4(d4_in, vreinterpret_u8_u32(d_in_ab_0123),
vreinterpret_u8_u32(d_in_ab_4567),
vreinterpret_u8_u32(d_in_ab_89ab),
vreinterpret_u8_u32(d_in_ab_cdef));
@@ -729,7 +721,7 @@ static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
VP8Histogram* WEBP_RESTRICT const histo) {
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
int distribution[MAX_COEFF_THRESH + 1] = {0};
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
@@ -762,10 +754,10 @@ static WEBP_INLINE void AccumulateSSE16_NEON(
const uint8x16_t a0 = vld1q_u8(a);
const uint8x16_t b0 = vld1q_u8(b);
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
vget_low_u8(abs_diff));
const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
vget_high_u8(abs_diff));
const uint16x8_t prod1 =
vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
const uint16x8_t prod2 =
vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
/* pair-wise adds and widen */
const uint32x4_t sum1 = vpaddlq_u16(prod1);
const uint32x4_t sum2 = vpaddlq_u16(prod2);
@@ -823,10 +815,10 @@ static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
const uint8x16_t a0 = Load4x4_NEON(a);
const uint8x16_t b0 = Load4x4_NEON(b);
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
vget_low_u8(abs_diff));
const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
vget_high_u8(abs_diff));
const uint16x8_t prod1 =
vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
const uint16x8_t prod2 =
vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
/* pair-wise adds and widen */
const uint32x4_t sum1 = vpaddlq_u16(prod1);
const uint32x4_t sum2 = vpaddlq_u16(prod2);
@@ -854,24 +846,22 @@ static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
const uint32x4_t m0 = vmull_u16(vget_low_u16(c), vget_low_u16(iq));
const uint32x4_t m1 = vmull_u16(vget_high_u16(c), vget_high_u16(iq));
const uint32x4_t m2 = vhaddq_u32(m0, bias0);
const uint32x4_t m3 = vhaddq_u32(m1, bias1); // (coeff * iQ + bias) >> 1
const uint16x8_t c0 = vcombine_u16(vshrn_n_u32(m2, 16),
vshrn_n_u32(m3, 16)); // QFIX=17 = 16+1
const uint32x4_t m3 = vhaddq_u32(m1, bias1); // (coeff * iQ + bias) >> 1
const uint16x8_t c0 =
vcombine_u16(vshrn_n_u32(m2, 16), vshrn_n_u32(m3, 16)); // QFIX=17 = 16+1
const uint16x8_t c1 = vminq_u16(c0, vdupq_n_u16(MAX_LEVEL));
const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign);
const int16x8_t c3 = vsubq_s16(c2, sign); // restore sign
const int16x8_t c3 = vsubq_s16(c2, sign); // restore sign
const int16x8_t c4 = vmulq_s16(c3, vreinterpretq_s16_u16(q));
vst1q_s16(in + offset, c4);
assert(QFIX == 17); // this function can't work as is if QFIX != 16+1
return c3;
}
static const uint8_t kShuffles[4][8] = {
{ 0, 1, 2, 3, 8, 9, 16, 17 },
{ 10, 11, 4, 5, 6, 7, 12, 13 },
{ 18, 19, 24, 25, 26, 27, 20, 21 },
{ 14, 15, 22, 23, 28, 29, 30, 31 }
};
static const uint8_t kShuffles[4][8] = {{0, 1, 2, 3, 8, 9, 16, 17},
{10, 11, 4, 5, 6, 7, 12, 13},
{18, 19, 24, 25, 26, 27, 20, 21},
{14, 15, 22, 23, 28, 29, 30, 31}};
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
const VP8Matrix* WEBP_RESTRICT const mtx) {
@@ -880,37 +870,34 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
uint8x8x4_t shuffles;
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
#if defined(__APPLE__) && WEBP_AARCH64 && \
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
#if defined(__APPLE__) && WEBP_AARCH64 && defined(__apple_build_version__) && \
(__apple_build_version__ < 6020037)
uint8x16x2_t all_out;
INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
INIT_VECTOR4(shuffles,
vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
INIT_VECTOR4(shuffles, vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
vtbl2q_u8(all_out, vld1_u8(kShuffles[1])),
vtbl2q_u8(all_out, vld1_u8(kShuffles[2])),
vtbl2q_u8(all_out, vld1_u8(kShuffles[3])));
#else
uint8x8x4_t all_out;
INIT_VECTOR4(all_out,
vreinterpret_u8_s16(vget_low_s16(out0)),
INIT_VECTOR4(all_out, vreinterpret_u8_s16(vget_low_s16(out0)),
vreinterpret_u8_s16(vget_high_s16(out0)),
vreinterpret_u8_s16(vget_low_s16(out1)),
vreinterpret_u8_s16(vget_high_s16(out1)));
INIT_VECTOR4(shuffles,
vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
INIT_VECTOR4(shuffles, vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
vtbl4_u8(all_out, vld1_u8(kShuffles[1])),
vtbl4_u8(all_out, vld1_u8(kShuffles[2])),
vtbl4_u8(all_out, vld1_u8(kShuffles[3])));
#endif
// Zigzag reordering
vst1_u8((uint8_t*)(out + 0), shuffles.val[0]);
vst1_u8((uint8_t*)(out + 4), shuffles.val[1]);
vst1_u8((uint8_t*)(out + 8), shuffles.val[2]);
vst1_u8((uint8_t*)(out + 0), shuffles.val[0]);
vst1_u8((uint8_t*)(out + 4), shuffles.val[1]);
vst1_u8((uint8_t*)(out + 8), shuffles.val[2]);
vst1_u8((uint8_t*)(out + 12), shuffles.val[3]);
// test zeros
if (*(uint64_t*)(out + 0) != 0) return 1;
if (*(uint64_t*)(out + 4) != 0) return 1;
if (*(uint64_t*)(out + 8) != 0) return 1;
if (*(uint64_t*)(out + 0) != 0) return 1;
if (*(uint64_t*)(out + 4) != 0) return 1;
if (*(uint64_t*)(out + 8) != 0) return 1;
if (*(uint64_t*)(out + 12) != 0) return 1;
return 0;
}
@@ -918,31 +905,31 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
#endif // !WORK_AROUND_GCC
#endif // !WORK_AROUND_GCC
#if WEBP_AARCH64
#if BPS == 32
#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane) \
do { \
uint8x16_t r; \
r = vqtbl2q_u8(qcombined, tbl); \
r = vreinterpretq_u8_u32( \
vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane), \
vreinterpretq_u32_u8(r), 1)); \
vst1q_u8(dst, r); \
#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane) \
do { \
uint8x16_t r; \
r = vqtbl2q_u8(qcombined, tbl); \
r = vreinterpretq_u8_u32( \
vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane), \
vreinterpretq_u32_u8(r), 1)); \
vst1q_u8(dst, r); \
} while (0)
#define RD4_VR4_LD4_VL4_NEON(dst, tbl) \
do { \
uint8x16_t r; \
r = vqtbl2q_u8(qcombined, tbl); \
vst1q_u8(dst, r); \
#define RD4_VR4_LD4_VL4_NEON(dst, tbl) \
do { \
uint8x16_t r; \
r = vqtbl2q_u8(qcombined, tbl); \
vst1q_u8(dst, r); \
} while (0)
static WEBP_INLINE uint8x8x2_t Vld1U8x2(const uint8_t* ptr) {
@@ -960,8 +947,7 @@ static WEBP_INLINE uint8x16x4_t Vld1qU8x4(const uint8_t* ptr) {
return vld1q_u8_x4(ptr);
#else
uint8x16x4_t res;
INIT_VECTOR4(res,
vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
INIT_VECTOR4(res, vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
vld1q_u8(ptr + 2 * 16), vld1q_u8(ptr + 3 * 16));
return res;
#endif
@@ -973,25 +959,22 @@ static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
// L K J I X A B C D E F G H
// -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7
static const uint8_t kLookupTbl1[64] = {
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12,
3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0,
4, 20, 21, 22, 3, 18, 2, 17, 3, 19, 4, 20, 2, 17, 1, 16,
2, 18, 3, 19, 1, 16, 31, 31, 1, 17, 2, 18, 31, 31, 31, 31
};
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12,
3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0,
4, 20, 21, 22, 3, 18, 2, 17, 3, 19, 4, 20, 2, 17, 1, 16,
2, 18, 3, 19, 1, 16, 31, 31, 1, 17, 2, 18, 31, 31, 31, 31};
static const uint8_t kLookupTbl2[64] = {
20, 21, 22, 23, 5, 6, 7, 8, 22, 23, 24, 25, 6, 7, 8, 9,
19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
18, 19, 20, 21, 19, 5, 6, 7, 24, 25, 26, 27, 7, 8, 9, 26,
17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27
};
20, 21, 22, 23, 5, 6, 7, 8, 22, 23, 24, 25, 6, 7, 8, 9,
19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
18, 19, 20, 21, 19, 5, 6, 7, 24, 25, 26, 27, 7, 8, 9, 26,
17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27};
static const uint8_t kLookupTbl3[64] = {
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 19, 19, 19, 19,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 18, 18, 18, 18,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 17, 17, 17, 17,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16
};
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 19, 19, 19, 19,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 18, 18, 18, 18,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 17, 17, 17, 17,
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16};
const uint8x16x4_t lookup_avgs1 = Vld1qU8x4(kLookupTbl1);
const uint8x16x4_t lookup_avgs2 = Vld1qU8x4(kLookupTbl2);
@@ -1210,7 +1193,7 @@ static void Intra16Preds_NEON(uint8_t* WEBP_RESTRICT dst,
TrueMotion_NEON(I16TM16 + dst, left, top);
}
#endif // WEBP_AARCH64
#endif // WEBP_AARCH64
//------------------------------------------------------------------------------
// Entry point

View File

@@ -14,9 +14,8 @@
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include <assert.h>
#include <emmintrin.h>
#include <stdlib.h> // for abs()
#include <string.h>
@@ -50,10 +49,10 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
// variable and the multiplication of that variable by the associated
// constant:
// (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068,
20091, 20091, 20091, 20091);
const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091,
-30068, -30068, -30068, -30068);
const __m128i k1k2 =
_mm_set_epi16(-30068, -30068, -30068, -30068, 20091, 20091, 20091, 20091);
const __m128i k2k1 =
_mm_set_epi16(20091, 20091, 20091, 20091, -30068, -30068, -30068, -30068);
const __m128i zero = _mm_setzero_si128();
const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4);
__m128i T01, T23;
@@ -172,7 +171,7 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
// Unsigned saturate to 8b.
ref0123 = _mm_packus_epi16(ref01, ref23);
_mm_storeu_si128((__m128i *)buf, ref0123);
_mm_storeu_si128((__m128i*)buf, ref0123);
// Store four bytes/pixels per line.
WebPInt32ToMem(&dst[0 * BPS], buf[0]);
@@ -259,8 +258,8 @@ static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
// multiplications.
const __m128i four = _mm_set1_epi16(4);
const __m128i dc = _mm_add_epi16(T0, four);
const __m128i a = _mm_add_epi16(dc, T2);
const __m128i b = _mm_sub_epi16(dc, T2);
const __m128i a = _mm_add_epi16(dc, T2);
const __m128i b = _mm_sub_epi16(dc, T2);
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
const __m128i c1 = _mm_mulhi_epi16(T1, k2);
const __m128i c2 = _mm_mulhi_epi16(T3, k1);
@@ -325,8 +324,7 @@ static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
// Does one or two inverse transforms.
static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
const int16_t* WEBP_RESTRICT in,
uint8_t* WEBP_RESTRICT dst,
int do_two) {
uint8_t* WEBP_RESTRICT dst, int do_two) {
if (do_two) {
ITransform_Two_SSE2(ref, in, dst);
} else {
@@ -336,17 +334,16 @@ static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
static void FTransformPass1_SSE2(const __m128i* const in01,
const __m128i* const in23,
__m128i* const out01,
__m128i* const out32) {
__m128i* const out01, __m128i* const out32) {
const __m128i k937 = _mm_set1_epi32(937);
const __m128i k1812 = _mm_set1_epi32(1812);
const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
2217, 5352, 2217, 5352);
const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
-5352, 2217, -5352, 2217);
const __m128i k5352_2217p =
_mm_set_epi16(2217, 5352, 2217, 5352, 2217, 5352, 2217, 5352);
const __m128i k5352_2217m =
_mm_set_epi16(-5352, 2217, -5352, 2217, -5352, 2217, -5352, 2217);
// *in01 = 00 01 10 11 02 03 12 13
// *in23 = 20 21 30 31 22 23 32 33
@@ -363,19 +360,19 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
// [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
// [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
const __m128i tmp0 = _mm_madd_epi16(a01, k88p); // [ (a0 + a1) << 3, ... ]
const __m128i tmp2 = _mm_madd_epi16(a01, k88m); // [ (a0 - a1) << 3, ... ]
const __m128i tmp0 = _mm_madd_epi16(a01, k88p); // [ (a0 + a1) << 3, ... ]
const __m128i tmp2 = _mm_madd_epi16(a01, k88m); // [ (a0 - a1) << 3, ... ]
const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
const __m128i tmp1 = _mm_srai_epi32(tmp1_2, 9);
const __m128i tmp3 = _mm_srai_epi32(tmp3_2, 9);
const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
const __m128i s_lo = _mm_unpacklo_epi16(s03, s12); // 0 1 0 1 0 1...
const __m128i s_hi = _mm_unpackhi_epi16(s03, s12); // 2 3 2 3 2 3
const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
const __m128i tmp1 = _mm_srai_epi32(tmp1_2, 9);
const __m128i tmp3 = _mm_srai_epi32(tmp3_2, 9);
const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
const __m128i s_lo = _mm_unpacklo_epi16(s03, s12); // 0 1 0 1 0 1...
const __m128i s_hi = _mm_unpackhi_epi16(s03, s12); // 2 3 2 3 2 3
const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
*out01 = _mm_unpacklo_epi32(s_lo, s_hi);
*out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
}
@@ -385,10 +382,10 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
int16_t* WEBP_RESTRICT out) {
const __m128i zero = _mm_setzero_si128();
const __m128i seven = _mm_set1_epi16(7);
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
5352, 2217, 5352, 2217);
const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
2217, -5352, 2217, -5352);
const __m128i k5352_2217 =
_mm_set_epi16(5352, 2217, 5352, 2217, 5352, 2217, 5352, 2217);
const __m128i k2217_5352 =
_mm_set_epi16(2217, -5352, 2217, -5352, 2217, -5352, 2217, -5352);
const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
const __m128i k51000 = _mm_set1_epi32(51000);
@@ -537,8 +534,8 @@ static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
const __m128i src3 = _mm_loadl_epi64((__m128i*)&in[3 * 16]);
const __m128i A01 = _mm_unpacklo_epi16(src0, src1); // A0 A1 | ...
const __m128i A23 = _mm_unpacklo_epi16(src2, src3); // A2 A3 | ...
const __m128i B0 = _mm_adds_epi16(A01, A23); // a0 | a1 | ...
const __m128i B1 = _mm_subs_epi16(A01, A23); // a3 | a2 | ...
const __m128i B0 = _mm_adds_epi16(A01, A23); // a0 | a1 | ...
const __m128i B1 = _mm_subs_epi16(A01, A23); // a3 | a2 | ...
const __m128i C0 = _mm_unpacklo_epi32(B0, B1); // a0 | a1 | a3 | a2 | ...
const __m128i C1 = _mm_unpacklo_epi32(B1, B0); // a3 | a2 | a0 | a1 | ...
const __m128i D = _mm_unpacklo_epi64(C0, C1); // a0 a1 a3 a2 a3 a2 a0 a1
@@ -586,7 +583,7 @@ static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
const __m128i zero = _mm_setzero_si128();
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
int distribution[MAX_COEFF_THRESH + 1] = {0};
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
int k;
@@ -600,7 +597,7 @@ static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
const __m128i d0 = _mm_sub_epi16(zero, out0);
const __m128i d1 = _mm_sub_epi16(zero, out1);
const __m128i abs0 = _mm_max_epi16(out0, d0); // abs(v), 16b
const __m128i abs0 = _mm_max_epi16(out0, d0); // abs(v), 16b
const __m128i abs1 = _mm_max_epi16(out1, d1);
// v = abs(out) >> 3
const __m128i v0 = _mm_srai_epi16(abs0, 3);
@@ -927,7 +924,7 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcdefg));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -950,8 +947,8 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(abcd));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(efgh));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
@@ -978,8 +975,8 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
const uint32_t extra_out =
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(avg1));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(avg4));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
@@ -1000,7 +997,7 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(abcdefg));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
@@ -1012,14 +1009,13 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
const int J = top[-3];
const int K = top[-4];
const int L = top[-5];
DST(0, 0) = AVG2(I, J);
DST(0, 0) = AVG2(I, J);
DST(2, 0) = DST(0, 1) = AVG2(J, K);
DST(2, 1) = DST(0, 2) = AVG2(K, L);
DST(1, 0) = AVG3(I, J, K);
DST(1, 0) = AVG3(I, J, K);
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
DST(3, 2) = DST(2, 2) =
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
@@ -1036,14 +1032,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
DST(0, 0) = DST(2, 1) = AVG2(I, X);
DST(0, 1) = DST(2, 2) = AVG2(J, I);
DST(0, 2) = DST(2, 3) = AVG2(K, J);
DST(0, 3) = AVG2(L, K);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
DST(1, 3) = AVG3(L, K, J);
DST(1, 3) = AVG3(L, K, J);
}
static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
@@ -1244,11 +1240,11 @@ static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
const __m128i a2 = _mm_loadu_si128((const __m128i*)&ref[BPS * 2]);
const __m128i a3 = _mm_loadu_si128((const __m128i*)&ref[BPS * 3]);
const __m128i b0 = _mm_srli_epi16(a0, 8); // hi byte
const __m128i b0 = _mm_srli_epi16(a0, 8); // hi byte
const __m128i b1 = _mm_srli_epi16(a1, 8);
const __m128i b2 = _mm_srli_epi16(a2, 8);
const __m128i b3 = _mm_srli_epi16(a3, 8);
const __m128i c0 = _mm_and_si128(a0, mask); // lo byte
const __m128i c0 = _mm_and_si128(a0, mask); // lo byte
const __m128i c1 = _mm_and_si128(a1, mask);
const __m128i c2 = _mm_and_si128(a2, mask);
const __m128i c3 = _mm_and_si128(a3, mask);
@@ -1357,7 +1353,7 @@ static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
const __m128i d1 = _mm_sub_epi16(zero, A_b2);
const __m128i d2 = _mm_sub_epi16(zero, B_b0);
const __m128i d3 = _mm_sub_epi16(zero, B_b2);
A_b0 = _mm_max_epi16(A_b0, d0); // abs(v), 16b
A_b0 = _mm_max_epi16(A_b0, d0); // abs(v), 16b
A_b2 = _mm_max_epi16(A_b2, d1);
B_b0 = _mm_max_epi16(B_b0, d2);
B_b2 = _mm_max_epi16(B_b2, d3);
@@ -1494,11 +1490,11 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
// position instead of twelfth) and 8th values.
{
__m128i outZ0, outZ8;
outZ0 = _mm_shufflehi_epi16(out0, _MM_SHUFFLE(2, 1, 3, 0));
outZ0 = _mm_shuffle_epi32 (outZ0, _MM_SHUFFLE(3, 1, 2, 0));
outZ0 = _mm_shufflehi_epi16(out0, _MM_SHUFFLE(2, 1, 3, 0));
outZ0 = _mm_shuffle_epi32(outZ0, _MM_SHUFFLE(3, 1, 2, 0));
outZ0 = _mm_shufflehi_epi16(outZ0, _MM_SHUFFLE(3, 1, 0, 2));
outZ8 = _mm_shufflelo_epi16(out8, _MM_SHUFFLE(3, 0, 2, 1));
outZ8 = _mm_shuffle_epi32 (outZ8, _MM_SHUFFLE(3, 1, 2, 0));
outZ8 = _mm_shufflelo_epi16(out8, _MM_SHUFFLE(3, 0, 2, 1));
outZ8 = _mm_shuffle_epi32(outZ8, _MM_SHUFFLE(3, 1, 2, 0));
outZ8 = _mm_shufflelo_epi16(outZ8, _MM_SHUFFLE(1, 3, 2, 0));
_mm_storeu_si128((__m128i*)&out[0], outZ0);
_mm_storeu_si128((__m128i*)&out[8], outZ8);
@@ -1529,7 +1525,7 @@ static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen[0];
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;
}

View File

@@ -16,7 +16,6 @@
#if defined(WEBP_USE_SSE41)
#include <emmintrin.h>
#include <smmintrin.h>
#include <stdlib.h> // for abs()
#include "src/dsp/common_sse2.h"
@@ -33,7 +32,7 @@ static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
VP8Histogram* WEBP_RESTRICT const histo) {
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
int distribution[MAX_COEFF_THRESH + 1] = {0};
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
int k;
@@ -198,7 +197,7 @@ static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
//
// Generates a pshufb constant for shuffling 16b words.
#define PSHUFB_CST(A,B,C,D,E,F,G,H) \
#define PSHUFB_CST(A, B, C, D, E, F, G, H) \
_mm_set_epi8(2 * (H) + 1, 2 * (H) + 0, 2 * (G) + 1, 2 * (G) + 0, \
2 * (F) + 1, 2 * (F) + 0, 2 * (E) + 1, 2 * (E) + 0, \
2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
@@ -321,7 +320,7 @@ static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
const VP8Matrix* WEBP_RESTRICT const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen[0];
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;
}

View File

@@ -22,14 +22,14 @@
//------------------------------------------------------------------------------
// Helpful macro.
#define DCHECK(in, out) \
do { \
assert((in) != NULL); \
assert((out) != NULL); \
assert((in) != (out)); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
#define DCHECK(in, out) \
do { \
assert((in) != NULL); \
assert((out) != NULL); \
assert((in) != (out)); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
} while (0)
#if !WEBP_NEON_OMIT_C_CODE
@@ -124,8 +124,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
// leftmost pixel: predict from above.
PredictLine_C(in, preds - stride, out, 1);
for (w = 1; w < width; ++w) {
const int pred = GradientPredictor_C(preds[w - 1],
preds[w - stride],
const int pred = GradientPredictor_C(preds[w - 1], preds[w - stride],
preds[w - stride - 1]);
out[w] = (uint8_t)(in[w] - pred);
}
@@ -141,20 +140,20 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
//------------------------------------------------------------------------------
#if !WEBP_NEON_OMIT_C_CODE
static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
int width, int height, int stride,
static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT filtered_data) {
DoHorizontalFilter_C(data, width, height, stride, filtered_data);
}
static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
int width, int height, int stride,
static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT filtered_data) {
DoVerticalFilter_C(data, width, height, stride, filtered_data);
}
static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
int width, int height, int stride,
static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT filtered_data) {
DoGradientFilter_C(data, width, height, stride, filtered_data);
}
@@ -162,8 +161,8 @@ static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
//------------------------------------------------------------------------------
static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in, uint8_t* out,
int width) {
(void)prev;
if (out != in) memcpy(out, in, width * sizeof(*out));
}

View File

@@ -16,30 +16,33 @@
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "src/dsp/dsp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/dsp/dsp.h"
//------------------------------------------------------------------------------
// Helpful macro.
#define DCHECK(in, out) \
do { \
assert((in) != NULL); \
assert((out) != NULL); \
assert((in) != (out)); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
#define DCHECK(in, out) \
do { \
assert((in) != NULL); \
assert((out) != NULL); \
assert((in) != (out)); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
} while (0)
#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do { \
// clang-format off
#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) \
do { \
const uint8_t* psrc = (uint8_t*)(SRC); \
uint8_t* pdst = (uint8_t*)(DST); \
const int ilength = (int)(LENGTH); \
int temp0, temp1, temp2, temp3, temp4, temp5, temp6; \
__asm__ volatile ( \
__asm__ volatile( \
".set push \n\t" \
".set noreorder \n\t" \
"srl %[temp0], %[length], 2 \n\t" \
@@ -101,6 +104,7 @@
: "memory" \
); \
} while (0)
// clang-format on
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
uint8_t* WEBP_RESTRICT dst,
@@ -108,13 +112,15 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
DO_PREDICT_LINE(src, dst, length, 0);
}
#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) do { \
// clang-format off
#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) \
do { \
const uint8_t* psrc = (uint8_t*)(SRC); \
const uint8_t* ppred = (uint8_t*)(PRED); \
uint8_t* pdst = (uint8_t*)(DST); \
const int ilength = (int)(LENGTH); \
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; \
__asm__ volatile ( \
__asm__ volatile( \
".set push \n\t" \
".set noreorder \n\t" \
"srl %[temp0], %[length], 0x3 \n\t" \
@@ -167,9 +173,10 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
); \
} while (0)
#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST) do { \
#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST) \
do { \
int temp1, temp2, temp3; \
__asm__ volatile ( \
__asm__ volatile( \
"lbu %[temp1], 0(%[src]) \n\t" \
"lbu %[temp2], 0(%[pred]) \n\t" \
"subu %[temp3], %[temp1], %[temp2] \n\t" \
@@ -179,18 +186,20 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
: "memory" \
); \
} while (0)
// clang-format on
//------------------------------------------------------------------------------
// Horizontal filter.
#define FILTER_LINE_BY_LINE do { \
for (row = 1; row < height; ++row) { \
PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
preds += stride; \
in += stride; \
out += stride; \
} \
#define FILTER_LINE_BY_LINE \
do { \
for (row = 1; row < height; ++row) { \
PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
preds += stride; \
in += stride; \
out += stride; \
} \
} while (0)
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
@@ -221,13 +230,14 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
//------------------------------------------------------------------------------
// Vertical filter.
#define FILTER_LINE_BY_LINE do { \
for (row = 1; row < height; ++row) { \
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
preds += stride; \
in += stride; \
out += stride; \
} \
#define FILTER_LINE_BY_LINE \
do { \
for (row = 1; row < height; ++row) { \
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
preds += stride; \
in += stride; \
out += stride; \
} \
} while (0)
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
@@ -260,31 +270,30 @@ static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
int temp0;
__asm__ volatile (
"addu %[temp0], %[a], %[b] \n\t"
"subu %[temp0], %[temp0], %[c] \n\t"
"shll_s.w %[temp0], %[temp0], 23 \n\t"
"precrqu_s.qb.ph %[temp0], %[temp0], $zero \n\t"
"srl %[temp0], %[temp0], 24 \n\t"
: [temp0]"=&r"(temp0)
: [a]"r"(a),[b]"r"(b),[c]"r"(c)
);
__asm__ volatile(
"addu %[temp0], %[a], %[b] \n\t"
"subu %[temp0], %[temp0], %[c] \n\t"
"shll_s.w %[temp0], %[temp0], 23 \n\t"
"precrqu_s.qb.ph %[temp0], %[temp0], $zero \n\t"
"srl %[temp0], %[temp0], 24 \n\t"
: [temp0] "=&r"(temp0)
: [a] "r"(a), [b] "r"(b), [c] "r"(c));
return temp0;
}
#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do { \
for (row = 1; row < height; ++row) { \
int w; \
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
for (w = 1; w < width; ++w) { \
const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1], \
PREDS[w - stride], \
PREDS[w - stride - 1]); \
out[w] = in[w] OPERATION pred; \
} \
in += stride; \
out += stride; \
} \
#define FILTER_LINE_BY_LINE(PREDS, OPERATION) \
do { \
for (row = 1; row < height; ++row) { \
int w; \
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
for (w = 1; w < width; ++w) { \
const int pred = GradientPredictor_MIPSdspR2( \
PREDS[w - 1], PREDS[w - stride], PREDS[w - stride - 1]); \
out[w] = in[w] OPERATION pred; \
} \
in += stride; \
out += stride; \
} \
} while (0)
static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
@@ -316,8 +325,8 @@ static void GradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
}
static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,

View File

@@ -15,10 +15,10 @@
#if defined(WEBP_USE_MSA)
#include "src/dsp/msa_macro.h"
#include <assert.h>
#include "src/dsp/msa_macro.h"
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
const uint8_t* pred,
uint8_t* WEBP_RESTRICT dst,
@@ -57,21 +57,21 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
//------------------------------------------------------------------------------
// Helpful macro.
#define DCHECK(in, out) \
do { \
assert((in) != NULL); \
assert((out) != NULL); \
assert((in) != (out)); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
#define DCHECK(in, out) \
do { \
assert((in) != NULL); \
assert((out) != NULL); \
assert((in) != (out)); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
} while (0)
//------------------------------------------------------------------------------
// Horizontal filter
static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
int width, int height, int stride,
static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT filtered_data) {
const uint8_t* preds = data;
const uint8_t* in = data;
@@ -105,7 +105,7 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
uint8_t* WEBP_RESTRICT poutput,
int stride, int size) {
int w;
const v16i8 zero = { 0 };
const v16i8 zero = {0};
while (size >= 16) {
v16u8 pred0, dst0;
v8i16 a0, a1, b0, b1, c0, c1;
@@ -133,9 +133,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
}
}
static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
int width, int height, int stride,
static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT filtered_data) {
const uint8_t* in = data;
const uint8_t* preds = data;
@@ -151,7 +150,7 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
out += stride;
// Filter line-by-line.
while (row < height) {
out[0] = in[0] - preds[- stride];
out[0] = in[0] - preds[-stride];
PredictLineGradient(preds + 1, in + 1, out + 1, stride, width - 1);
++row;
preds += stride;
@@ -163,8 +162,8 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
//------------------------------------------------------------------------------
// Vertical filter
static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
int width, int height, int stride,
static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data, int width,
int height, int stride,
uint8_t* WEBP_RESTRICT filtered_data) {
const uint8_t* in = data;
const uint8_t* preds = data;

Some files were not shown because too many files have changed in this diff Show More