mirror of
https://github.com/webmproject/libwebp.git
synced 2025-04-05 08:26:51 +02:00
Compare commits
2 Commits
613be8fc61
...
7c70ff7a3b
Author | SHA1 | Date | |
---|---|---|---|
|
7c70ff7a3b | ||
|
9dd5ae819b |
@ -13,15 +13,21 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/decode.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Image transforms.
|
||||
|
@ -13,16 +13,19 @@
|
||||
// Jyrki Alakuijala (jyrki@google.com)
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "src/dec/vp8li_dec.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
|
||||
// Obtained in Python with:
|
||||
|
@ -14,11 +14,15 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// For sign-extended multiplying constants, pre-shifted by 5:
|
||||
#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
|
||||
@ -645,25 +649,43 @@ static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 2 <= num_pixels; i += 2) {
|
||||
// we can only process two pixels at a time
|
||||
const __m128i L = _mm_loadl_epi64((const __m128i*)&in[i - 1]);
|
||||
const __m128i src = _mm_loadl_epi64((const __m128i*)&in[i]);
|
||||
const __m128i T = _mm_loadl_epi64((const __m128i*)&upper[i]);
|
||||
const __m128i TL = _mm_loadl_epi64((const __m128i*)&upper[i - 1]);
|
||||
const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
|
||||
const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
|
||||
const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
|
||||
const __m128i sum = _mm_add_epi16(T_lo, L_lo);
|
||||
const __m128i avg = _mm_srli_epi16(sum, 1);
|
||||
const __m128i A1 = _mm_sub_epi16(avg, TL_lo);
|
||||
const __m128i bit_fix = _mm_cmpgt_epi16(TL_lo, avg);
|
||||
const __m128i A2 = _mm_sub_epi16(A1, bit_fix);
|
||||
const __m128i A3 = _mm_srai_epi16(A2, 1);
|
||||
const __m128i A4 = _mm_add_epi16(avg, A3);
|
||||
const __m128i pred = _mm_packus_epi16(A4, A4);
|
||||
const __m128i res = _mm_sub_epi8(src, pred);
|
||||
_mm_storel_epi64((__m128i*)&out[i], res);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
|
||||
const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
|
||||
const __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
|
||||
__m128i A4_lo, A4_hi;
|
||||
// lo.
|
||||
{
|
||||
const __m128i L_lo = _mm_unpacklo_epi8(L, zero);
|
||||
const __m128i T_lo = _mm_unpacklo_epi8(T, zero);
|
||||
const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero);
|
||||
const __m128i sum_lo = _mm_add_epi16(T_lo, L_lo);
|
||||
const __m128i avg_lo = _mm_srli_epi16(sum_lo, 1);
|
||||
const __m128i A1_lo = _mm_sub_epi16(avg_lo, TL_lo);
|
||||
const __m128i bit_fix_lo = _mm_cmpgt_epi16(TL_lo, avg_lo);
|
||||
const __m128i A2_lo = _mm_sub_epi16(A1_lo, bit_fix_lo);
|
||||
const __m128i A3_lo = _mm_srai_epi16(A2_lo, 1);
|
||||
A4_lo = _mm_add_epi16(avg_lo, A3_lo);
|
||||
}
|
||||
// hi.
|
||||
{
|
||||
const __m128i L_hi = _mm_unpackhi_epi8(L, zero);
|
||||
const __m128i T_hi = _mm_unpackhi_epi8(T, zero);
|
||||
const __m128i TL_hi = _mm_unpackhi_epi8(TL, zero);
|
||||
const __m128i sum_hi = _mm_add_epi16(T_hi, L_hi);
|
||||
const __m128i avg_hi = _mm_srli_epi16(sum_hi, 1);
|
||||
const __m128i A1_hi = _mm_sub_epi16(avg_hi, TL_hi);
|
||||
const __m128i bit_fix_hi = _mm_cmpgt_epi16(TL_hi, avg_hi);
|
||||
const __m128i A2_hi = _mm_sub_epi16(A1_hi, bit_fix_hi);
|
||||
const __m128i A3_hi = _mm_srai_epi16(A2_hi, 1);
|
||||
A4_hi = _mm_add_epi16(avg_hi, A3_hi);
|
||||
}
|
||||
{
|
||||
const __m128i pred = _mm_packus_epi16(A4_lo, A4_hi);
|
||||
const __m128i res = _mm_sub_epi8(src, pred);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[13](in + i, upper + i, num_pixels - i, out + i);
|
||||
|
@ -14,9 +14,13 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include <assert.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Cost operations.
|
||||
|
@ -15,10 +15,13 @@
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/dsp/common_sse2.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include <emmintrin.h>
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictor Transform
|
||||
|
@ -13,9 +13,10 @@
|
||||
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
#include "src/dsp/common_sse41.h"
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
Loading…
x
Reference in New Issue
Block a user