Add MSA optimized YUV to RGB upsampling functions

We add the following MSA optimized YUV to RGB upsampling functions:
- UpsampleRgbLinePair
- UpsampleBgrLinePair
- UpsampleRgbaLinePair
- UpsampleBgraLinePair
- UpsampleArgbLinePair
- UpsampleRgba4444LinePair
- UpsampleRgb565LinePair

Change-Id: I7264a615edc7eb376e443e9d38bd8e3c9a2cab1f
2025-07-15 21:39:59 +02:00 · 2016-07-21 20:00:44 +05:30
parent ebee57f4d1
commit d3ddacb625
8 changed files with 743 additions and 0 deletions
--- a/src/dsp/msa_macro.h
+++ b/src/dsp/msa_macro.h
@ -28,6 +28,9 @@
  #define SRAI_H(a, b)  __msa_srai_h((v8i16)a, b)
  #define SRAI_W(a, b)  __msa_srai_w((v4i32)a, b)
  #define SRLI_H(a, b)  __msa_srli_h((v8i16)a, b)
+  #define SLLI_B(a, b)  __msa_slli_b((v16i8)a, b)  /* byte lanes: v16i8 */
+  #define ANDI_B(a, b)  __msa_andi_b((v16u8)a, b)
+  #define ORI_B(a, b)   __msa_ori_b((v16u8)a, b)
 #else
  #define ADDVI_H(a, b)  (a + b)
  #define ADDVI_W(a, b)  (a + b)
@ -35,6 +38,9 @@
  #define SRAI_H(a, b)  (a >> b)
  #define SRAI_W(a, b)  (a >> b)
  #define SRLI_H(a, b)  (a << b)
+  #define SLLI_B(a, b)  (a << b)  /* operator form; args not parenthesized */
+  #define ANDI_B(a, b)  (a & b)   /* operator form of the bitwise AND */
+  #define ORI_B(a, b)   (a | b)   /* operator form of the bitwise OR */
 #endif

 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc))
@ -535,6 +541,23 @@
  CLIP_SH2_0_255(in2, in3);                      \
 } while (0)

+/* Description : Clips all unsigned halfword elements of input vector
+ *               between 0 & 255
+ * Arguments   : Input/output  - in (clipped in place, no separate output)
+ *               Return Type - unsigned halfword
+ * Details     : CLIP_UH2_0_255 applies the same clip to 'in0' and 'in1'.
+ */
+#define CLIP_UH_0_255(in) do {                    \
+  const v8u16 max_m = (v8u16)__msa_ldi_h(255);    \
+  in = __msa_maxi_u_h((v8u16) in, 0);             \
+  in = __msa_min_u_h((v8u16) max_m, (v8u16) in);  \
+} while (0)
+
+#define CLIP_UH2_0_255(in0, in1) do {  \
+  CLIP_UH_0_255(in0);                  \
+  CLIP_UH_0_255(in1);                  \
+} while (0)
+
 /* Description : Clips all signed word elements of input vector
 *               between 0 & 255
 * Arguments   : Input/output  - val
@ -1000,6 +1023,23 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
 #define PCKEV_W2_SW(...) PCKEV_W2(v4i32, __VA_ARGS__)
 #define PCKEV_W2_UW(...) PCKEV_W2(v4u32, __VA_ARGS__)

+/* Description : Pack odd halfword elements of vector pairs
+ * Arguments   : Inputs  - in0, in1, in2, in3
+ *               Outputs - out0, out1
+ *               Return Type - as per RTYPE
+ * Details     : Odd halfword elements of 'in0' are copied to the left half of
+ *               'out0' & odd halfword elements of 'in1' are copied to the
+ *               right half of 'out0'. Same for 'out1' with 'in2', 'in3'.
+ */
+#define PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) do {  \
+  out0 = (RTYPE)__msa_pckod_h((v8i16)in0, (v8i16)in1);        \
+  out1 = (RTYPE)__msa_pckod_h((v8i16)in2, (v8i16)in3);        \
+} while (0)
+#define PCKOD_H2_UH(...) PCKOD_H2(v8u16, __VA_ARGS__)
+#define PCKOD_H2_SH(...) PCKOD_H2(v8i16, __VA_ARGS__)
+#define PCKOD_H2_SW(...) PCKOD_H2(v4i32, __VA_ARGS__)
+#define PCKOD_H2_UW(...) PCKOD_H2(v4u32, __VA_ARGS__)
+
 /* Description : Arithmetic immediate shift right all elements of word vector
 * Arguments   : Inputs  - in0, in1, shift
 *               Outputs - in place operation
@ -1333,4 +1373,18 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
  ST4x4_UB(tmp0_m, tmp1_m, 0, 2, 0, 2, pdst, stride);          \
 } while (0)

+/* Description : Average with rounding (in0 + in1 + 1) / 2.
+ * Arguments   : Inputs  - in0, in1, in2, in3
+ *               Outputs - out0, out1
+ *               Return Type - as per RTYPE
+ * Details     : Each unsigned byte element of 'in0' is added to the
+ *               corresponding element of 'in1'; the rounded average is
+ *               written to 'out0'. Same for 'out1' with 'in2', 'in3'.
+ */
+#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) do {  \
+  out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1);       \
+  out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3);       \
+} while (0)
+#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
+
 #endif  /* WEBP_DSP_MSA_MACRO_H_ */