mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 10:25:46 +01:00 
			
		
		
		
	MIPS: dspr2: added optimization for functions VE4 and DC4
Change-Id: I118adc6d3872742d8b1f9dbac438cba6fc90b7a9
This commit is contained in:
		| @@ -612,6 +612,65 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static void VE4(uint8_t* dst) {    // vertical: each of the 4 output bytes is (top[i-1] + 2*top[i] + top[i+1] + 2) >> 2 with top = dst - BPS; the same word is stored to rows 0..3. NOTE(review): lane order assumes little-endian loads - confirm. | ||||||
|  |   const uint8_t* top = dst - BPS; | ||||||
|  |   int temp0, temp1, temp2, temp3, temp4, temp5, temp6; | ||||||
|  |   __asm__ volatile ( | ||||||
|  |     "ulw             %[temp0],   -1(%[top])              \n\t" /* unaligned load of bytes top[-1..2] */ | ||||||
|  |     "ulh             %[temp1],   3(%[top])               \n\t" /* unaligned load of bytes top[3..4] */ | ||||||
|  |     "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t" /* widen the low two bytes to 16-bit lanes */ | ||||||
|  |     "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t" /* widen the high two bytes to 16-bit lanes */ | ||||||
|  |     "preceu.ph.qbr   %[temp4],   %[temp1]                \n\t" | ||||||
|  |     "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t" | ||||||
|  |     "packrl.ph       %[temp6],   %[temp4],    %[temp3]   \n\t" | ||||||
|  |     "shll.ph         %[temp5],   %[temp5],    1          \n\t" /* double each 16-bit lane */ | ||||||
|  |     "shll.ph         %[temp6],   %[temp6],    1          \n\t" | ||||||
|  |     "addq.ph         %[temp2],   %[temp5],    %[temp2]   \n\t" | ||||||
|  |     "addq.ph         %[temp6],   %[temp6],    %[temp4]   \n\t" | ||||||
|  |     "addq.ph         %[temp2],   %[temp2],    %[temp3]   \n\t" | ||||||
|  |     "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t" | ||||||
|  |     "shra_r.ph       %[temp2],   %[temp2],    2          \n\t" /* rounding shift: (x + 2) >> 2 per lane */ | ||||||
|  |     "shra_r.ph       %[temp6],   %[temp6],    2          \n\t" | ||||||
|  |     "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t" /* narrow the four 16-bit lanes back to bytes */ | ||||||
|  |     "usw             %[temp4],   0*"XSTR(BPS)"(%[dst])   \n\t" /* same word written to each of the four rows */ | ||||||
|  |     "usw             %[temp4],   1*"XSTR(BPS)"(%[dst])   \n\t" | ||||||
|  |     "usw             %[temp4],   2*"XSTR(BPS)"(%[dst])   \n\t" | ||||||
|  |     "usw             %[temp4],   3*"XSTR(BPS)"(%[dst])   \n\t" | ||||||
|  |     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), | ||||||
|  |       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), | ||||||
|  |       [temp6]"=&r"(temp6) | ||||||
|  |     : [top]"r"(top), [dst]"r"(dst) | ||||||
|  |     : "memory" | ||||||
|  |   ); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void DC4(uint8_t* dst) {   // DC: fills four rows of 4 bytes at dst (stride BPS) with (sum of the 4 bytes above + the 4 bytes to the left + 4) >> 3 | ||||||
|  |   int temp0, temp1, temp2, temp3, temp4; | ||||||
|  |   __asm__ volatile ( | ||||||
|  |     "ulw          %[temp0],   -1*"XSTR(BPS)"(%[dst])   \n\t" /* 4 bytes of the row above dst */ | ||||||
|  |     "lbu          %[temp1],   -1+0*"XSTR(BPS)"(%[dst]) \n\t" /* byte left of each of rows 0..3 */ | ||||||
|  |     "lbu          %[temp2],   -1+1*"XSTR(BPS)"(%[dst]) \n\t" | ||||||
|  |     "lbu          %[temp3],   -1+2*"XSTR(BPS)"(%[dst]) \n\t" | ||||||
|  |     "lbu          %[temp4],   -1+3*"XSTR(BPS)"(%[dst]) \n\t" | ||||||
|  |     "ins          %[temp1],   %[temp2],    8,     8    \n\t" /* pack the four left bytes into temp1 */ | ||||||
|  |     "ins          %[temp1],   %[temp3],    16,    8    \n\t" | ||||||
|  |     "ins          %[temp1],   %[temp4],    24,    8    \n\t" | ||||||
|  |     "raddu.w.qb   %[temp0],   %[temp0]                 \n\t" /* sum of the four top bytes */ | ||||||
|  |     "raddu.w.qb   %[temp1],   %[temp1]                 \n\t" /* sum of the four left bytes */ | ||||||
|  |     "addu         %[temp0],   %[temp0],    %[temp1]    \n\t" | ||||||
|  |     "shra_r.w     %[temp0],   %[temp0],    3           \n\t" /* rounding shift: (sum + 4) >> 3 */ | ||||||
|  |     "replv.qb     %[temp0],   %[temp0]                 \n\t" /* replicate the low byte into all four bytes */ | ||||||
|  |     "usw          %[temp0],   0*"XSTR(BPS)"(%[dst])    \n\t" /* same word written to each of the four rows */ | ||||||
|  |     "usw          %[temp0],   1*"XSTR(BPS)"(%[dst])    \n\t" | ||||||
|  |     "usw          %[temp0],   2*"XSTR(BPS)"(%[dst])    \n\t" | ||||||
|  |     "usw          %[temp0],   3*"XSTR(BPS)"(%[dst])    \n\t" | ||||||
|  |     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), | ||||||
|  |       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4) | ||||||
|  |     : [dst]"r"(dst) | ||||||
|  |     : "memory" | ||||||
|  |   ); | ||||||
|  | } | ||||||
|  |  | ||||||
| #endif  // WEBP_USE_MIPS_DSP_R2 | #endif  // WEBP_USE_MIPS_DSP_R2 | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| @@ -636,5 +695,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPSdspR2(void) { | |||||||
|   VP8SimpleHFilter16 = SimpleHFilter16; |   VP8SimpleHFilter16 = SimpleHFilter16; | ||||||
|   VP8SimpleVFilter16i = SimpleVFilter16i; |   VP8SimpleVFilter16i = SimpleVFilter16i; | ||||||
|   VP8SimpleHFilter16i = SimpleHFilter16i; |   VP8SimpleHFilter16i = SimpleHFilter16i; | ||||||
|  |   VP8PredLuma4[0] = DC4; | ||||||
|  |   VP8PredLuma4[2] = VE4; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user