diff --git a/src/dsp/dec_mips32.c b/src/dsp/dec_mips32.c index e85bbb15..4e9ef426 100644 --- a/src/dsp/dec_mips32.c +++ b/src/dsp/dec_mips32.c @@ -391,7 +391,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "sra %[temp7], %[temp7], 3 \n\t" "sra %[temp4], %[temp4], 3 \n\t" "addiu %[temp6], $zero, 255 \n\t" - "lbu %[temp1], 0+0*"XSTR(BPS)"(%[dst]) \n\t" + "lbu %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp1], %[temp1], %[temp5] \n\t" "sra %[temp5], %[temp1], 8 \n\t" "sra %[temp18], %[temp1], 31 \n\t" @@ -399,8 +399,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp1], %[temp1], %[temp1] \n\t" "movz %[temp1], %[temp6], %[temp18] \n\t" "1: \n\t" - "lbu %[temp18], 1+0*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp1], 0+0*"XSTR(BPS)"(%[dst]) \n\t" + "lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp18], %[temp18], %[temp11] \n\t" "sra %[temp11], %[temp18], 8 \n\t" "sra %[temp1], %[temp18], 31 \n\t" @@ -408,8 +408,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp18], %[temp18], %[temp18] \n\t" "movz %[temp18], %[temp6], %[temp1] \n\t" "2: \n\t" - "lbu %[temp1], 2+0*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp18], 1+0*"XSTR(BPS)"(%[dst]) \n\t" + "lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp1], %[temp1], %[temp8] \n\t" "sra %[temp8], %[temp1], 8 \n\t" "sra %[temp18], %[temp1], 31 \n\t" @@ -417,8 +417,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp1], %[temp1], %[temp1] \n\t" "movz %[temp1], %[temp6], %[temp18] \n\t" "3: \n\t" - "lbu %[temp18], 3+0*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp1], 2+0*"XSTR(BPS)"(%[dst]) \n\t" + "lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp18], %[temp18], %[temp16] \n\t" "sra %[temp16], %[temp18], 8 \n\t" "sra %[temp1], %[temp18], 31 \n\t" @@ -426,11 +426,11 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp18], %[temp18], %[temp18] \n\t" "movz %[temp18], %[temp6], %[temp1] \n\t" "4: \n\t" - "sb %[temp18], 3+0*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp5], 0+1*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp8], 1+1*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp11], 2+1*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp16], 3+1*"XSTR(BPS)"(%[dst]) \n\t" + "sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp5], %[temp5], %[temp17] \n\t" "addu %[temp8], %[temp8], %[temp15] \n\t" "addu %[temp11], %[temp11], %[temp12] \n\t" @@ -459,14 +459,14 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp16], %[temp16], %[temp16] \n\t" "movz %[temp16], %[temp6], %[temp15] \n\t" "8: \n\t" - "sb %[temp5], 0+1*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp8], 1+1*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp11], 2+1*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp16], 3+1*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp5], 0+2*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp8], 1+2*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp11], 2+2*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp16], 3+2*"XSTR(BPS)"(%[dst]) \n\t" + "sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp5], %[temp5], %[temp9] \n\t" "addu %[temp8], %[temp8], %[temp3] \n\t" "addu %[temp11], %[temp11], %[temp0] \n\t" @@ -495,14 +495,14 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp16], %[temp16], %[temp16] \n\t" "movz %[temp16], %[temp6], %[temp3] \n\t" "12: \n\t" - "sb %[temp5], 0+2*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp8], 1+2*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp11], 2+2*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp16], 3+2*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp5], 0+3*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp8], 1+3*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp11], 2+3*"XSTR(BPS)"(%[dst]) \n\t" - "lbu %[temp16], 3+3*"XSTR(BPS)"(%[dst]) \n\t" + "sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t" + "lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t" "addu %[temp5], %[temp5], %[temp13] \n\t" "addu %[temp8], %[temp8], %[temp7] \n\t" "addu %[temp11], %[temp11], %[temp4] \n\t" @@ -531,10 +531,10 @@ static void TransformOne(const int16_t* in, uint8_t* dst) { "xor %[temp16], %[temp16], %[temp16] \n\t" "movz %[temp16], %[temp6], %[temp3] \n\t" "16: \n\t" - "sb %[temp5], 0+3*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp8], 1+3*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp11], 2+3*"XSTR(BPS)"(%[dst]) \n\t" - "sb %[temp16], 3+3*"XSTR(BPS)"(%[dst]) \n\t" + "sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t" + "sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), diff --git a/src/dsp/dec_mips_dsp_r2.c b/src/dsp/dec_mips_dsp_r2.c index 40e4d821..db5c6572 100644 --- a/src/dsp/dec_mips_dsp_r2.c +++ b/src/dsp/dec_mips_dsp_r2.c @@ -548,10 +548,10 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { // TEMP3 = SRC[D + D1 * BPS] #define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \ A, A1, B, B1, C, C1, D, D1, SRC) \ - "lbu %[" #TEMP0 "], " #A "+" #A1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \ - "lbu %[" #TEMP1 "], " #B "+" #B1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \ - "lbu %[" #TEMP2 "], " #C "+" #C1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \ - "lbu %[" #TEMP3 "], " #D "+" #D1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \ + "lbu %[" #TEMP0 "], " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \ + "lbu %[" #TEMP1 "], " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \ + "lbu %[" #TEMP2 "], " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \ + "lbu %[" #TEMP3 "], " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) { int i; @@ -623,8 +623,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { // DST[A * BPS] = TEMP0 // DST[B + C * BPS] = TEMP1 #define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \ - "usw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #DST "]) \n\t" \ - "usw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #DST "]) \n\t" + "usw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #DST "]) \n\t" \ + "usw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #DST "]) \n\t" static void VE4(uint8_t* dst) { // vertical const uint8_t* top = dst - BPS; @@ -659,7 +659,7 @@ static void VE4(uint8_t* dst) { // vertical static void DC4(uint8_t* dst) { // DC int temp0, temp1, temp2, temp3, temp4; __asm__ volatile ( - "ulw %[temp0], -1*"XSTR(BPS)"(%[dst]) \n\t" + "ulw %[temp0], -1*" XSTR(BPS) "(%[dst]) \n\t" LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst) "ins %[temp1], %[temp2], 8, 8 \n\t" "ins %[temp1], %[temp3], 16, 8 \n\t" @@ -683,7 +683,7 @@ static void RD4(uint8_t* dst) { // Down-right int temp5, temp6, temp7, temp8; __asm__ volatile ( LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst) - "ulw %[temp7], -1-"XSTR(BPS)"(%[dst]) \n\t" + "ulw %[temp7], -1-" XSTR(BPS) "(%[dst]) \n\t" "ins %[temp1], %[temp0], 16, 16 \n\t" "preceu.ph.qbr %[temp5], %[temp7] \n\t" "ins %[temp2], %[temp1], 16, 16 \n\t" @@ -702,7 +702,7 @@ static void RD4(uint8_t* dst) { // Down-right "shll.ph %[temp0], %[temp0], 1 \n\t" "shra_r.ph %[temp1], %[temp1], 2 \n\t" "addq.ph %[temp8], %[temp0], %[temp8] \n\t" - "lbu %[temp5], 3-"XSTR(BPS)"(%[dst]) \n\t" + "lbu %[temp5], 3-" XSTR(BPS) "(%[dst]) \n\t" "precrq.ph.w %[temp7], %[temp7], %[temp7] \n\t" "shra_r.ph %[temp8], %[temp8], 2 \n\t" "ins %[temp7], %[temp5], 0, 8 \n\t" @@ -725,8 +725,8 @@ static void RD4(uint8_t* dst) { // Down-right // TEMP0 = SRC[A * BPS] // TEMP1 = SRC[B + C * BPS] #define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \ - "ulw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \ - "ulw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #SRC "]) \n\t" + "ulw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \ + "ulw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "]) \n\t" static void LD4(uint8_t* dst) { // Down-Left int temp0, temp1, temp2, temp3, temp4; diff --git a/src/dsp/enc_mips32.c b/src/dsp/enc_mips32.c index b50e08b2..fd10143d 100644 --- a/src/dsp/enc_mips32.c +++ b/src/dsp/enc_mips32.c @@ -59,58 +59,58 @@ static const int kC2 = 35468; // A - offset in bytes to load from ref and store to dst buffer // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements #define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \ - "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \ - "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ - "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ - "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \ - "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \ - "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \ - "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \ - "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \ - "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \ - "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \ - "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \ - "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ - "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \ - "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \ - "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \ - "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \ - "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \ - "lw %[temp20], 0(%[args]) \n\t" \ - "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \ - "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \ - "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \ - "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \ - "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \ - "lbu %[temp17], 1+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \ - "lbu %[temp18], 2+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \ - "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \ - "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \ - "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \ - "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \ - "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \ - "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \ - "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \ - "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \ - "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \ - "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \ - "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \ - "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \ - "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \ - "addiu %[temp20], $zero, 255 \n\t" \ - "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \ - "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \ - "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \ - "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \ - "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \ - "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \ - "lw %[temp16], 8(%[args]) \n\t" \ - "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \ - "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \ - "sb %[" #TEMP0 "], 0+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \ - "sb %[" #TEMP4 "], 1+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \ - "sb %[" #TEMP8 "], 2+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \ - "sb %[" #TEMP12 "], 3+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" + "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \ + "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ + "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ + "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \ + "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \ + "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \ + "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \ + "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \ + "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \ + "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \ + "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \ + "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ + "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \ + "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \ + "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \ + "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \ + "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \ + "lw %[temp20], 0(%[args]) \n\t" \ + "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \ + "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \ + "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \ + "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \ + "lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \ + "lbu %[temp17], 1+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \ + "lbu %[temp18], 2+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \ + "lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[temp20]) \n\t" \ + "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \ + "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \ + "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \ + "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \ + "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \ + "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \ + "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \ + "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \ + "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \ + "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \ + "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \ + "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \ + "addiu %[temp20], $zero, 255 \n\t" \ + "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \ + "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \ + "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \ + "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \ + "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \ + "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \ + "lw %[temp16], 8(%[args]) \n\t" \ + "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \ + "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \ + "sb %[" #TEMP0 "], 0+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \ + "sb %[" #TEMP4 "], 1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \ + "sb %[" #TEMP8 "], 2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" \ + "sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t" // Does one or two inverse transforms. static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, @@ -253,39 +253,39 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32], // A - offset in bytes to load from a and b buffers // E..H - offsets in bytes to store first results to tmp buffer // E1..H1 - offsets in bytes to store second results to tmp buffer -#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \ - "lbu %[temp0], 0+"XSTR(BPS)"*" #A "(%[a]) \n\t" \ - "lbu %[temp1], 1+"XSTR(BPS)"*" #A "(%[a]) \n\t" \ - "lbu %[temp2], 2+"XSTR(BPS)"*" #A "(%[a]) \n\t" \ - "lbu %[temp3], 3+"XSTR(BPS)"*" #A "(%[a]) \n\t" \ - "lbu %[temp4], 0+"XSTR(BPS)"*" #A "(%[b]) \n\t" \ - "lbu %[temp5], 1+"XSTR(BPS)"*" #A "(%[b]) \n\t" \ - "lbu %[temp6], 2+"XSTR(BPS)"*" #A "(%[b]) \n\t" \ - "lbu %[temp7], 3+"XSTR(BPS)"*" #A "(%[b]) \n\t" \ - "addu %[temp8], %[temp0], %[temp2] \n\t" \ - "subu %[temp0], %[temp0], %[temp2] \n\t" \ - "addu %[temp2], %[temp1], %[temp3] \n\t" \ - "subu %[temp1], %[temp1], %[temp3] \n\t" \ - "addu %[temp3], %[temp4], %[temp6] \n\t" \ - "subu %[temp4], %[temp4], %[temp6] \n\t" \ - "addu %[temp6], %[temp5], %[temp7] \n\t" \ - "subu %[temp5], %[temp5], %[temp7] \n\t" \ - "addu %[temp7], %[temp8], %[temp2] \n\t" \ - "subu %[temp2], %[temp8], %[temp2] \n\t" \ - "addu %[temp8], %[temp0], %[temp1] \n\t" \ - "subu %[temp0], %[temp0], %[temp1] \n\t" \ - "addu %[temp1], %[temp3], %[temp6] \n\t" \ - "subu %[temp3], %[temp3], %[temp6] \n\t" \ - "addu %[temp6], %[temp4], %[temp5] \n\t" \ - "subu %[temp4], %[temp4], %[temp5] \n\t" \ - "sw %[temp7], " #E "(%[tmp]) \n\t" \ - "sw %[temp2], " #H "(%[tmp]) \n\t" \ - "sw %[temp8], " #F "(%[tmp]) \n\t" \ - "sw %[temp0], " #G "(%[tmp]) \n\t" \ - "sw %[temp1], " #E1 "(%[tmp]) \n\t" \ - "sw %[temp3], " #H1 "(%[tmp]) \n\t" \ - "sw %[temp6], " #F1 "(%[tmp]) \n\t" \ - "sw %[temp4], " #G1 "(%[tmp]) \n\t" +#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \ + "lbu %[temp0], 0+" XSTR(BPS) "*" #A "(%[a]) \n\t" \ + "lbu %[temp1], 1+" XSTR(BPS) "*" #A "(%[a]) \n\t" \ + "lbu %[temp2], 2+" XSTR(BPS) "*" #A "(%[a]) \n\t" \ + "lbu %[temp3], 3+" XSTR(BPS) "*" #A "(%[a]) \n\t" \ + "lbu %[temp4], 0+" XSTR(BPS) "*" #A "(%[b]) \n\t" \ + "lbu %[temp5], 1+" XSTR(BPS) "*" #A "(%[b]) \n\t" \ + "lbu %[temp6], 2+" XSTR(BPS) "*" #A "(%[b]) \n\t" \ + "lbu %[temp7], 3+" XSTR(BPS) "*" #A "(%[b]) \n\t" \ + "addu %[temp8], %[temp0], %[temp2] \n\t" \ + "subu %[temp0], %[temp0], %[temp2] \n\t" \ + "addu %[temp2], %[temp1], %[temp3] \n\t" \ + "subu %[temp1], %[temp1], %[temp3] \n\t" \ + "addu %[temp3], %[temp4], %[temp6] \n\t" \ + "subu %[temp4], %[temp4], %[temp6] \n\t" \ + "addu %[temp6], %[temp5], %[temp7] \n\t" \ + "subu %[temp5], %[temp5], %[temp7] \n\t" \ + "addu %[temp7], %[temp8], %[temp2] \n\t" \ + "subu %[temp2], %[temp8], %[temp2] \n\t" \ + "addu %[temp8], %[temp0], %[temp1] \n\t" \ + "subu %[temp0], %[temp0], %[temp1] \n\t" \ + "addu %[temp1], %[temp3], %[temp6] \n\t" \ + "subu %[temp3], %[temp3], %[temp6] \n\t" \ + "addu %[temp6], %[temp4], %[temp5] \n\t" \ + "subu %[temp4], %[temp4], %[temp5] \n\t" \ + "sw %[temp7], " #E "(%[tmp]) \n\t" \ + "sw %[temp2], " #H "(%[tmp]) \n\t" \ + "sw %[temp8], " #F "(%[tmp]) \n\t" \ + "sw %[temp0], " #G "(%[tmp]) \n\t" \ + "sw %[temp1], " #E1 "(%[tmp]) \n\t" \ + "sw %[temp3], " #H1 "(%[tmp]) \n\t" \ + "sw %[temp6], " #F1 "(%[tmp]) \n\t" \ + "sw %[temp4], " #G1 "(%[tmp]) \n\t" // macro for one vertical pass in Disto4x4 (TTransform) // two calls of function TTransform are merged into single one @@ -412,39 +412,39 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b, // temp0..temp15 holds tmp[0]..tmp[15] // A - offset in bytes to load from src and ref buffers // TEMP0..TEMP3 - registers for corresponding tmp elements -#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \ - "lw %[" #TEMP1 "], 0(%[args]) \n\t" \ - "lw %[" #TEMP2 "], 4(%[args]) \n\t" \ - "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \ - "lbu %[temp17], 0+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \ - "lbu %[temp18], 1+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \ - "lbu %[temp19], 1+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \ - "subu %[temp20], %[temp16], %[temp17] \n\t" \ - "lbu %[temp16], 2+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \ - "lbu %[temp17], 2+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \ - "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \ - "lbu %[temp18], 3+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \ - "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \ - "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \ - "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \ - "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \ - "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \ - "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ - "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ - "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \ - "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \ - "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \ - "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \ - "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \ - "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \ - "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \ - "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \ - "addiu %[temp16], %[temp16], 1812 \n\t" \ - "addiu %[temp17], %[temp17], 937 \n\t" \ - "addu %[temp16], %[temp16], %[temp19] \n\t" \ - "subu %[temp17], %[temp17], %[temp18] \n\t" \ - "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \ - "sra %[" #TEMP3 "], %[temp17], 9 \n\t" +#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \ + "lw %[" #TEMP1 "], 0(%[args]) \n\t" \ + "lw %[" #TEMP2 "], 4(%[args]) \n\t" \ + "lbu %[temp16], 0+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp17], 0+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \ + "lbu %[temp18], 1+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp19], 1+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \ + "subu %[temp20], %[temp16], %[temp17] \n\t" \ + "lbu %[temp16], 2+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp17], 2+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \ + "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \ + "lbu %[temp18], 3+" XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp19], 3+" XSTR(BPS) "*" #A "(%[" #TEMP2 "]) \n\t" \ + "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \ + "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \ + "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \ + "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \ + "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ + "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ + "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \ + "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \ + "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \ + "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \ + "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \ + "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \ + "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \ + "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \ + "addiu %[temp16], %[temp16], 1812 \n\t" \ + "addiu %[temp17], %[temp17], 937 \n\t" \ + "addu %[temp16], %[temp16], %[temp19] \n\t" \ + "subu %[temp17], %[temp17], %[temp18] \n\t" \ + "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \ + "sra %[" #TEMP3 "], %[temp17], 9 \n\t" // macro for one vertical pass in FTransform // temp0..temp15 holds tmp[0]..tmp[15] diff --git a/src/dsp/enc_mips_dsp_r2.c b/src/dsp/enc_mips_dsp_r2.c index 44f6fd25..7c814fa0 100644 --- a/src/dsp/enc_mips_dsp_r2.c +++ b/src/dsp/enc_mips_dsp_r2.c @@ -79,8 +79,8 @@ static const int kC2 = 35468; #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \ "lw %[" #TEMP0 "], 0(%[args]) \n\t" \ "lw %[" #TEMP1 "], 4(%[args]) \n\t" \ - "lw %[" #TEMP2 "], "XSTR(BPS)"*" #A "(%[" #TEMP0 "]) \n\t" \ - "lw %[" #TEMP3 "], "XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \ + "lw %[" #TEMP2 "], " XSTR(BPS) "*" #A "(%[" #TEMP0 "]) \n\t" \ + "lw %[" #TEMP3 "], " XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t" \ "preceu.ph.qbl %[" #TEMP0 "], %[" #TEMP2 "] \n\t" \ "preceu.ph.qbl %[" #TEMP1 "], %[" #TEMP3 "] \n\t" \ "preceu.ph.qbr %[" #TEMP2 "], %[" #TEMP2 "] \n\t" \ @@ -328,13 +328,13 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b, //------------------------------------------------------------------------------ // Intra predictions -#define FILL_PART(J, SIZE) \ - "usw %[value], 0+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \ - "usw %[value], 4+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \ - ".if " #SIZE " == 16 \n\t" \ - "usw %[value], 8+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \ - "usw %[value], 12+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \ - ".endif \n\t" +#define FILL_PART(J, SIZE) \ + "usw %[value], 0+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \ + "usw %[value], 4+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \ + ".if " #SIZE " == 16 \n\t" \ + "usw %[value], 8+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \ + "usw %[value], 12+" #J "*" XSTR(BPS) "(%[dst]) \n\t" \ + ".endif \n\t" #define FILL_8_OR_16(DST, VALUE, SIZE) do { \ int value = (VALUE); \ @@ -597,10 +597,10 @@ static void DC4(uint8_t* dst, const uint8_t* top) { "addiu %[temp0], %[temp0], 4 \n\t" "srl %[temp0], %[temp0], 3 \n\t" "replv.qb %[temp0], %[temp0] \n\t" - "usw %[temp0], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp0], 1*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp0], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp0], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp0], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp0], 1*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp0], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp0], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1) : [top]"r"(top), [dst]"r"(dst) : "memory" @@ -650,10 +650,10 @@ static void TM4(uint8_t* dst, const uint8_t* top) { "shll_s.ph %[temp5], %[temp5], 7 \n\t" "precrqu_s.qb.ph %[temp2], %[temp3], %[temp2] \n\t" "precrqu_s.qb.ph %[temp3], %[temp4], %[temp5] \n\t" - "usw %[temp1], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp0], 1*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp3], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp2], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp1], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp0], 1*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp3], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp2], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [a10]"=&r"(a10), [a32]"=&r"(a32) @@ -681,10 +681,10 @@ static void VE4(uint8_t* dst, const uint8_t* top) { "shra_r.ph %[temp2], %[temp2], 2 \n\t" "shra_r.ph %[temp6], %[temp6], 2 \n\t" "precr.qb.ph %[temp4], %[temp6], %[temp2] \n\t" - "usw %[temp4], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp4], 1*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp4], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp4], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp4], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp4], 1*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp4], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp4], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6) @@ -717,10 +717,10 @@ static void HE4(uint8_t* dst, const uint8_t* top) { "srl %[temp2], %[temp2], 16 \n\t" "replv.qb %[temp3], %[temp3] \n\t" "replv.qb %[temp2], %[temp2] \n\t" - "usw %[temp3], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp0], 1*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp2], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp1], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp3], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp0], 1*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp2], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp1], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6) @@ -763,12 +763,12 @@ static void RD4(uint8_t* dst, const uint8_t* top) { "precr.qb.ph %[temp9], %[temp10], %[temp9] \n\t" "shra_r.w %[temp0], %[temp0], 2 \n\t" "precr.qb.ph %[temp10], %[temp11], %[temp10] \n\t" - "usw %[temp9], 3*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp10], 1*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp9], 3*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp10], 1*" XSTR(BPS) "(%[dst]) \n\t" "prepend %[temp9], %[temp11], 8 \n\t" "prepend %[temp10], %[temp0], 8 \n\t" - "usw %[temp9], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp10], 0*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp9], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp10], 0*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), @@ -812,13 +812,13 @@ static void VR4(uint8_t* dst, const uint8_t* top) { "append %[temp3], %[temp1], 16 \n\t" "precr.qb.ph %[temp8], %[temp8], %[temp4] \n\t" "precr.qb.ph %[temp3], %[temp2], %[temp3] \n\t" - "usw %[temp8], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp3], 1*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp8], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp3], 1*" XSTR(BPS) "(%[dst]) \n\t" "append %[temp3], %[temp6], 8 \n\t" "srl %[temp6], %[temp6], 16 \n\t" "append %[temp8], %[temp6], 8 \n\t" - "usw %[temp3], 3*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp8], 2*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp3], 3*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp8], 2*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), @@ -860,12 +860,12 @@ static void LD4(uint8_t* dst, const uint8_t* top) { "precr.qb.ph %[temp10], %[temp11], %[temp10] \n\t" "addu %[temp1], %[temp1], %[temp5] \n\t" "shra_r.w %[temp1], %[temp1], 2 \n\t" - "usw %[temp9], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp10], 2*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp9], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp10], 2*" XSTR(BPS) "(%[dst]) \n\t" "prepend %[temp9], %[temp11], 8 \n\t" "prepend %[temp10], %[temp1], 8 \n\t" - "usw %[temp9], 1*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp10], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp9], 1*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp10], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), @@ -908,13 +908,13 @@ static void VL4(uint8_t* dst, const uint8_t* top) { "append %[temp2], %[temp0], 16 \n\t" "precr.qb.ph %[temp8], %[temp8], %[temp5] \n\t" "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" - "usw %[temp8], 0*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp8], 0*" XSTR(BPS) "(%[dst]) \n\t" "prepend %[temp8], %[temp6], 8 \n\t" - "usw %[temp3], 1*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp3], 1*" XSTR(BPS) "(%[dst]) \n\t" "srl %[temp6], %[temp6], 16 \n\t" "prepend %[temp3], %[temp6], 8 \n\t" - "usw %[temp8], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp3], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp8], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp3], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), @@ -955,14 +955,14 @@ static void HD4(uint8_t* dst, const uint8_t* top) { "precrq.ph.w %[temp3], %[temp0], %[temp4] \n\t" "precr.qb.ph %[temp7], %[temp6], %[temp1] \n\t" "precr.qb.ph %[temp6], %[temp1], %[temp3] \n\t" - "usw %[temp7], 0*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp6], 1*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp7], 0*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp6], 1*" XSTR(BPS) "(%[dst]) \n\t" "append %[temp2], %[temp5], 16 \n\t" "append %[temp0], %[temp4], 16 \n\t" "precr.qb.ph %[temp5], %[temp3], %[temp2] \n\t" "precr.qb.ph %[temp4], %[temp2], %[temp0] \n\t" - "usw %[temp5], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp4], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp5], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp4], 3*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), @@ -994,12 +994,12 @@ static void HU4(uint8_t* dst, const uint8_t* top) { "precrq.ph.w %[temp2], %[temp6], %[temp4] \n\t" "append %[temp0], %[temp5], 16 \n\t" "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" - "usw %[temp3], 0*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp3], 0*" XSTR(BPS) "(%[dst]) \n\t" "precr.qb.ph %[temp1], %[temp7], %[temp0] \n\t" - "usw %[temp7], 3*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp7], 3*" XSTR(BPS) "(%[dst]) \n\t" "packrl.ph %[temp2], %[temp1], %[temp3] \n\t" - "usw %[temp1], 2*"XSTR(BPS)"(%[dst]) \n\t" - "usw %[temp2], 1*"XSTR(BPS)"(%[dst]) \n\t" + "usw %[temp1], 2*" XSTR(BPS) "(%[dst]) \n\t" + "usw %[temp2], 1*" XSTR(BPS) "(%[dst]) \n\t" : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7) diff --git a/src/dsp/mips_macro.h b/src/dsp/mips_macro.h index e09d2c4a..44aba9b7 100644 --- a/src/dsp/mips_macro.h +++ b/src/dsp/mips_macro.h @@ -40,10 +40,10 @@ // I1..I9 - offsets in bytes #define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \ I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \ - "ulw %[" #O0 "], " #I1 "+"XSTR(I9)"*" #I5 "(%[" #I0 "]) \n\t" \ - "ulw %[" #O1 "], " #I2 "+"XSTR(I9)"*" #I6 "(%[" #I0 "]) \n\t" \ - "ulw %[" #O2 "], " #I3 "+"XSTR(I9)"*" #I7 "(%[" #I0 "]) \n\t" \ - "ulw %[" #O3 "], " #I4 "+"XSTR(I9)"*" #I8 "(%[" #I0 "]) \n\t" + "ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t" \ + "ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t" \ + "ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t" \ + "ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t" // O - output // IO - input/output @@ -180,10 +180,10 @@ "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \ "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \ "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \ - "usw %[" #IO0 "], "XSTR(I13)"*" #I9 "(%[" #I8 "]) \n\t" \ - "usw %[" #IO2 "], "XSTR(I13)"*" #I10 "(%[" #I8 "]) \n\t" \ - "usw %[" #IO4 "], "XSTR(I13)"*" #I11 "(%[" #I8 "]) \n\t" \ - "usw %[" #IO6 "], "XSTR(I13)"*" #I12 "(%[" #I8 "]) \n\t" + "usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t" \ + "usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t" \ + "usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t" \ + "usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t" #define OUTPUT_EARLY_CLOBBER_REGS_10() \ : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \