2014-10-10 12:31:00 +02:00
|
|
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Use of this source code is governed by a BSD-style license
|
|
|
|
// that can be found in the COPYING file in the root of the source
|
|
|
|
// tree. An additional intellectual property rights grant can be found
|
|
|
|
// in the file PATENTS. All contributing project authors may
|
|
|
|
// be found in the AUTHORS file in the root of the source tree.
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
//
|
|
|
|
// MIPS common macros
|
|
|
|
|
|
|
|
#ifndef WEBP_DSP_MIPS_MACRO_H_
|
|
|
|
#define WEBP_DSP_MIPS_MACRO_H_
|
|
|
|
|
2014-12-04 16:20:29 +01:00
|
|
|
// STR(x)  - turns its argument into a string literal exactly as spelled.
// XSTR(x) - macro-expands its argument first, then stringifies the result
//           (use it when the argument may itself be a #define'd constant).
#define STR(x) #x
#define XSTR(x) STR(x)
|
|
|
|
|
2014-10-10 12:31:00 +02:00
|
|
|
// Packed-halfword add/subtract pair (addq.ph / subq.ph):
//   O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
//   O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
// O0, O1 - names of the output asm operands
// I0, I1 - names of the input asm operands (the macro doesn't change them)
// O0 is written before I0 and I1 are read for the subtraction, so O0 must not
// name the same operand as either input.
#define ADD_SUB_HALVES(O0, O1, I0, I1) \
  "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
  "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t"
|
|
|
|
|
|
|
|
// Emits two halfword loads (lh) relative to the asm operand named "in":
//   O0 = halfword at I0(in)
//   O1 = halfword at I1(in)
// O0, O1 - names of the output asm operands
// I0, I1 - offsets in bytes, pasted verbatim as the load displacement
// The enclosing asm statement has to supply an operand called "in"; the macro
// only reads it.
#define LOAD_IN_X2(O0, O1, I0, I1) \
  "lh %[" #O0 "], " #I0 "(%[in]) \n\t" \
  "lh %[" #O1 "], " #I1 "(%[in]) \n\t"
|
|
|
|
|
2014-11-05 12:06:15 +01:00
|
|
|
// Emits four word loads (ulw) relative to the asm operand named by I0:
//   O0 = word at (I1 + I9 * I5)(I0)
//   O1 = word at (I2 + I9 * I6)(I0)
//   O2 = word at (I3 + I9 * I7)(I0)
//   O3 = word at (I4 + I9 * I8)(I0)
// O0..O3 - names of the output asm operands
// I0     - name of the asm operand holding the base location
// I1..I4 - offsets in bytes (stringified verbatim)
// I5..I8 - multipliers applied to I9 (stringified verbatim)
// I9     - scale factor; it is macro-expanded before being stringified, so a
//          #define'd constant can be passed
// The displacement is emitted as the text "I1+I9*I5"; evaluating that
// expression is left to the assembler.
// Every string literal is separated by whitespace from the neighbouring
// XSTR() call: C++11 lexes a literal immediately followed by an identifier as
// a user-defined literal suffix, which would break this header when it is
// pulled into a C++ translation unit.
#define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \
                            I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \
  "ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t" \
  "ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t" \
  "ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t" \
  "ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t"
|
2014-11-05 12:06:15 +01:00
|
|
|
|
2014-10-10 12:31:00 +02:00
|
|
|
// Multiplies four inputs by the two constants, scales the products down and
// updates four accumulators:
//   O0 = (I0 * kC2) >> 16      O1 = (I0 * kC1) >> 16
//   O2 = (I1 * kC2) >> 16      O3 = (I1 * kC1) >> 16
//   O4 = (I2 * kC2) >> 16      O5 = (I2 * kC1) >> 16
//   O6 = (I3 * kC2) >> 16      O7 = (I3 * kC1) >> 16
//   IO0 += I4    IO1 += I5    IO2 -= I6    IO3 -= I7
// O0..O7   - names of the output asm operands
// IO0..IO3 - names of the input/output asm operands
// I0..I7   - names of the input asm operands (the macro doesn't change them)
// The shift is arithmetic (sra). The enclosing asm statement has to supply
// operands named "kC1" and "kC2".
#define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7, \
                      IO0, IO1, IO2, IO3, \
                      I0, I1, I2, I3, I4, I5, I6, I7) \
  "mul %[" #O0 "], %[" #I0 "], %[kC2] \n\t" \
  "mul %[" #O1 "], %[" #I0 "], %[kC1] \n\t" \
  "mul %[" #O2 "], %[" #I1 "], %[kC2] \n\t" \
  "mul %[" #O3 "], %[" #I1 "], %[kC1] \n\t" \
  "mul %[" #O4 "], %[" #I2 "], %[kC2] \n\t" \
  "mul %[" #O5 "], %[" #I2 "], %[kC1] \n\t" \
  "mul %[" #O6 "], %[" #I3 "], %[kC2] \n\t" \
  "mul %[" #O7 "], %[" #I3 "], %[kC1] \n\t" \
  "sra %[" #O0 "], %[" #O0 "], 16 \n\t" \
  "sra %[" #O1 "], %[" #O1 "], 16 \n\t" \
  "sra %[" #O2 "], %[" #O2 "], 16 \n\t" \
  "sra %[" #O3 "], %[" #O3 "], 16 \n\t" \
  "sra %[" #O4 "], %[" #O4 "], 16 \n\t" \
  "sra %[" #O5 "], %[" #O5 "], 16 \n\t" \
  "sra %[" #O6 "], %[" #O6 "], 16 \n\t" \
  "sra %[" #O7 "], %[" #O7 "], 16 \n\t" \
  "addu %[" #IO0 "], %[" #IO0 "], %[" #I4 "] \n\t" \
  "addu %[" #IO1 "], %[" #IO1 "], %[" #I5 "] \n\t" \
  "subu %[" #IO2 "], %[" #IO2 "], %[" #I6 "] \n\t" \
  "subu %[" #IO3 "], %[" #IO3 "], %[" #I7 "] \n\t"
|
|
|
|
|
|
|
|
// Emits two "ins ..., 16, 16" instructions, one per (O, I) pair: a 16-bit
// field taken from I is inserted into O at bit position 16.
// O0, O1 - names of the destination asm operands; "ins" only replaces the
//          selected field, so the lower halfword of O is kept
// I0, I1 - names of the input asm operands (the macro doesn't change them)
#define INSERT_HALF_X2(O0, O1, I0, I1) \
  "ins %[" #O0 "], %[" #I0 "], 16, 16 \n\t" \
  "ins %[" #O1 "], %[" #I1 "], 16, 16 \n\t"
|
|
|
|
|
|
|
|
// Arithmetic right shift by 16 of four registers:
//   O0 = I0 >> 16, O1 = I1 >> 16, O2 = I2 >> 16, O3 = I3 >> 16
// O0..O3 - names of the output asm operands
// I0..I3 - names of the input asm operands (the macro doesn't change them)
#define SRA_16(O0, O1, O2, O3, I0, I1, I2, I3) \
  "sra %[" #O0 "], %[" #I0 "], 16 \n\t" \
  "sra %[" #O1 "], %[" #I1 "], 16 \n\t" \
  "sra %[" #O2 "], %[" #I2 "], 16 \n\t" \
  "sra %[" #O3 "], %[" #I3 "], 16 \n\t"
|
|
|
|
|
|
|
|
// Packed-halfword butterfly followed by an arithmetic right shift by 3.
// For each pair (Ia, Ib) in (I0, I4), (I1, I5), (I2, I6), (I3, I7), with the
// results going to (O0, O1), (O2, O3), (O4, O5), (O6, O7) respectively:
//   Oeven[31..16 | 15..0] = (Ia[31..16] + Ib[31..16]) >> 3 | (Ia[15..0] + Ib[15..0]) >> 3
//   Oodd [31..16 | 15..0] = (Ia[31..16] - Ib[31..16]) >> 3 | (Ia[15..0] - Ib[15..0]) >> 3
// O0..O7 - names of the output asm operands
// I0..I7 - names of the input asm operands (the macro doesn't change them)
// All sums and differences are emitted before any shift, and inputs are read
// again after earlier outputs have been written, so outputs must not name the
// same operands as inputs.
#define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7, \
                       I0, I1, I2, I3, I4, I5, I6, I7) \
  "addq.ph %[" #O0 "], %[" #I0 "], %[" #I4 "] \n\t" \
  "subq.ph %[" #O1 "], %[" #I0 "], %[" #I4 "] \n\t" \
  "addq.ph %[" #O2 "], %[" #I1 "], %[" #I5 "] \n\t" \
  "subq.ph %[" #O3 "], %[" #I1 "], %[" #I5 "] \n\t" \
  "addq.ph %[" #O4 "], %[" #I2 "], %[" #I6 "] \n\t" \
  "subq.ph %[" #O5 "], %[" #I2 "], %[" #I6 "] \n\t" \
  "addq.ph %[" #O6 "], %[" #I3 "], %[" #I7 "] \n\t" \
  "subq.ph %[" #O7 "], %[" #I3 "], %[" #I7 "] \n\t" \
  "shra.ph %[" #O0 "], %[" #O0 "], 3 \n\t" \
  "shra.ph %[" #O1 "], %[" #O1 "], 3 \n\t" \
  "shra.ph %[" #O2 "], %[" #O2 "], 3 \n\t" \
  "shra.ph %[" #O3 "], %[" #O3 "], 3 \n\t" \
  "shra.ph %[" #O4 "], %[" #O4 "], 3 \n\t" \
  "shra.ph %[" #O5 "], %[" #O5 "], 3 \n\t" \
  "shra.ph %[" #O6 "], %[" #O6 "], 3 \n\t" \
  "shra.ph %[" #O7 "], %[" #O7 "], 3 \n\t"
|
|
|
|
|
|
|
|
// Regroups the halfwords of four register pairs (Ik, IOk), k = 0..3:
//   precrq.ph.w Ok, Ik, IOk
//     Ok  = Ik[31..16] | IOk[31..16]      (the two upper halfwords)
//   ins IOk, Ik, 16, 16
//     IOk = Ik[15..0]  | IOk[15..0]       (the two lower halfwords; "ins"
//                                          takes the low 16 bits of Ik)
// O0..O3   - names of the output asm operands
// IO0..IO3 - names of the input/output asm operands
// I0..I3   - names of the input asm operands (the macro doesn't change them)
// Ik is read by "ins" after Ok has been written, so Ok must not name the same
// operand as Ik.
#define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3, \
                              IO0, IO1, IO2, IO3, \
                              I0, I1, I2, I3) \
  "precrq.ph.w %[" #O0 "], %[" #I0 "], %[" #IO0 "] \n\t" \
  "precrq.ph.w %[" #O1 "], %[" #I1 "], %[" #IO1 "] \n\t" \
  "ins %[" #IO0 "], %[" #I0 "], 16, 16 \n\t" \
  "ins %[" #IO1 "], %[" #I1 "], 16, 16 \n\t" \
  "precrq.ph.w %[" #O2 "], %[" #I2 "], %[" #IO2 "] \n\t" \
  "precrq.ph.w %[" #O3 "], %[" #I3 "], %[" #IO3 "] \n\t" \
  "ins %[" #IO2 "], %[" #I2 "], 16, 16 \n\t" \
  "ins %[" #IO3 "], %[" #I3 "], 16, 16 \n\t"
|
|
|
|
|
|
|
|
// Expands the bytes of four input words into unsigned halfwords, two output
// registers per input:
//   preceu.ph.qbr Oeven, Ik   - from the two right (low) bytes of Ik
//   preceu.ph.qbl Oodd,  Ik   - from the two left (high) bytes of Ik
// with (Oeven, Oodd) = (O0, O1), (O2, O3), (O4, O5), (O6, O7) for
// Ik = I0, I1, I2, I3.
// NOTE(review): exact byte-to-halfword placement follows the MIPS DSP ASE
// definition of preceu.ph.qbr/qbl - confirm against the manual before relying
// on a particular bit layout.
// O0..O7 - names of the output asm operands
// I0..I3 - names of the input asm operands (the macro doesn't change them)
#define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7, \
                                I0, I1, I2, I3) \
  "preceu.ph.qbr %[" #O0 "], %[" #I0 "] \n\t" \
  "preceu.ph.qbl %[" #O1 "], %[" #I0 "] \n\t" \
  "preceu.ph.qbr %[" #O2 "], %[" #I1 "] \n\t" \
  "preceu.ph.qbl %[" #O3 "], %[" #I1 "] \n\t" \
  "preceu.ph.qbr %[" #O4 "], %[" #I2 "] \n\t" \
  "preceu.ph.qbl %[" #O5 "], %[" #I2 "] \n\t" \
  "preceu.ph.qbr %[" #O6 "], %[" #I3 "] \n\t" \
  "preceu.ph.qbl %[" #O7 "], %[" #I3 "] \n\t"
|
|
|
|
|
2014-11-05 12:06:15 +01:00
|
|
|
// Adds, saturates, packs to bytes and stores four words:
//   1. IOk[31..16 | 15..0] = IOk[31..16 | 15..0] + Ik[31..16 | 15..0], k = 0..7
//   2. IOk[31..16 | 15..0] = IOk[31..16 <<(s) 7 | 15..0 <<(s) 7]
//      (shll_s.ph: left shift by 7 with saturation, per halfword)
//   3. precrqu_s.qb.ph IO0, IO1, IO0 (likewise IO2/IO3, IO4/IO5, IO6/IO7):
//      the four halfwords of the pair are reduced to four unsigned saturated
//      bytes, those of the odd register in the upper half of the result
//   4. IO0, IO2, IO4, IO6 are stored (usw) at displacements
//      I13*I9, I13*I10, I13*I11, I13*I12 from the location in I8
// IO0..IO7 - names of the input/output asm operands (all of them are
//            modified; after the macro only the even ones hold packed bytes)
// I0..I7   - names of the input asm operands (the macro doesn't change them)
// I8       - name of the asm operand holding the destination location
// I9..I12  - multipliers applied to I13 (stringified verbatim)
// I13      - scale factor; it is macro-expanded before being stringified, so
//            a #define'd constant can be passed
// The displacement is emitted as the text "I13*I9"; evaluating that
// expression is left to the assembler.
// Every string literal is separated by whitespace from the neighbouring
// XSTR() call: C++11 lexes a literal immediately followed by an identifier as
// a user-defined literal suffix, which would break this header when it is
// pulled into a C++ translation unit.
#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, \
                         I0, I1, I2, I3, I4, I5, I6, I7, \
                         I8, I9, I10, I11, I12, I13) \
  "addq.ph %[" #IO0 "], %[" #IO0 "], %[" #I0 "] \n\t" \
  "addq.ph %[" #IO1 "], %[" #IO1 "], %[" #I1 "] \n\t" \
  "addq.ph %[" #IO2 "], %[" #IO2 "], %[" #I2 "] \n\t" \
  "addq.ph %[" #IO3 "], %[" #IO3 "], %[" #I3 "] \n\t" \
  "addq.ph %[" #IO4 "], %[" #IO4 "], %[" #I4 "] \n\t" \
  "addq.ph %[" #IO5 "], %[" #IO5 "], %[" #I5 "] \n\t" \
  "addq.ph %[" #IO6 "], %[" #IO6 "], %[" #I6 "] \n\t" \
  "addq.ph %[" #IO7 "], %[" #IO7 "], %[" #I7 "] \n\t" \
  "shll_s.ph %[" #IO0 "], %[" #IO0 "], 7 \n\t" \
  "shll_s.ph %[" #IO1 "], %[" #IO1 "], 7 \n\t" \
  "shll_s.ph %[" #IO2 "], %[" #IO2 "], 7 \n\t" \
  "shll_s.ph %[" #IO3 "], %[" #IO3 "], 7 \n\t" \
  "shll_s.ph %[" #IO4 "], %[" #IO4 "], 7 \n\t" \
  "shll_s.ph %[" #IO5 "], %[" #IO5 "], 7 \n\t" \
  "shll_s.ph %[" #IO6 "], %[" #IO6 "], 7 \n\t" \
  "shll_s.ph %[" #IO7 "], %[" #IO7 "], 7 \n\t" \
  "precrqu_s.qb.ph %[" #IO0 "], %[" #IO1 "], %[" #IO0 "] \n\t" \
  "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \
  "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \
  "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \
  "usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t" \
  "usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t" \
  "usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t" \
  "usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t"
|
2014-11-05 12:06:15 +01:00
|
|
|
|
2014-10-10 12:31:00 +02:00
|
|
|
// Expands to the output-operand section of an asm statement, including the
// leading ':'. It lists temp1..temp10 as write-only, early-clobber ("=&r")
// register operands, each bound to a C variable of the same name, so those
// ten variables must be in scope where the macro is used.
#define OUTPUT_EARLY_CLOBBER_REGS_10() \
  : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), \
    [temp3] "=&r" (temp3), [temp4] "=&r" (temp4), \
    [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), \
    [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), \
    [temp9] "=&r" (temp9), [temp10] "=&r" (temp10)
|
|
|
|
|
|
|
|
// Same as OUTPUT_EARLY_CLOBBER_REGS_10(), extended with temp11..temp18: the
// expansion is the ':'-prefixed output-operand section declaring temp1..temp18
// as early-clobber ("=&r") register operands bound to C variables of the same
// names.
#define OUTPUT_EARLY_CLOBBER_REGS_18() \
  OUTPUT_EARLY_CLOBBER_REGS_10(), \
  [temp11] "=&r" (temp11), [temp12] "=&r" (temp12), \
  [temp13] "=&r" (temp13), [temp14] "=&r" (temp14), \
  [temp15] "=&r" (temp15), [temp16] "=&r" (temp16), \
  [temp17] "=&r" (temp17), [temp18] "=&r" (temp18)
|
|
|
|
|
|
|
|
#endif // WEBP_DSP_MIPS_MACRO_H_
|