/****************************************************************************
**
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qt_mips_asm_dsp_p.h"

LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
/*
 * a0 - dst (a8r8g8b8)
 * a1 - src (r5g6b5)
 * a2 - w (number of pixels to convert)
 */
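
/*
 * Reference only (not part of the build): the per-pixel math this
 * routine implements, sketched in C. Each 5- or 6-bit channel is
 * widened to 8 bits by replicating its top bits into the vacated low
 * bits, and the alpha byte is forced to 0xff. Judging by the register
 * comments above, the C-side signature is along the lines of
 * void qConvertRgb16To32_asm_mips_dspr2(quint32 *dst,
 *                                       const quint16 *src, int w).
 *
 *   static inline unsigned rgb565_to_argb8888(unsigned short p)
 *   {
 *       unsigned r5 = (p >> 11) & 0x1f;
 *       unsigned g6 = (p >> 5) & 0x3f;
 *       unsigned b5 = p & 0x1f;
 *       unsigned r8 = (r5 << 3) | (r5 >> 2);  // replicate top 3 bits
 *       unsigned g8 = (g6 << 2) | (g6 >> 4);  // replicate top 2 bits
 *       unsigned b8 = (b5 << 3) | (b5 >> 2);
 *       return 0xff000000u | (r8 << 16) | (g8 << 8) | b8;
 *   }
 */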

    beqz    a2, 3f
    nop
    addiu   t1, a2, -1
    beqz    t1, 2f
    nop
    li      t4, 0x07e007e0
    li      t5, 0x001F001F
/* Convert two pixels at a time (2 x rgb565 -> 2 x rgb8888) */
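/*
 * Both pixels are packed into one register so the paired-halfword
 * (.ph) instructions can widen them in parallel; precrq.ph.w and
 * precr_sra.ph.w then splice the FF|R halfwords and the G|B halfwords
 * into the two final a8r8g8b8 words.
 */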
1:
    lhu     t0, 0(a1)
    lhu     t1, 2(a1)
    addiu   a1, a1, 4
    addiu   a2, a2, -2

    sll     t6, t0, 16
    or      t6, t6, t1          /* t6 = R1 G1 B1 | R2 G2 B2 */
    lui     t3, 0xff00
    ori     t3, t3, 0xff00      /* t3 = FF 00 | FF 00 (in place) */
    shrl.ph t7, t6, 11          /* t7 = 0 R1 | 0 R2 */
    and     t8, t6, t4          /* t8 = 0 G1 0 | 0 G2 0 */
    shra.ph t9, t7, 2           /* t9 = 0 R1 | 0 R2 (lower) */
    shll.ph t7, t7, 3           /* t7 = 0 R1 | 0 R2 (higher) */
    shll.ph t8, t8, 5           /* t8 = G1 0 | G2 0 (higher) */
    or      t7, t7, t9          /* t7 = 0 R1 | 0 R2 (in place) */
    shrl.qb t9, t8, 6           /* t9 = G1 0 | G2 0 (lower) */
    or      t3, t3, t7          /* t3 = FF R1 | FF R2 (in place) */
    or      t8, t8, t9          /* t8 = G1 0 | G2 0 (in place) */
    and     t6, t6, t5          /* t6 = 0 B1 | 0 B2 */
    shll.ph t7, t6, 3           /* t7 = 0 B1 | 0 B2 (higher) */
    shra.ph t9, t6, 2           /* t9 = 0 B1 | 0 B2 (lower) */
    or      t7, t7, t9          /* t7 = 0 B1 | 0 B2 (in place) */
    or      t8, t7, t8          /* t8 = G1 B1 | G2 B2 (in place) */
    precrq.ph.w    t2, t3, t8   /* t2 = FF R1 G1 B1 (in place) */
    precr_sra.ph.w t3, t8, 0    /* t3 = FF R2 G2 B2 (in place) */

    sw      t2, 0(a0)
    sw      t3, 4(a0)

    addiu   t2, a2, -1
    bgtz    t2, 1b
    addiu   a0, a0, 8
2:
    beqz    a2, 3f
    nop
    lhu     t0, 0(a1)           /* t0 = remaining rgb565 pixel */

/* Remaining pixel conversion (rgb565 -> rgb8888) */
    lui     t1, 0xff00          /* t1 = alpha byte (0xff000000)    */
    sll     t2, t0, 0x3
    andi    t3, t2, 0xff        /* t3 = b5 << 3                    */
    ext     t2, t0, 0x2, 0x3    /* t2 = b5 >> 2 (top 3 bits)       */
    or      t2, t3, t2          /* b8 = (b5 << 3) | (b5 >> 2)      */
    or      t1, t1, t2

    sll     t2, t0, 0x5
    andi    t2, t2, 0xfc00      /* (g6 << 2) at bits 10..15        */
    srl     t3, t0, 0x1
    andi    t3, t3, 0x300       /* (g6 >> 4) at bits 8..9          */
    or      t3, t2, t3          /* g8 = (g6 << 2) | (g6 >> 4), <<8 */
    or      t1, t1, t3

    andi    t2, t0, 0xf800      /* r5 at bits 11..15               */
    srl     t3, t2, 0x5
    andi    t3, t3, 0xff00      /* (r5 >> 2) at bits 8..10         */
    or      t2, t2, t3          /* r8 = (r5 << 3) | (r5 >> 2), <<8 */
    sll     t2, t2, 0x8         /* move r8 to bits 16..23          */
    or      t1, t1, t2

    sw      t1, 0(a0)
3:
    j       ra
    nop

END(qConvertRgb16To32_asm_mips_dspr2)


#if defined(__MIPSEL) && __MIPSEL
# define PACK(r, s, t) packrl.ph r, s, t
# define LDHI(r, o, b) lwl r, o + 1 (b)
# define LDLO(r, o, b) lwr r, o + 2 (b)
#else
# define PACK(r, s, t) packrl.ph r, t, s
# define LDHI(r, o, b) lwr r, o + 1 (b)
# define LDLO(r, o, b) lwl r, o + 2 (b)
#endif
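
/*
 * These macros handle a source stream that is offset by 2 bytes from
 * word alignment: LDHI fetches the leading pixel and LDLO the trailing
 * one, the plain lw loads in between fetch the middle pixels, and PACK
 * (packrl.ph) splices halfwords from adjacent registers so each
 * register again holds two whole pixels. Which of lwl/lwr touches
 * which end of a word depends on endianness, hence the two variants.
 * A portable C sketch of the same unaligned-load idea (illustrative
 * only; needs <string.h>):
 *
 *   static inline unsigned load_u32_unaligned(const unsigned char *p)
 *   {
 *       unsigned v;
 *       memcpy(&v, p, sizeof v);   // compilers typically lower this
 *       return v;                  // to lwl/lwr on pre-R6 MIPS
 *   }
 */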


LEAF_MIPS_DSPR2(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
/*
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int) - batch length
 * a3 - alpha (int)
 *
 * Register usage:
 *  t0-3 - Scratch registers
 *  t4 - Number of iterations to do in unrolled loops
 *  t5 - Inverse alpha
 *  t6 - Alpha >> 2
 *  t7 - Inverse alpha >> 2
 *  t8 - magic1 (0x07e007e0)
 *  t9 - magic2 (0xf81ff81f)
 *
 * NOTE:
 * The DSP instructions cannot be used to multiply two 16-bit values:
 * the overflow would always be rounded or saturated.
 */
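
/*
 * Reference only (not part of the build): the per-pixel blend this
 * routine implements, sketched in C. Green is blended through one
 * product and red|blue through another; the red|blue product uses
 * alpha / 4 with a 6-bit shift so that the blue term cannot carry
 * into the red field (0x1f * 63 < 0x800). Names are illustrative.
 *
 *   static inline unsigned short blend_rgb565(unsigned short s,
 *                                             unsigned short d,
 *                                             unsigned alpha)
 *   {
 *       unsigned a  = ((alpha * 255) >> 8) + 1;
 *       unsigned ia = 255 - ((alpha * 255) >> 8) + 1;
 *       unsigned sg = (((s & 0x07e0) * a) >> 8) & 0x07e0;
 *       unsigned sr = (((s & 0xf81f) * (a >> 2)) >> 6) & 0xf81f;
 *       unsigned dg = (((d & 0x07e0) * ia) >> 8) & 0x07e0;
 *       unsigned dr = (((d & 0xf81f) * (ia >> 2)) >> 6) & 0xf81f;
 *       return (unsigned short) ((sr | sg) + (dr | dg));
 *   }
 */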

    beqz    a2, 0f
    andi    t0, a0, 0x3         /* t0 = dst & 0x3 */
    andi    t1, a1, 0x3         /* t1 = src & 0x3 */
    /* Adjust the alpha value and calculate the inverse alpha */
    li      t5, 255
    or      t2, t0, t1          /* t2 = (dst & 0x3) | (src & 0x3) */
    sll     t8, a3, 8           /* t8 = alpha * 256 */
    subu    a3, t8, a3          /* a3 = alpha * 255 */
    li      t8, 0x07e007e0      /* magic1 */
    srl     a3, a3, 8           /* alpha = (alpha * 255) >> 8 */
    li      t9, 0xf81ff81f      /* magic2 */
    subu    t5, t5, a3          /* ialpha = 255 - alpha */
    addiu   a3, a3, 1           /* alpha++ */
    addiu   t5, t5, 1           /* ialpha++ */
    srl     t6, a3, 2           /* t6 = alpha >> 2 */

    beqz    t2, 4f              /* both aligned */
    srl     t7, t5, 2           /* t7 = ialpha >> 2 */

    beqz    t1, 2f              /* src aligned, dst unaligned */
    nop

    beqz    t0, 3f              /* dst aligned, src unaligned */
    nop

/*
 * Both src and dst are unaligned: blend one pixel from each, then
 * fall through to continue with word-aligned operation.
 */
    lhu     t1, 0 (a1)
    lhu     t0, 0 (a0)
    addiu   a2, a2, -1          /* len-- */
    andi    t2, t1, 0x07e0
    andi    t1, t1, 0xf81f
    mul     t2, t2, a3
    mul     t1, t1, t6
    andi    t3, t0, 0x07e0
    andi    t0, t0, 0xf81f
    mul     t3, t3, t5
    mul     t0, t0, t7
    addiu   a1, a1, 2           /* src++ */
    srl     t2, t2, 8
    srl     t1, t1, 6
    andi    t2, t2, 0x07e0
    andi    t1, t1, 0xf81f
    or      t1, t1, t2
    srl     t3, t3, 8
    srl     t0, t0, 6
    andi    t3, t3, 0x07e0
    andi    t0, t0, 0xf81f
    or      t0, t0, t3
    addu    t0, t0, t1          /* src * alpha + dst * ialpha */
    sh      t0, 0 (a0)
    addiu   a0, a0, 2           /* dst++ */

/*
 * Both src and dst pointers are now word-aligned; process eight
 * pixels at a time in an unrolled loop.
 */
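/*
 * Per 32-bit word (two pixels), the green fields are extracted with
 * magic1 and the red|blue fields with magic2, the two halfwords are
 * multiplied separately (see the NOTE above), shifted, re-paired with
 * append, and the src and dst halves finally summed with addu.ph.
 */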
4:  beqz    a2, 0f
    srl     t4, a2, 3           /* t4 = len / 8 */
    beqz    t4, 5f
    andi    a2, a2, 0x7         /* len = len % 8 */
    SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1

1:  lw      t1, 0 (a1)          /* [s0, s1] */
    lw      v1, 4 (a1)          /* [s2, s3] */
    lw      s1, 8 (a1)          /* [s4, s5] */
    lw      s3, 12 (a1)         /* [s6, s7] */

    lw      t0, 0 (a0)          /* [d0, d1] */
    lw      v0, 4 (a0)          /* [d2, d3] */
    lw      s0, 8 (a0)          /* [d4, d5] */
    lw      s2, 12 (a0)         /* [d6, d7] */

    pref    4, 16 (a1)          /* prefetch next src (streamed load)  */
    pref    5, 16 (a0)          /* prefetch next dst (streamed store) */

    and     t2, t1, t8
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     t1, t1, t9
    ext     s4, t1, 0, 16
    mul     s4, s4, t6
    srl     t1, t1, 16
    mul     t1, t1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v1, t8
    srl     t1, t1, 6
    append  t1, s4, 16
    and     t1, t1, t9
    or      t1, t1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     v1, v1, t9
    ext     s4, v1, 0, 16
    mul     s4, s4, t6
    srl     v1, v1, 16
    mul     v1, v1, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s1, t8
    srl     v1, v1, 6
    append  v1, s4, 16
    and     v1, v1, t9
    or      v1, v1, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     s1, s1, t9
    ext     s4, s1, 0, 16
    mul     s4, s4, t6
    srl     s1, s1, 16
    mul     s1, s1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s3, t8
    srl     s1, s1, 6
    append  s1, s4, 16
    and     s1, s1, t9
    or      s1, s1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     s3, s3, t9
    ext     s4, s3, 0, 16
    mul     s4, s4, t6
    srl     s3, s3, 16
    mul     s3, s3, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, t0, t8
    srl     s3, s3, 6
    append  s3, s4, 16
    and     s3, s3, t9
    or      s3, s3, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     t0, t0, t9
    ext     s4, t0, 0, 16
    mul     s4, s4, t7
    srl     t0, t0, 16
    mul     t0, t0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v0, t8
    srl     t0, t0, 6
    append  t0, s4, 16
    and     t0, t0, t9
    or      t0, t0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     v0, v0, t9
    ext     s4, v0, 0, 16
    mul     s4, s4, t7
    srl     v0, v0, 16
    mul     v0, v0, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s0, t8
    srl     v0, v0, 6
    append  v0, s4, 16
    and     v0, v0, t9
    or      v0, v0, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     s0, s0, t9
    ext     s4, s0, 0, 16
    mul     s4, s4, t7
    srl     s0, s0, 16
    mul     s0, s0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s2, t8
    srl     s0, s0, 6
    append  s0, s4, 16
    and     s0, s0, t9
    or      s0, s0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     s2, s2, t9
    ext     s4, s2, 0, 16
    mul     s4, s4, t7
    srl     s2, s2, 16
    mul     s2, s2, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    addu.ph t0, t0, t1          /* t0 = [S0 + D0, S1 + D1] */
    srl     s2, s2, 6
    append  s2, s4, 16
    and     s2, s2, t9
    or      s2, s2, t3
    addu.ph v0, v0, v1          /* v0 = [S2 + D2, S3 + D3] */
    addu.ph s0, s0, s1          /* s0 = [S4 + D4, S5 + D5] */
    addu.ph s2, s2, s3          /* s2 = [S6 + D6, S7 + D7] */

    sw      t0, 0 (a0)          /* [SS0, SS1] */
    sw      v0, 4 (a0)          /* [SS2, SS3] */
    sw      s0, 8 (a0)          /* [SS4, SS5] */
    sw      s2, 12 (a0)         /* [SS6, SS7] */

    addiu   t4, t4, -1          /* t4-- */
    addiu   a1, a1, 16          /* src += 8 */

    bnez    t4, 1b
    addiu   a0, a0, 16          /* dst += 8 */

    RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
    b       5f
    nop


/*
 * src aligned, dst unaligned: blend one pixel, after which dst is
 * aligned and src is not, then fall through to the src-unaligned case.
 */
2:  lhu     t1, 0 (a1)
    lhu     t0, 0 (a0)
    addiu   a2, a2, -1          /* len-- */
    andi    t2, t1, 0x07e0
    andi    t1, t1, 0xf81f
    mul     t2, t2, a3
    mul     t1, t1, t6
    andi    t3, t0, 0x07e0
    andi    t0, t0, 0xf81f
    mul     t3, t3, t5
    mul     t0, t0, t7
    addiu   a1, a1, 2           /* src++ */
    srl     t2, t2, 8
    srl     t1, t1, 6
    andi    t2, t2, 0x07e0
    andi    t1, t1, 0xf81f
    or      t1, t1, t2
    srl     t3, t3, 8
    srl     t0, t0, 6
    andi    t3, t3, 0x07e0
    andi    t0, t0, 0xf81f
    or      t0, t0, t3
    addu    t0, t0, t1          /* src * alpha + dst * ialpha */
    sh      t0, 0 (a0)
    addiu   a0, a0, 2           /* dst++ */

    /* src unaligned, dst aligned */
3:  beqz    a2, 0f
    srl     t4, a2, 3           /* t4 = len / 8 */
    beqz    t4, 5f
    andi    a2, a2, 0x7         /* len = len % 8 */
    SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1

1:  lw      t0, 0 (a0)          /* [d0, d1] */
    lw      v0, 4 (a0)          /* [d2, d3] */
    lw      s0, 8 (a0)          /* [d4, d5] */
    lw      s2, 12 (a0)         /* [d6, d7] */

    LDHI   (t1, 0, a1)          /* [s0, __] */
    lw      v1, 2 (a1)          /* [s1, s2] */
    lw      s1, 6 (a1)          /* [s3, s4] */
    lw      s3, 10 (a1)         /* [s5, s6] */
    LDLO   (s4, 12, a1)         /* [__, s7] */

    pref    4, 14 (a1)          /* prefetch next src (streamed load)  */
    pref    5, 16 (a0)          /* prefetch next dst (streamed store) */

    PACK   (t1, v1, t1)         /* [s0, s1] */
    PACK   (v1, s1, v1)         /* [s2, s3] */
    PACK   (s1, s3, s1)         /* [s4, s5] */
    PACK   (s3, s4, s3)         /* [s6, s7] */
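    /*
     * t1, v1, s1 and s3 now hold the eight source pixels realigned;
     * the blend below is identical to the aligned loop above.
     */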

    and     t2, t1, t8
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     t1, t1, t9
    ext     s4, t1, 0, 16
    mul     s4, s4, t6
    srl     t1, t1, 16
    mul     t1, t1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v1, t8
    srl     t1, t1, 6
    append  t1, s4, 16
    and     t1, t1, t9
    or      t1, t1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     v1, v1, t9
    ext     s4, v1, 0, 16
    mul     s4, s4, t6
    srl     v1, v1, 16
    mul     v1, v1, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s1, t8
    srl     v1, v1, 6
    append  v1, s4, 16
    and     v1, v1, t9
    or      v1, v1, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     s1, s1, t9
    ext     s4, s1, 0, 16
    mul     s4, s4, t6
    srl     s1, s1, 16
    mul     s1, s1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s3, t8
    srl     s1, s1, 6
    append  s1, s4, 16
    and     s1, s1, t9
    or      s1, s1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     s3, s3, t9
    ext     s4, s3, 0, 16
    mul     s4, s4, t6
    srl     s3, s3, 16
    mul     s3, s3, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, t0, t8
    srl     s3, s3, 6
    append  s3, s4, 16
    and     s3, s3, t9
    or      s3, s3, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     t0, t0, t9
    ext     s4, t0, 0, 16
    mul     s4, s4, t7
    srl     t0, t0, 16
    mul     t0, t0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v0, t8
    srl     t0, t0, 6
    append  t0, s4, 16
    and     t0, t0, t9
    or      t0, t0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     v0, v0, t9
    ext     s4, v0, 0, 16
    mul     s4, s4, t7
    srl     v0, v0, 16
    mul     v0, v0, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s0, t8
    srl     v0, v0, 6
    append  v0, s4, 16
    and     v0, v0, t9
    or      v0, v0, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     s0, s0, t9
    ext     s4, s0, 0, 16
    mul     s4, s4, t7
    srl     s0, s0, 16
    mul     s0, s0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s2, t8
    srl     s0, s0, 6
    append  s0, s4, 16
    and     s0, s0, t9
    or      s0, s0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     s2, s2, t9
    ext     s4, s2, 0, 16
    mul     s4, s4, t7
    srl     s2, s2, 16
    mul     s2, s2, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    addu.ph t0, t0, t1          /* t0 = [S0 + D0, S1 + D1] */
    srl     s2, s2, 6
    append  s2, s4, 16
    and     s2, s2, t9
    or      s2, s2, t3
    addu.ph v0, v0, v1          /* v0 = [S2 + D2, S3 + D3] */
    addu.ph s0, s0, s1          /* s0 = [S4 + D4, S5 + D5] */
    addu.ph s2, s2, s3          /* s2 = [S6 + D6, S7 + D7] */

    sw      t0, 0 (a0)          /* [SS0, SS1] */
    sw      v0, 4 (a0)          /* [SS2, SS3] */
    sw      s0, 8 (a0)          /* [SS4, SS5] */
    sw      s2, 12 (a0)         /* [SS6, SS7] */

    addiu   t4, t4, -1          /* t4-- */
    addiu   a1, a1, 16          /* src += 8 */

    bnez    t4, 1b
    addiu   a0, a0, 16          /* dst += 8 */

    RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1

5:  /* Process remaining items (len < 8), one at a time */
    beqz    a2, 0f
    nop

1:  lhu     t1, 0 (a1)
    lhu     t0, 0 (a0)
    addiu   a1, a1, 2           /* src++ */
    andi    t2, t1, 0x07e0
    andi    t1, t1, 0xf81f
    mul     t2, t2, a3
    mul     t1, t1, t6
    andi    t3, t0, 0x07e0
    andi    t0, t0, 0xf81f
    mul     t3, t3, t5
    mul     t0, t0, t7
    addiu   a2, a2, -1          /* len-- */
    srl     t2, t2, 8
    srl     t1, t1, 6
    andi    t2, t2, 0x07e0
    andi    t1, t1, 0xf81f
    or      t1, t1, t2
    srl     t3, t3, 8
    srl     t0, t0, 6
    andi    t3, t3, 0x07e0
    andi    t0, t0, 0xf81f
    or      t0, t0, t3

    addu    t0, t0, t1          /* src * alpha + dst * ialpha */
    sh      t0, 0 (a0)
    bnez    a2, 1b
    addiu   a0, a0, 2           /* dst++ */

0:  jr      ra
    nop

END(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)

#undef PACK
#undef LDHI
#undef LDLO