| /**************************************************************************** |
| ** |
| ** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com |
| ** Contact: https://www.qt.io/licensing/ |
| ** |
| ** This file is part of the QtGui module of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL$ |
| ** Commercial License Usage |
| ** Licensees holding valid commercial Qt licenses may use this file in |
| ** accordance with the commercial license agreement provided with the |
| ** Software or, alternatively, in accordance with the terms contained in |
| ** a written agreement between you and The Qt Company. For licensing terms |
| ** and conditions see https://www.qt.io/terms-conditions. For further |
| ** information use the contact form at https://www.qt.io/contact-us. |
| ** |
| ** GNU Lesser General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU Lesser |
| ** General Public License version 3 as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| ** packaging of this file. Please review the following information to |
| ** ensure the GNU Lesser General Public License version 3 requirements |
| ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| ** |
| ** GNU General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU |
| ** General Public License version 2.0 or (at your option) the GNU General |
| ** Public license version 3 or any later version approved by the KDE Free |
| ** Qt Foundation. The licenses are as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| ** included in the packaging of this file. Please review the following |
| ** information to ensure the GNU General Public License requirements will |
| ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| ** https://www.gnu.org/licenses/gpl-3.0.html. |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| #include "qt_mips_asm_dsp_p.h" |
| |
| LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp) |
| /* |
| * Reads `length` ARGB32 pixels from src, multiplies the R, G and B bytes of |
| * each pixel by that pixel's alpha byte and writes the result to the buffer; |
| * the alpha byte itself is copied unchanged. The per-channel scaling is |
| * t = c * alpha; result = (t + (t >> 8) + 0x80) >> 8 |
| * (the usual rounding approximation of c * alpha / 255). |
| * |
| * a0 - buffer address (dst) |
| * a1 - data address (src) |
| * a2 - length |
| * |
| * Returns v0 = the buffer address passed in a0. |
| * An odd length is handled by processing a single pixel first; the loop at |
| * label 1 then processes two pixels per iteration. |
| */ |
| |
| beqz a2, 2f |
| move v0, a0 /* just return the address of buffer |
| * for storing returning values */ |
| andi t1, a2, 0x1 |
| li t7, 8388736 /* t7 = 0x800080 */ |
| beqz t1, 1f /* even length: go straight to the pair loop */ |
| nop |
| lw t8, 0(a1) |
| addiu a2, a2, -1 |
| srl t6, t8, 24 /* t6 = alpha */ |
| |
| preceu.ph.qbra t0, t8 /* t0 = | 0 | R | 0 | B | */ |
| mul t1, t0, t6 |
| preceu.ph.qbla t4, t8 /* t4 = | 0 | A | 0 | G | */ |
| mul t5, t4, t6 |
| |
| preceu.ph.qbla t2, t1 |
| addq.ph t3, t1, t2 |
| addq.ph t3, t3, t7 |
| preceu.ph.qbla t1, t3 /* t1 holds R & B blended with alpha |
| * | 0 | dRab | 0 | dBab | */ |
| preceu.ph.qbla t2, t5 |
| addq.ph t3, t2, t5 |
| addq.ph t4, t3, t7 |
| preceu.ph.qbla t2, t4 /* t2 holds A & G blended with alpha |
| * | 0 | dAab | 0 | dGab | */ |
| andi t2, t2, 255 /* keep only dGab (mask = 0xff) */ |
| |
| sll t0, t6, 24 /* original alpha back into bits 31..24 */ |
| sll t3, t2, 8 |
| or t4, t0, t3 |
| or t0, t1, t4 |
| sw t0, 0(a0) |
| addiu a0, a0, 4 |
| addiu a1, a1, 4 |
| beqz a2, 2f /* there was only one member */ |
| nop |
| 1: |
| lw t0, 0(a1) /* t0 = src1 */ |
| lw t1, 4(a1) /* t1 = src2 */ |
| precrq.qb.ph t4, t0, t1 /* t4 = a1 G1 a2 G2 */ |
| preceu.ph.qbra t3, t4 /* t3 = 0 G1 0 G2 */ |
| preceu.ph.qbla t2, t4 /* t2 = | 0 | a1 | 0 | a2 | */ |
| srl t5, t2, 8 |
| or t8, t2, t5 /* t8 = 0 a1 a1 a2 */ |
| muleu_s.ph.qbr t5, t8, t3 |
| |
| addiu a2, a2, -2 |
| addiu a1, a1, 8 |
| precrq.ph.w t9, t0, t1 |
| preceu.ph.qbra t9, t9 |
| |
| preceu.ph.qbla t6, t5 |
| addq.ph t5, t5, t6 |
| addq.ph t2, t5, t7 |
| muleu_s.ph.qbr t6, t8, t9 |
| sll t3, t1, 16 |
| packrl.ph t3, t0, t3 |
| preceu.ph.qbra t3, t3 |
| muleu_s.ph.qbr t8, t8, t3 |
| preceu.ph.qbla t3, t6 |
| addq.ph t3, t6, t3 |
| addq.ph t3, t3, t7 |
| preceu.ph.qbla t5, t8 |
| addq.ph t5, t8, t5 |
| addq.ph t5, t5, t7 |
| |
| precrq.ph.w t0, t4, t3 /* t0 = | 0 | a1 | 0 | dR1 | */ |
| precrq.ph.w t1, t2, t5 /* t1 = | 0 | dG1 | 0 | dB1 | */ |
| precrq.qb.ph t6, t0, t1 /* t6 = | a1 | dR1 | dG1 | dB1 | */ |
| sll t3, t3, 16 |
| sll t5, t5, 16 |
| packrl.ph t0, t4, t3 |
| packrl.ph t1, t2, t5 |
| precrq.qb.ph t8, t0, t1 /* t8 = | a2 | dR2 | dG2 | dB2 | */ |
| sw t6, 0(a0) |
| sw t8, 4(a0) |
| bnez a2, 1b |
| addiu a0, a0, 8 |
| 2: |
| jr ra |
| nop |
| |
| END(destfetchARGB32_asm_mips_dsp) |
| |
| LEAF_MIPS_DSP(qt_memfill32_asm_mips_dsp) |
| /* |
| * Stores the 32-bit `value` into `count` consecutive words starting at dst. |
| * The (count % 8) leading words are written one at a time at label 1; the |
| * rest is written in unrolled groups of eight stores (labels 3 and 4). |
| * |
| * a0 - destination address (dst) |
| * a1 - value |
| * a2 - count |
| */ |
| |
| beqz a2, 5f /* count == 0: nothing to store */ |
| nop |
| li t8, 8 |
| andi t0, a2, 0x7 /* t0 = count % 8 (words left over beyond whole groups of 8) */ |
| beqzl t0, 2f /* count is multiple of 8 (8, 16, 24, ....) */ |
| addiu a2, a2, -8 /* branch-likely slot: a2 is pre-decremented only when the branch is taken */ |
| subu a2, a2, t0 /* a2 = count rounded down to a multiple of 8 */ |
| 1: |
| sw a1, 0(a0) |
| addiu t0, t0, -1 |
| bnez t0, 1b |
| addiu a0, a0, 4 |
| bgeu a2, t8, 2f /* at least one whole group of 8 is left */ |
| addiu a2, a2, -8 |
| b 5f |
| nop |
| 2: |
| beqz a2, 4f /* a2 was pre-decremented by 8: zero means exactly one group remains */ |
| nop |
| 3: |
| pref 30, 32(a0) |
| addiu a2, a2, -8 |
| sw a1, 0( a0) |
| sw a1, 4(a0) |
| sw a1, 8(a0) |
| sw a1, 12(a0) |
| addiu a0, a0, 32 |
| sw a1, -16(a0) |
| sw a1, -12(a0) |
| sw a1, -8(a0) |
| bnez a2, 3b |
| sw a1, -4(a0) |
| 4: |
| sw a1, 0(a0) /* final group of eight stores */ |
| sw a1, 4(a0) |
| sw a1, 8(a0) |
| sw a1, 12(a0) |
| addiu a0, a0, 32 |
| sw a1, -16(a0) |
| sw a1, -12(a0) |
| sw a1, -8(a0) |
| sw a1, -4(a0) |
| 5: |
| jr ra |
| nop |
| |
| END(qt_memfill32_asm_mips_dsp) |
| |
| LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp) |
| /* |
| * Source-over composition, one pixel per iteration. |
| * const_alpha == 255: dest[i] = s + BYTE_MUL(dest[i], ~qAlpha(s)), with two |
| * shortcuts: a fully opaque s is copied as is, and s == 0 leaves dest[i] |
| * untouched. |
| * const_alpha != 255: s is first scaled by const_alpha, then combined with |
| * dest[i] in the same way (no shortcuts). |
| * Byte scaling uses t = c * a; (t + (t >> 8) + 0x80) >> 8. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| beqz a2, 5f |
| nop |
| li t8, 0xff |
| li t7, 8388736 /* t7 = 0x800080 */ |
| bne a3, t8, 4f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| b 2f |
| nop |
| 1: |
| addiu a0, a0, 4 /* s == 0: skip this dest pixel without writing it */ |
| addiu a2, a2, -1 |
| beqz a2, 5f |
| nop |
| 2: |
| lw t0, 0(a1) /* t0 = s = src[i] */ |
| addiu a1, a1, 4 |
| nor t1, t0, zero |
| srl t1, t1, 24 /* t1 = ~qAlpha(s) */ |
| bnez t1, 3f /* not fully opaque: blend */ |
| nop |
| sw t0, 0(a0) /* dst[i] = src[i] */ |
| addiu a2, a2, -1 |
| bnez a2, 2b |
| addiu a0, a0, 4 |
| b 5f |
| nop |
| 3: |
| beqz t0, 1b |
| nop |
| |
| lw t4, 0(a0) |
| replv.ph t6, t1 /* t6 = | 0 | ~qAlpha(s) | 0 | ~qAlpha(s) | */ |
| muleu_s.ph.qbl t2, t4, t6 |
| muleu_s.ph.qbr t3, t4, t6 |
| addiu a2, a2, -1 |
| preceu.ph.qbla t4, t2 |
| addq.ph t4, t2, t4 |
| addq.ph t4, t4, t7 |
| preceu.ph.qbla t5, t3 |
| addq.ph t5, t5, t3 |
| addq.ph t5, t5, t7 |
| precrq.qb.ph t8, t4, t5 /* t8 = | dsA | dsR | dsG | dsB | */ |
| addu t8, t0, t8 /* dst[i] = |
| * s + BYTE_MUL(dst[i],~qAlpha(s)) */ |
| sw t8, 0(a0) |
| bnez a2, 2b |
| addiu a0, a0, 4 |
| b 5f |
| nop |
| 4: |
| lw t0, 0(a0) /* t0 - dst[i] "1" */ |
| lw t1, 0(a1) /* t1 - src[i] "2" */ |
| addiu a1, a1, 4 |
| addiu a2, a2, -1 |
| replv.ph t6, a3 /* t6 = | 0 | const_alpha | 0 | const_alpha | */ |
| muleu_s.ph.qbl t2, t1, t6 |
| muleu_s.ph.qbr t3, t1, t6 |
| preceu.ph.qbla t4, t2 |
| addq.ph t4, t2, t4 |
| addq.ph t4, t4, t7 |
| preceu.ph.qbla t5, t3 |
| addq.ph t5, t5, t3 |
| addq.ph t5, t5, t7 |
| precrq.qb.ph t8, t4, t5 /* t8 = | dsA | dsR | dsG | dsB | */ |
| |
| nor t6, t8, zero |
| srl t6, t6, 24 /* t6 = ~qAlpha(scaled s) */ |
| replv.ph t6, t6 |
| |
| muleu_s.ph.qbl t2, t0, t6 |
| muleu_s.ph.qbr t3, t0, t6 |
| preceu.ph.qbla t4, t2 |
| addq.ph t4, t2, t4 |
| addq.ph t4, t4, t7 |
| preceu.ph.qbla t5, t3 |
| addq.ph t5, t5, t3 |
| addq.ph t5, t5, t7 |
| precrq.qb.ph t6, t4, t5 /* t6 = | ddA | ddR | ddG | ddB | */ |
| |
| addu t0, t8, t6 |
| sw t0, 0(a0) |
| bnez a2, 4b |
| addiu a0, a0, 4 |
| 5: |
| jr ra |
| nop |
| |
| END(comp_func_SourceOver_asm_mips_dsp) |
| |
| LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp) |
| /* |
| * Converts `length` pixels from buffer and stores them to data: |
| * alpha == 255 -> the pixel is stored unchanged (label 5) |
| * alpha == 0 -> zero is stored (label 1) |
| * otherwise -> alpha is kept and each of R, G, B is multiplied by |
| * (0xff0000 / alpha) and shifted back down by 16 bits. |
| * NOTE(review): this looks like the inverse of alpha premultiplication - |
| * confirm against the C++ caller. |
| * |
| * a0 - uint * data |
| * a1 - const uint *buffer |
| * a2 - int length |
| * |
| * v1 counts processed pixels; the loop ends when v1 == a2. |
| */ |
| |
| blez a2, 6f /* length <= 0: nothing to do */ |
| move v1, zero |
| li t0, 255 |
| lui a3, 0xff /* a3 = 0x00ff0000 (dividend and red mask) */ |
| j 2f |
| lui t2, 0xff00 /* t2 = 0xff000000 (alpha mask) */ |
| 1: |
| addiu v1, v1, 1 |
| sw zero, 0(a0) |
| addiu a1, a1, 4 |
| beq v1, a2, 6f |
| addiu a0, a0, 4 |
| 2: |
| lw v0, 0(a1) |
| srl t3, v0, 0x18 /* t3 = alpha */ |
| beql t3, t0, 5f |
| addiu v1, v1, 1 /* branch-likely slot: counted here only when the branch is taken */ |
| beqz t3, 1b /* the srl below sits in this branch's delay slot; it only writes t1 */ |
| |
| srl t1, v0, 0x8 |
| andi t1, t1, 0xff /* t1 = G */ |
| |
| teq t3, zero, 0x7 |
| div zero, a3, t3 /* LO = 0xff0000 / alpha */ |
| move t8, t3 |
| andi t6, v0, 0xff /* t6 = B */ |
| |
| srl t3,v0,0x10 |
| andi t3,t3,0xff |
| |
| and t5, v0, t2 /* t5 = alpha in bits 31..24 */ |
| mflo t4 |
| |
| mult $ac0, t4, t6 |
| mult $ac1, t1, t4 |
| mul t4, t3, t4 |
| |
| sltiu t8, t8, 2 /* alpha < 2 takes the mflo/sra path, otherwise extr.w */ |
| beqz t8, 3f |
| nop |
| mflo t6, $ac0 |
| mflo t1, $ac1 |
| sra t6, t6, 0x10 |
| sra t1, t1, 0x8 |
| b 4f |
| nop |
| 3: |
| extr.w t6, $ac0, 0x10 |
| extr.w t1, $ac1, 0x8 |
| 4: |
| and v0, t4, a3 /* red byte in bits 23..16 */ |
| or v0, v0, t6 |
| or v0, v0, t5 |
| andi t1, t1, 0xff00 /* green byte in bits 15..8 */ |
| or v0, v0, t1 |
| addiu v1, v1, 1 |
| 5: |
| sw v0, 0(a0) |
| addiu a1, a1, 4 |
| bne v1, a2, 2b |
| addiu a0, a0, 4 |
| 6: |
| jr ra |
| nop |
| |
| END(qt_destStoreARGB32_asm_mips_dsp) |
| |
| LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: dest[i] = color + (dest[i] scaled by ialpha). |
| * The scaling is done by the BYTE_MUL_x2 macro from qt_mips_asm_dsp_p.h |
| * (not visible here; semantics presumed from its name). |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be a non-zero even number - NOTE(review): confirm callers. |
| * |
| * a0 - const uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * a3 - uint ialpha |
| */ |
| |
| beqz a1, 2f |
| nop |
| replv.ph a3, a3 /* a3 = | 0 | ialpha | 0 | ialpha | */ |
| li t9, 8388736 /* t9 = 0x800080 */ |
| 1: |
| lw t0, 0(a0) |
| lw t1, 4(a0) |
| or t2, t0, t1 /* if both dest are zero, no computation needed */ |
| beqz t2, 12f |
| addiu a1, -2 |
| |
| BYTE_MUL_x2 t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0 |
| 11: |
| addu t2, a2, t6 |
| addu t3, a2, t7 |
| sw t2, 0(a0) |
| sw t3, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| b 2f /* no nop: the first addu under label 12 fills this slot and only writes scratch t2 */ |
| 12: |
| addu t2, a2, t0 |
| addu t3, a2, t1 |
| sw t2, 0(a0) |
| sw t3, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| 2: |
| jr ra |
| nop |
| |
| END(comp_func_solid_Source_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = dest[i] + (color scaled by qAlpha(~dest[i])) |
| * using the BYTE_MUL_x2 macro from qt_mips_asm_dsp_p.h (not visible here; |
| * semantics presumed from its name). Returns immediately when length or |
| * color is zero. The counter is decremented by 2 and tested with bnez, so |
| * length is expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * |
| * a3 is not an argument here; it is used as a scratch output register. |
| * NOTE(review): s0/s1 are saved and restored but no visible instruction in |
| * this routine references them. |
| */ |
| |
| addiu sp, sp, -8 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| beqz a1, 2f |
| nop |
| beqz a2, 2f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 */ |
| |
| 1: |
| lw t0, 0(a0) |
| lw t1, 4(a0) |
| not t2, t0 |
| not t3, t1 |
| srl t4, t2, 24 /* t4 = qAlpha(~dest 1) */ |
| srl t5, t3, 24 /* t5 = qAlpha(~dest 2) */ |
| or t2, t4, t5 /* both inverse alphas zero (both dest alphas are 255): |
| * dest is written back unchanged */ |
| beqz t2, 11f |
| addiu a1, -2 |
| replv.ph t2, t4 |
| replv.ph t3, t5 |
| |
| BYTE_MUL_x2 a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7 |
| |
| addu t0, t0, t8 |
| addu t1, t1, a3 |
| 11: |
| sw t0, 0(a0) |
| sw t1, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| |
| 2: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| addiu sp, sp, 8 |
| jr ra |
| nop |
| |
| END(comp_func_solid_DestinationOver_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * const_alpha == 255: dest[i] += (src[i] scaled by qAlpha(~dest[i])) |
| * const_alpha != 255: src[i] is first scaled by const_alpha, then the |
| * same update is applied. |
| * Scaling is done by the BYTE_MUL_x2 macro from qt_mips_asm_dsp_p.h (not |
| * visible here; semantics presumed from its name). The counter is |
| * decremented by 2 and tested with bnez, so length is expected to be even - |
| * NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| * |
| * AT is used as a general register (hence .set noat); s0/s1 are saved on |
| * the stack and restored at label 3. |
| */ |
| |
| .set noat |
| addiu sp, sp, -8 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 */ |
| li t0, 0xff |
| beq a3, t0, 2f |
| nop |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| replv.ph a3, a3 /* a3 = | 0 | const_alpha | 0 | const_alpha | */ |
| 11: |
| lw t0, 0(a1) # src_1 |
| lw t1, 4(a1) # src_2 |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0 |
| # t8 = s1 |
| # AT = s2 |
| lw t0, 0(a0) # dest_1 |
| lw t1, 4(a0) # dest_2 |
| addiu a1, 8 |
| not t2, t0 |
| not t3, t1 |
| srl t4, t2, 24 |
| srl t5, t3, 24 |
| replv.ph t2, t4 # qAlpha(~d) 1 |
| replv.ph t3, t5 # qAlpha(~d) 2 |
| |
| BYTE_MUL_x2 t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7 |
| |
| addu t0, t0, s0 |
| addu t1, t1, s1 |
| sw t0, 0(a0) |
| sw t1, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t0, 0(a0) # dest 1 |
| lw t1, 4(a0) # dest 2 |
| lw s0, 0(a1) # src 1 |
| lw s1, 4(a1) # src 2 |
| not t2, t0 |
| not t3, t1 |
| srl t4, t2, 24 |
| srl t5, t3, 24 |
| replv.ph t2, t4 |
| replv.ph t3, t5 |
| addiu a1, 8 |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7 |
| |
| addu t0, t0, t8 |
| addu t1, t1, AT |
| sw t0, 0(a0) |
| sw t1, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| addiu sp, sp, 8 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_DestinationOver_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2) |
| /* |
| * Two pixels per iteration, built on the BYTE_MUL, BYTE_MUL_x2 and |
| * INTERPOLATE_PIXEL_255 macros from qt_mips_asm_dsp_p.h (not visible here; |
| * semantics presumed from their names): |
| * const_alpha == 255: dest[i] = color scaled by qAlpha(dest[i]) |
| * const_alpha != 255: color is scaled by const_alpha once, then |
| * dest[i] = INTERPOLATE_PIXEL_255(color, qAlpha(dest[i]), dest[i], cia) |
| * with cia = 255 - const_alpha. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -12 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| beqz a1, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| replv.ph t0, a3 |
| li t5, 0xff |
| BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */ |
| subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */ |
| 11: |
| lw t2, 0(a0) /* t2 = d */ |
| lw s0, 4(a0) /* s0 = second d */ |
| addiu a1, -2 |
| srl t3, t2, 24 /* t3 = qAlpha(d) */ |
| srl s2, s0, 24 /* s2 = qAlpha(second d) */ |
| |
| INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7 |
| INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7 |
| |
| sw AT, 0(a0) |
| sw s1, 4(a0) |
| bnez a1, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t0, 0(a0) /* dest 1 */ |
| lw t1, 4(a0) /* dest 2 */ |
| srl t4, t0, 24 |
| srl t5, t1, 24 |
| replv.ph t2, t4 |
| replv.ph t3, t5 |
| addiu a1, -2 |
| |
| BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7 |
| |
| sw t8, 0(a0) |
| sw AT, 4(a0) |
| bnez a1, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| addiu sp, 12 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_solid_SourceIn_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2) |
| /* |
| * Two pixels per iteration, built on the BYTE_MUL_x2 and |
| * INTERPOLATE_PIXEL_255 macros from qt_mips_asm_dsp_p.h (not visible here; |
| * semantics presumed from their names): |
| * const_alpha == 255: dest[i] = src[i] scaled by qAlpha(dest[i]) |
| * const_alpha != 255: s = src[i] scaled by const_alpha, then |
| * dest[i] = INTERPOLATE_PIXEL_255(s, qAlpha(dest[i]), dest[i], cia) |
| * with cia = 255 - const_alpha. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -16 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| sw s3, 12(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| li t5, 0xff |
| subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */ |
| replv.ph a3, a3 |
| 11: |
| lw t0, 0(a1) /* t0 = src 1 */ |
| lw t1, 4(a1) /* t1 = src 2 */ |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0 |
| |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addiu a1, 8 |
| |
| srl t2, t0, 24 /* t2 = qAlpha(d) 1 */ |
| srl t3, t1, 24 /* t3 = qAlpha(d) 2 */ |
| |
| INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3 |
| INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3 |
| |
| sw s1, 0(a0) |
| sw s2, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a0) /* dest 1 */ |
| lw t3, 4(a0) /* dest 2 */ |
| lw t0, 0(a1) /* src 1 */ |
| lw t1, 4(a1) /* src 2 */ |
| srl t4, t2, 24 /* t4 = qAlpha(dest 1) */ |
| srl t5, t3, 24 /* t5 = qAlpha(dest 2) */ |
| replv.ph t2, t4 |
| replv.ph t3, t5 |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 |
| |
| addiu a1, 8 |
| sw t8, 0(a0) |
| sw AT, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| lw s3, 12(sp) |
| addiu sp, 16 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_SourceIn_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: dest[i] = dest[i] scaled by a, using the |
| * BYTE_MUL_x2 macro from qt_mips_asm_dsp_p.h (not visible here; semantics |
| * presumed from its name). The counter is decremented by 2 and tested with |
| * bnez, so length is expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint a |
| */ |
| |
| .set noat |
| beqz a1, 2f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| replv.ph a2, a2 /* a2 = | 0 | a | 0 | a | */ |
| 1: |
| lw t0, 0(a0) |
| lw t1, 4(a0) |
| addiu a1, -2 |
| |
| BYTE_MUL_x2 t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0 |
| |
| sw t8, 0(a0) |
| sw AT, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| 2: |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_solid_DestinationIn_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2) |
| /* |
| * Two pixels per iteration, using the BYTE_MUL_x2 macro from |
| * qt_mips_asm_dsp_p.h (not visible here; semantics presumed from its name): |
| * const_alpha == 255: dest[i] = dest[i] scaled by qAlpha(src[i]) |
| * const_alpha != 255: a = (qAlpha(src[i]) scaled by const_alpha) + cia, |
| * dest[i] = dest[i] scaled by a, |
| * with cia = 255 - const_alpha. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| addiu sp, -8 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| li t0, 0xff |
| beq a3, t0, 2f |
| nop |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| li t5, 0xff |
| subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */ |
| replv.ph a3, a3 |
| 11: |
| lw t0, 0(a1) /* t0 = src 1 */ |
| lw t1, 4(a1) /* t1 = src 2 */ |
| addiu a2, -2 |
| srl t0, t0, 24 /* t0 = qAlpha(src 1) */ |
| srl t1, t1, 24 /* t1 = qAlpha(src 2) */ |
| |
| BYTE_MUL_x2 t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0 |
| |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addu s1, s1, t8 /* a 1 */ |
| addu t7, t7, t8 /* a 2 */ |
| replv.ph t2, s1 |
| replv.ph t3, t7 |
| |
| BYTE_MUL_x2 t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0 |
| |
| addiu a1, 8 |
| sw s1, 0(a0) |
| sw t7, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a1) /* src 1 */ |
| lw t3, 4(a1) /* src 2 */ |
| lw t0, 0(a0) /* dest 1 */ |
| lw t1, 4(a0) /* dest 2 */ |
| srl t4, t2, 24 |
| srl t5, t3, 24 |
| replv.ph t2, t4 /* t2 = qAlpha(src 1) */ |
| replv.ph t3, t5 /* t3 = qAlpha(src 2) */ |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7 |
| |
| addiu a1, 8 |
| sw t8, 0(a0) |
| sw s1, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| addiu sp, 8 |
| jr ra |
| nop |
| |
| END(comp_func_DestinationIn_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2) |
| /* |
| * Two pixels per iteration, using the BYTE_MUL_x2 macro from |
| * qt_mips_asm_dsp_p.h (not visible here; semantics presumed from its name): |
| * const_alpha == 255: dest[i] = dest[i] scaled by qAlpha(~src[i]) |
| * const_alpha != 255: a = (qAlpha(~src[i]) scaled by const_alpha) + cia, |
| * dest[i] = dest[i] scaled by a, |
| * with cia = 255 - const_alpha. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -4 |
| sw s0, 0(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| li t0, 0xff |
| beq a3, t0, 2f |
| nop |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| li t5, 0xff |
| subu t8, t5, a3 /* t8 = cia = 255 - const_alpha */ |
| replv.ph a3, a3 |
| 11: |
| lw t0, 0(a1) /* t0 = src 1 */ |
| lw t1, 4(a1) /* t1 = src 2 */ |
| not t0, t0 |
| not t1, t1 |
| addiu a2, -2 |
| srl t0, t0, 24 /* t0 = qAlpha(~src 1) */ |
| srl t1, t1, 24 /* t1 = qAlpha(~src 2) */ |
| |
| BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0 |
| |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addu AT, AT, t8 /* a 1 */ |
| addu t7, t7, t8 /* a 2 */ |
| replv.ph t2, AT |
| replv.ph t3, t7 |
| |
| BYTE_MUL_x2 t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw t7, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a1) /* src 1 */ |
| lw t3, 4(a1) /* src 2 */ |
| not t2, t2 |
| not t3, t3 |
| lw t0, 0(a0) /* dest 1 */ |
| lw t1, 4(a0) /* dest 2 */ |
| srl t4, t2, 24 |
| srl t5, t3, 24 |
| replv.ph t2, t4 /* t2 = qAlpha(~src 1) */ |
| replv.ph t3, t5 /* t3 = qAlpha(~src 2) */ |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 |
| |
| addiu a1, 8 |
| sw t8, 0(a0) |
| sw AT, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| addiu sp, 4 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_DestinationOut_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(color, qAlpha(dest[i]), dest[i], sia) |
| * using the INTERPOLATE_PIXEL_255 macro from qt_mips_asm_dsp_p.h (not |
| * visible here; semantics presumed from its name). The counter is |
| * decremented by 2 and tested with bnez, so length is expected to be even - |
| * NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * a3 - uint sia |
| * |
| * AT is used as a general register (hence .set noat); s0 is saved on the |
| * stack and restored at label 2. |
| */ |
| |
| .set noat |
| addiu sp, -4 |
| sw s0, 0(sp) |
| beqz a1, 2f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| 1: |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addiu a1, -2 |
| srl t2, t0, 24 /* t2 = qAlpha(dest 1) */ |
| srl t3, t1, 24 /* t3 = qAlpha(dest 2) */ |
| |
| INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7 |
| INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7 |
| |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| 2: |
| lw s0, 0(sp) |
| addiu sp, 4 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_solid_SourceAtop_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(s, qAlpha(dest[i]), dest[i], qAlpha(~s)) |
| * where s = src[i] when const_alpha == 255, otherwise src[i] scaled by |
| * const_alpha. Uses the BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 macros from |
| * qt_mips_asm_dsp_p.h (not visible here; semantics presumed from their |
| * names). The counter is decremented by 2 and tested with bnez, so length |
| * is expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -20 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| sw s3, 12(sp) |
| sw s4, 16(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| replv.ph a3, a3 |
| 11: |
| lw AT, 0(a1) /* src 1 */ |
| lw s0, 4(a1) /* src 2 */ |
| |
| BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 |
| /* t0 = s */ |
| |
| lw t2, 0(a0) /* t2 = dest 1 */ |
| lw t3, 4(a0) /* t3 = dest 2 */ |
| |
| srl t4, t2, 24 /* t4 = qAlpha(dest 1) */ |
| srl t5, t3, 24 |
| not t6, t0 |
| not t7, t1 |
| srl t6, t6, 24 /* t6 = qAlpha(~s) */ |
| srl t7, t7, 24 |
| addiu a2, -2 |
| |
| INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 |
| INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a0) /* dest 1 */ |
| lw t3, 4(a0) /* dest 2 */ |
| lw t0, 0(a1) /* src 1 */ |
| lw t1, 4(a1) /* src 2 */ |
| srl t4, t2, 24 /* t4 = qAlpha(dest 1) */ |
| srl t5, t3, 24 |
| not t6, t0 |
| not t7, t1 |
| srl t6, t6, 24 /* t6 = qAlpha(~src 1) */ |
| srl t7, t7, 24 |
| addiu a2, -2 |
| |
| INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 |
| INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| lw s3, 12(sp) |
| lw s4, 16(sp) |
| addiu sp, 20 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_SourceAtop_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(dest[i], a, color, qAlpha(~dest[i])) |
| * using the INTERPOLATE_PIXEL_255 macro from qt_mips_asm_dsp_p.h (not |
| * visible here; semantics presumed from its name). The counter is |
| * decremented by 2 and tested with bnez, so length is expected to be even - |
| * NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * a3 - uint a |
| */ |
| |
| .set noat |
| addiu sp, -4 |
| sw s0, 0(sp) |
| beqz a1, 2f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| 1: |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addiu a1, -2 |
| not t2, t0 |
| not t3, t1 |
| srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */ |
| srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */ |
| |
| INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7 |
| INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7 |
| |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| 2: |
| lw s0, 0(sp) |
| addiu sp, 4 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_solid_DestinationAtop_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(dest[i], a, s, qAlpha(~dest[i])) |
| * const_alpha == 255: s = src[i], a = qAlpha(s) |
| * const_alpha != 255: s = src[i] scaled by const_alpha, |
| * a = qAlpha(s) + cia, cia = 255 - const_alpha |
| * Uses the BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 macros from |
| * qt_mips_asm_dsp_p.h (not visible here; semantics presumed from their |
| * names). The counter is decremented by 2 and tested with bnez, so length |
| * is expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -24 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| sw s3, 12(sp) |
| sw s4, 16(sp) |
| sw s5, 20(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| li s5, 0xff |
| subu s5, s5, a3 /* s5 = cia = 255 - const_alpha */ |
| replv.ph a3, a3 |
| 11: |
| lw AT, 0(a1) /* src 1 */ |
| lw s0, 4(a1) /* src 2 */ |
| |
| BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 |
| /* t0 = s */ |
| |
| lw t2, 0(a0) /* t2 = dest 1 */ |
| lw t3, 4(a0) /* t3 = dest 2 */ |
| |
| not t4, t2 |
| not t5, t3 |
| srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */ |
| srl t5, t5, 24 |
| srl t6, t0, 24 |
| srl t7, t1, 24 |
| addu t6, t6, s5 /* t6 = a = qAlpha(s1) + cia */ |
| addu t7, t7, s5 |
| addiu a2, -2 |
| |
| INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4 |
| INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a0) /* d1 */ |
| lw t3, 4(a0) /* d2 */ |
| lw t0, 0(a1) /* s1 */ |
| lw t1, 4(a1) /* s2 */ |
| srl t4, t0, 24 /* t4 = qAlpha(s1) */ |
| srl t5, t1, 24 |
| not t6, t2 |
| not t7, t3 |
| srl t6, t6, 24 /* qAlpha(~d1) */ |
| srl t7, t7, 24 |
| addiu a2, -2 |
| |
| INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4 |
| INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| lw s3, 12(sp) |
| lw s4, 16(sp) |
| lw s5, 20(sp) |
| addiu sp, 24 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_DestinationAtop_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(color, qAlpha(~dest[i]), dest[i], sia) |
| * using the INTERPOLATE_PIXEL_255 macro from qt_mips_asm_dsp_p.h (not |
| * visible here; semantics presumed from its name). The counter is |
| * decremented by 2 and tested with bnez, so length is expected to be even - |
| * NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * a3 - uint sia |
| * |
| * AT is used as a general register (hence .set noat); s0 is saved on the |
| * stack and restored at label 2. |
| */ |
| |
| .set noat |
| addiu sp, -4 |
| sw s0, 0(sp) |
| beqz a1, 2f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| 1: |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addiu a1, -2 |
| not t2, t0 |
| not t3, t1 |
| srl t2, t2, 24 /* t2 = qAlpha(~(dest 1)) */ |
| srl t3, t3, 24 /* t3 = qAlpha(~(dest 2)) */ |
| |
| INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7 |
| INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7 |
| |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a1, 1b |
| addiu a0, 8 |
| 2: |
| lw s0, 0(sp) |
| addiu sp, 4 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_solid_XOR_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(s, qAlpha(~dest[i]), dest[i], qAlpha(~s)) |
| * where s = src[i] when const_alpha == 255, otherwise src[i] scaled by |
| * const_alpha. Uses the BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 macros from |
| * qt_mips_asm_dsp_p.h (not visible here; semantics presumed from their |
| * names). The counter is decremented by 2 and tested with bnez, so length |
| * is expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -20 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| sw s3, 12(sp) |
| sw s4, 16(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| replv.ph a3, a3 |
| 11: |
| lw AT, 0(a1) /* src 1 */ |
| lw s0, 4(a1) /* src 2 */ |
| |
| BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 |
| /* t0 = s1 */ |
| /* t1 = s2 */ |
| |
| lw t2, 0(a0) /* t2 = dest 1 */ |
| lw t3, 4(a0) /* t3 = dest 2 */ |
| |
| not t4, t2 |
| not t5, t3 |
| srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */ |
| srl t5, t5, 24 |
| not t6, t0 |
| not t7, t1 |
| srl t6, t6, 24 /* t6 = qAlpha(~s) */ |
| srl t7, t7, 24 |
| addiu a2, -2 |
| |
| INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 |
| INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a0) /* d1 */ |
| lw t3, 4(a0) /* d2 */ |
| lw t0, 0(a1) /* s1 */ |
| lw t1, 4(a1) /* s2 */ |
| not t4, t0 |
| not t5, t1 |
| srl t4, t4, 24 /* t4 = qAlpha(~s1) */ |
| srl t5, t5, 24 |
| not t6, t2 |
| not t7, t3 |
| srl t6, t6, 24 /* qAlpha(~d1) */ |
| srl t7, t7, 24 |
| addiu a2, -2 |
| |
| INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4 |
| INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4 |
| |
| addiu a1, 8 |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| lw s3, 12(sp) |
| lw s4, 16(sp) |
| addiu sp, 20 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_XOR_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2) |
| /* |
| * Two pixels per iteration, built on the BYTE_MUL, BYTE_MUL_x2 and |
| * INTERPOLATE_PIXEL_255 macros from qt_mips_asm_dsp_p.h (not visible here; |
| * semantics presumed from their names): |
| * const_alpha == 255: dest[i] = color scaled by qAlpha(~dest[i]) |
| * const_alpha != 255: color is scaled by const_alpha once, then |
| * dest[i] = INTERPOLATE_PIXEL_255(color, qAlpha(~dest[i]), dest[i], cia) |
| * with cia = 255 - const_alpha. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - int length |
| * a2 - uint color |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -12 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| beqz a1, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| replv.ph t0, a3 |
| li t5, 0xff |
| BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */ |
| subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */ |
| 11: |
| lw t2, 0(a0) /* t2 = d1 */ |
| lw s0, 4(a0) /* s0 = d2 */ |
| addiu a1, -2 |
| not t3, t2 |
| not s2, s0 |
| srl t3, t3, 24 /* t3 = qAlpha(~d1) */ |
| srl s2, s2, 24 /* s2 = qAlpha(~d2) */ |
| |
| INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7 |
| INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7 |
| |
| sw AT, 0(a0) |
| sw s1, 4(a0) |
| bnez a1, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t0, 0(a0) /* dest 1 */ |
| lw t1, 4(a0) /* dest 2 */ |
| not t4, t0 |
| not t5, t1 |
| srl t4, t4, 24 /* t4 = qAlpha(~dest 1) */ |
| srl t5, t5, 24 /* t5 = qAlpha(~dest 2) */ |
| replv.ph t2, t4 |
| replv.ph t3, t5 |
| addiu a1, -2 |
| |
| BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7 |
| |
| sw t8, 0(a0) |
| sw AT, 4(a0) |
| bnez a1, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| addiu sp, 12 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_solid_SourceOut_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2) |
| /* |
| * Two pixels per iteration, built on the BYTE_MUL_x2 and |
| * INTERPOLATE_PIXEL_255 macros from qt_mips_asm_dsp_p.h (not visible here; |
| * semantics presumed from their names): |
| * const_alpha == 255: dest[i] = src[i] scaled by qAlpha(~dest[i]) |
| * const_alpha != 255: s = src[i] scaled by const_alpha, then |
| * dest[i] = INTERPOLATE_PIXEL_255(s, qAlpha(~dest[i]), dest[i], cia) |
| * with cia = 255 - const_alpha. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -16 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| sw s3, 12(sp) |
| beqz a2, 3f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| li t0, 0xff |
| beq a3, t0, 2f |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| |
| /* part where const_alpha != 255 */ |
| 1: |
| li t5, 0xff |
| subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */ |
| replv.ph a3, a3 |
| 11: |
| lw t0, 0(a1) /* t0 = src 1 */ |
| lw t1, 4(a1) /* t1 = src 2 */ |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0 |
| |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| addiu a1, 8 |
| |
| not t2, t0 |
| not t3, t1 |
| srl t2, t2, 24 /* t2 = qAlpha(~d1) */ |
| srl t3, t3, 24 /* t3 = qAlpha(~d2) */ |
| |
| INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3 |
| INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3 |
| |
| sw s1, 0(a0) |
| sw s2, 4(a0) |
| bnez a2, 11b |
| addiu a0, 8 |
| b 3f |
| nop |
| |
| /* part where const_alpha = 255 */ |
| 2: |
| lw t2, 0(a0) /* dest 1 */ |
| lw t3, 4(a0) /* dest 2 */ |
| lw t0, 0(a1) /* src 1 */ |
| lw t1, 4(a1) /* src 2 */ |
| not t4, t2 |
| not t5, t3 |
| srl t4, t4, 24 /* qAlpha(~d1) */ |
| srl t5, t5, 24 /* qAlpha(~d2) */ |
| replv.ph t2, t4 |
| replv.ph t3, t5 |
| addiu a2, -2 |
| |
| BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 |
| |
| addiu a1, 8 |
| sw t8, 0(a0) |
| sw AT, 4(a0) |
| bnez a2, 2b |
| addiu a0, 8 |
| |
| 3: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| lw s3, 12(sp) |
| addiu sp, 16 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_SourceOut_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2) |
| /* |
| * Two pixels per iteration: |
| * dest[i] = INTERPOLATE_PIXEL_255(src[i], const_alpha, dest[i], ialpha) |
| * with ialpha = 255 - const_alpha, using the INTERPOLATE_PIXEL_255 macro |
| * from qt_mips_asm_dsp_p.h (not visible here; semantics presumed from its |
| * name). There is no separate const_alpha == 255 path in this routine. |
| * The counter is decremented by 2 and tested with bnez, so length is |
| * expected to be even - NOTE(review): confirm callers. |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * a3 - uint const_alpha |
| */ |
| |
| .set noat |
| addiu sp, -8 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| beqz a2, 2f |
| nop |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| lui t8, 0xff00 |
| ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |
| li t7, 0xff |
| subu t7, t7, a3 /* t7 = ialpha */ |
| 1: |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| lw t2, 0(a1) /* t2 = src 1 */ |
| lw t3, 4(a1) /* t3 = src 2 */ |
| addiu a2, -2 |
| addiu a1, 8 |
| |
| INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1 |
| INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1 |
| |
| sw AT, 0(a0) |
| sw s0, 4(a0) |
| bnez a2, 1b |
| addiu a0, 8 |
| 2: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| addiu sp, 8 |
| jr ra |
| nop |
| .set at |
| |
| END(comp_func_Source_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2) |
| /* |
| * Blends src over dest with a constant alpha, two pixels per iteration. |
| * Per pixel (the BYTE_MUL_x2 macro is defined outside this function): |
| * s = BYTE_MUL(src, const_alpha) |
| * dest = s + BYTE_MUL(dest, qAlpha(~s)) |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length; decremented by 2 per iteration and tested against |
| * zero, so a non-zero length has to be even |
| * a3 - uint const_alpha (replicated into both halfwords before the loop) |
| * |
| * s0, s1 and s2 are saved on the stack and restored before returning. |
| * AT is used as an ordinary register, hence the .set noat / .set at pair. |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| .set noat |
| /* |
| * Three registers need 12 bytes, but the frame is rounded up to 16 so that |
| * sp stays 8-byte aligned as the o32 ABI requires; the slot at 12(sp) is |
| * padding. |
| */ |
| addiu sp, -16 |
| sw s0, 0(sp) |
| sw s1, 4(sp) |
| sw s2, 8(sp) |
| beqz a2, 2f /* length == 0: just restore and return */ |
| nop |
| replv.ph a3, a3 /* a3 = const_alpha in both halfwords */ |
| li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |
| |
| 1: |
| lw t0, 0(a1) /* t0 = src 1 */ |
| lw t1, 4(a1) /* t1 = src 2 */ |
| addiu a2, -2 /* two pixels consumed */ |
| |
| /* AT, t7 = src 1, src 2 scaled by const_alpha */ |
| BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0 |
| |
| lw t0, 0(a0) /* t0 = dest 1 */ |
| lw t1, 4(a0) /* t1 = dest 2 */ |
| not s1, AT |
| not s2, t7 |
| srl s1, s1, 24 /* s1 = qAlpha(~scaled src 1) */ |
| srl s2, s2, 24 /* s2 = qAlpha(~scaled src 2) */ |
| replv.ph s1, s1 |
| replv.ph s2, s2 |
| |
| /* t2, t3 = dest 1, dest 2 scaled by the inverse source alpha */ |
| BYTE_MUL_x2 t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0 |
| |
| addiu a1, 8 /* src += 2 */ |
| addu AT, AT, t2 |
| addu t7, t7, t3 |
| sw AT, 0(a0) |
| sw t7, 4(a0) |
| bnez a2, 1b |
| addiu a0, 8 /* delay slot: dest += 2 */ |
| |
| 2: |
| lw s0, 0(sp) |
| lw s1, 4(sp) |
| lw s2, 8(sp) |
| addiu sp, 16 |
| jr ra |
| nop |
| .set at |
| |
| END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2) |
| |
| LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm) |
| /* |
| * Blends src over dest without a constant alpha, one pixel per iteration: |
| * dst[i] = s + BYTE_MUL(dst[i], ~qAlpha(s)) |
| * (the BYTE_MUL macro is defined outside this function). |
| * |
| * Shortcuts taken per pixel: |
| * - qAlpha(s) == 0xff : s is stored unchanged |
| * - s == 0 : dst[i] is left untouched (label 1) |
| * - dst[i] == 0 : BYTE_MUL is skipped and s is stored |
| * |
| * a0 - uint *dest |
| * a1 - const uint *src |
| * a2 - int length |
| * |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| beqz a2, 5f /* length == 0: return */ |
| nop |
| li t7, 8388736 /* t7 = 0x800080 */ |
| b 2f |
| nop |
| 1: /* s == 0: advance dest and count without writing */ |
| addiu a0, a0, 4 |
| addiu a2, a2, -1 |
| beqz a2, 5f |
| nop |
| 2: |
| lw t0, 0(a1) /* t0 = s = src[i] */ |
| addiu a1, a1, 4 |
| nor t1, t0, zero |
| srl t1, t1, 24 /* t1 = ~qAlpha(s) */ |
| bnez t1, 3f /* alpha != 0xff: needs blending */ |
| nop |
| sw t0, 0(a0) /* dst[i] = src[i] */ |
| addiu a2, a2, -1 |
| bnez a2, 2b |
| addiu a0, a0, 4 /* delay slot: dest++ */ |
| b 5f |
| nop |
| 3: |
| beqz t0, 1b /* s == 0: nothing to add */ |
| replv.ph t6, t1 /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */ |
| |
| lw t4, 0(a0) /* t4 = dst[i] */ |
| addiu a2, a2, -1 |
| beqz t4, 31f /* dst[i] == 0: product is 0, skip the multiply */ |
| move t8, zero /* delay slot: t8 = 0 for the skipped case */ |
| |
| /* t8 receives the scaled destination pixel that is added to s below. */ |
| BYTE_MUL t4, t8, t6, t7, t1, t2, t3, t4 |
| 31: |
| addu t8, t0, t8 /* dst[i] = |
| * s + BYTE_MUL(dst[i],~qAlpha(s)) */ |
| sw t8, 0(a0) |
| bnez a2, 2b |
| addiu a0, a0, 4 /* delay slot: dest++ */ |
| b 5f |
| nop |
| 5: |
| jr ra |
| nop |
| |
| END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm) |
| |
| |
| /* |
| * Endianness-dependent helpers, selected by __MIPSEL: |
| * PACK(r, s, t) - packrl.ph into r; the big-endian variant swaps the two |
| * source operands. |
| * SWHI / SWLO - partial-word stores (swl/swr) at offset o + 1 / o + 0; |
| * the big-endian variant swaps the two instructions. |
| * LDHI / LDLO - partial-word loads (lwl/lwr) at offset o + 1 / o + 2; |
| * the big-endian variant swaps the two instructions. |
| * In all of them r is the data register, o a byte offset and b the base |
| * address register. |
| */ |
| #if defined(__MIPSEL) && __MIPSEL |
| # define PACK(r, s, t) packrl.ph r, s, t |
| # define SWHI(r, o, b) swl r, o + 1 (b) |
| # define SWLO(r, o, b) swr r, o + 0 (b) |
| # define LDHI(r, o, b) lwl r, o + 1 (b) |
| # define LDLO(r, o, b) lwr r, o + 2 (b) |
| #else |
| # define PACK(r, s, t) packrl.ph r, t, s |
| # define SWHI(r, o, b) swr r, o + 1 (b) |
| # define SWLO(r, o, b) swl r, o + 0 (b) |
| # define LDHI(r, o, b) lwr r, o + 1 (b) |
| # define LDLO(r, o, b) lwl r, o + 2 (b) |
| #endif |
| |
| LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm) |
| /* |
| * Copies len 16-bit pixels from src to dst (no blending arithmetic is |
| * performed), eight pixels per iteration in the unrolled loops and one |
| * pixel at a time for the remainder. |
| * |
| * a0 - dst (*r5g6b5) |
| * a1 - src (const *r5g6b5) |
| * a2 - len (unsigned int) |
| * |
| * Register usage: |
| * t0-3 - Scratch registers |
| * t4 - Number of iterations to do in unrolled loops |
| * t5-7 - Auxiliary scratch registers. |
| * |
| * Check if base addresses of src/dst are word aligned, cases: |
| * a) Both aligned: |
| * 1. Copy whole words. |
| * b) Both unaligned: |
| * 1. Copy a halfword |
| * 2. Use aligned case. |
| * c) dst aligned, src unaligned: |
| * 1. Load the source with LDHI / lw / LDLO. |
| * 2. Re-pair the halfwords with PACK and store whole words. |
| * d) dst unaligned, src aligned: |
| * 1. Load whole words from src. |
| * 2. Store the first and last halfword with SWLO / SWHI and the |
| * PACKed middle part as whole words. |
| * |
| * Cases c) and d) use lw/sw at offsets 2, 6 and 10 from the unaligned |
| * pointer - assumes pixel pointers are at least halfword aligned, so that |
| * an unaligned pointer is 2 modulo 4 (TODO confirm against callers). |
| * |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| beqz a2, 0f /* if (a2:len == 0): return */ |
| andi t0, a0, 0x3 /* t0 = a0:dst % 4 */ |
| andi t1, a1, 0x3 /* t1 = a1:src % 4 */ |
| or t2, t0, t1 /* t2 = t0 | t1 */ |
| |
| beqz t2, 4f /* both aligned */ |
| nop |
| beqz t0, 3f /* dst aligned, src unaligned */ |
| nop |
| beqz t1, 2f /* src aligned, dst unaligned */ |
| nop |
| |
| /* |
| * Both src/dst are unaligned: copy 1 halfword from src to dst, |
| * then fall through to continue with the word-aligned copy. |
| */ |
| lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */ |
| addiu a1, a1, 2 /* src++ */ |
| addiu a2, a2,-1 /* len-- */ |
| sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */ |
| addiu a0, a0, 2 /* dst++ */ |
| |
| /* |
| * Both src/dst pointers are word-aligned, process eight |
| * items at a time in an unrolled loop. |
| */ |
| 4: beqz a2, 0f /* if (len == 0): return */ |
| srl t4, a2, 3 /* t4 = len / 8 */ |
| |
| beqz t4, 5f /* if (t4 == 0): tail */ |
| andi a2, a2, 0x07 /* len = len % 8 */ |
| |
| 1: lw t0, 0 (a1) |
| lw t1, 4 (a1) |
| lw t2, 8 (a1) |
| lw t3, 12 (a1) |
| |
| addiu t4, t4, -1 /* t4-- */ |
| addiu a1, a1, 16 /* src += 8 */ |
| |
| sw t0, 0 (a0) |
| sw t1, 4 (a0) |
| sw t2, 8 (a0) |
| sw t3, 12 (a0) |
| |
| bnez t4, 1b |
| addiu a0, a0, 16 /* dst += 8 */ |
| |
| b 5f |
| nop |
| |
| |
| /* |
| * dst pointer is unaligned (src is aligned): load four whole words, |
| * store the leading halfword with SWLO and the trailing one with SWHI, |
| * and store the three PACKed words in between at dst + 2, + 6, + 10. |
| */ |
| 2: beqz a2, 0f /* if (len == 0): return */ |
| srl t4, a2, 3 /* t4 = len / 8 */ |
| beqz t4, 5f /* if (t4 == 0): tail */ |
| andi a2, a2, 0x07 /* len = len % 8 */ |
| |
| 1: lw t0, 0 (a1) |
| lw t1, 4 (a1) |
| lw t2, 8 (a1) |
| lw t3, 12 (a1) |
| |
| addiu t4, t4, -1 /* t4-- */ |
| addiu a1, a1, 16 /* src += 8 */ |
| |
| SWLO (t0, 0, a0) |
| PACK (t5, t1, t0) |
| PACK (t6, t2, t1) |
| PACK (t7, t3, t2) |
| SWHI (t3, 14, a0) |
| sw t5, 2 (a0) |
| sw t6, 6 (a0) |
| sw t7, 10 (a0) |
| |
| bnez t4, 1b |
| addiu a0, a0, 16 /* dst += 8 */ |
| |
| b 5f |
| nop |
| |
| /* |
| * src pointer is unaligned (dst is aligned): load the leading part with |
| * LDHI, three words at src + 2, + 6, + 10 and the trailing part with |
| * LDLO, then PACK neighbouring registers into four words for dst. |
| * This loop falls through into the tail code at label 5. |
| */ |
| 3: beqz a2, 0f /* if (len == 0): return */ |
| srl t4, a2, 3 /* t4 = len / 8 */ |
| beqz t4, 5f /* if (t4 == 0): tail */ |
| andi a2, a2, 0x07 /* len = len % 8 */ |
| |
| 1: LDHI (t0, 0, a1) |
| lw t1, 2 (a1) |
| lw t2, 6 (a1) |
| lw t3, 10 (a1) |
| LDLO (t5, 12, a1) |
| |
| addiu t4, t4, -1 /* t4-- */ |
| addiu a1, a1, 16 /* src += 8 */ |
| |
| PACK (t0, t1, t0) |
| PACK (t6, t2, t1) |
| PACK (t7, t3, t2) |
| sw t0, 0 (a0) |
| PACK (t0, t5, t3) |
| sw t6, 4 (a0) |
| sw t7, 8 (a0) |
| sw t0, 12 (a0) |
| |
| bnez t4, 1b |
| addiu a0, a0, 16 /* dst += 8 */ |
| |
| |
| 5: /* Process remaining items (a2:len < 8), one at a time */ |
| beqz a2, 0f |
| nop |
| |
| 1: lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */ |
| addiu a2, a2,-1 /* len-- */ |
| addiu a1, a1, 2 /* src++ */ |
| sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */ |
| bnez a2, 1b /* if (len != 0): loop */ |
| addiu a0, a0, 2 /* dst++ */ |
| |
| 0: jr ra |
| nop |
| |
| END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm) |
| |
| |
| #undef LDHI |
| #undef LDLO |
| #undef PACK |
| #undef SWHI |
| #undef SWLO |
| |
| |
| LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm) |
| /* |
| * Blends r5g6b5 pixels with a constant alpha, one pixel per iteration: |
| * alpha = (alpha * 255) >> 8 |
| * ialpha = 255 - alpha |
| * dst = scale(src, alpha + 1) + scale(dst, ialpha + 1) |
| * where scale(x, f) = ((((x & 0x07e0) * f) >> 8) & 0x07e0) |
| * | ((((x & 0xf81f) * (f >> 2)) >> 6) & 0xf81f) |
| * i.e. the green field and the red/blue fields are multiplied separately. |
| * |
| * a0 - dst (*r5g6b5) |
| * a1 - src (const *r5g6b5) |
| * a2 - len (unsigned int) - batch length |
| * a3 - alpha (int) |
| * |
| * Loop invariants: a3 = alpha + 1, t4 = a3 >> 2, t9 = ialpha + 1, |
| * t5 = t9 >> 2. |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| beqz a2, 2f /* len == 0: return */ |
| li t9, 255 /* delay slot */ |
| sll t8, a3, 8 |
| subu a3, t8, a3 /* a3 = alpha * 255 */ |
| srl a3, a3, 8 /* a3 = (alpha * 255) >> 8 */ |
| subu t9, t9, a3 /* t9 = ialpha = 255 - a3 */ |
| addiu a3, a3, 1 /* a3 = alpha + 1 (green factor, src) */ |
| srl t4, a3, 2 /* t4 = red/blue factor, src */ |
| addiu t9, t9, 1 /* t9 = ialpha + 1 (green factor, dst) */ |
| srl t5, t9, 2 /* t5 = red/blue factor, dst */ |
| 1: |
| lhu t0, 0(a1) /* t0 = src pixel */ |
| lhu t1, 0(a0) /* t1 = dst pixel */ |
| addiu a2, a2, -1 |
| andi t2, t0, 0x07e0 /* t2 = src green field */ |
| andi t0, t0, 0xf81f /* t0 = src red/blue fields */ |
| mul t2, t2, a3 |
| mul t0, t0, t4 |
| andi t3, t1, 0x07e0 /* t3 = dst green field */ |
| andi t1, t1, 0xf81f /* t1 = dst red/blue fields */ |
| mul t3, t3, t9 |
| mul t1, t1, t5 |
| addiu a1, a1, 2 /* src++ */ |
| srl t2, t2, 8 |
| srl t0, t0, 6 |
| andi t2, t2, 0x07e0 |
| andi t0, t0, 0xf81f |
| or t0, t0, t2 /* t0 = scaled src pixel */ |
| srl t3, t3, 8 |
| srl t1, t1, 6 |
| andi t3, t3, 0x07e0 |
| andi t1, t1, 0xf81f |
| or t1, t1, t3 /* t1 = scaled dst pixel */ |
| addu t0, t0, t1 |
| sh t0, 0(a0) |
| bgtz a2, 1b |
| addiu a0, a0, 2 /* delay slot: dst++ */ |
| 2: |
| jr ra |
| nop |
| |
| END(qt_blend_rgb16_on_rgb16_mips_dsp_asm) |
| |
| |
| LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp) |
| /* |
| * Expands 3-byte source pixels into 32-bit words of the form |
| * 0xff000000 | (byte0 << 16) | (byte1 << 8) | byte2 |
| * where byte0..byte2 are the pixel's bytes in memory order (labelled |
| * R, G, B in the comments below). |
| * |
| * a0 - dst address (address of 32-bit aRGB value) |
| * a1 - src address |
| * a2 - length |
| * |
| * An odd length is made even by converting one pixel with byte loads. |
| * The rest is converted two pixels (six bytes) per iteration; which of |
| * the two loops runs depends on whether src is odd (label 2) or even |
| * (label 3), so that every lhu hits an even address. |
| * |
| * The lane comments describe what lhu returns on a little-endian target. |
| * NOTE(review): no big-endian variant is visible here - confirm this |
| * routine is only built for little-endian. |
| * |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| beqz a2, 4f /* length == 0: return */ |
| lui t8, 0xff00 /* delay slot: t8 = 0xff000000 (opaque alpha) */ |
| andi t0, a2, 0x1 |
| beqz t0, 1f /* even length: go straight to the x2 loops */ |
| nop |
| /* case for one pixel */ |
| lbu t1, 0(a1) /* t1 = R */ |
| lbu v1, 2(a1) /* v1 = B */ |
| lbu t0, 1(a1) /* t0 = G */ |
| addiu a1, a1, 3 |
| addiu a2, a2, -1 |
| sll t1, t1, 0x10 |
| or v1, v1, t8 |
| sll t0, t0, 0x8 |
| or v1, v1, t1 |
| or v1, v1, t0 /* v1 = | ff | R | G | B | */ |
| sw v1, 0(a0) |
| addiu a0, a0, 4 |
| |
| beqz a2, 4f /* only one pixel is present (length = 1) */ |
| nop |
| 1: |
| andi t0, a1, 0x1 |
| beqz t0, 3f /* src is even: use loop 3 */ |
| nop |
| 2: /* src is odd: byte first, then halfword */ |
| lbu t0, 0(a1) /* t0 = | 0 | 0 | 0 | R1 | */ |
| lhu t1, 1(a1) /* t1 = | 0 | 0 | B1 | G1 | */ |
| addiu a1, a1, 3 |
| lhu t2, 0(a1) /* t2 = | 0 | 0 | G2 | R2 | */ |
| lbu t3, 2(a1) /* t3 = | 0 | 0 | 0 | B2 | */ |
| |
| sll t0, t0, 16 |
| or t0, t0, t8 /* t0 = | ff | R1 | 0 | 0 | */ |
| shll.ph t4, t1, 8 /* t4 = | 0 | 0 | G1 | 0 | */ |
| srl t5, t1, 8 /* t5 = | 0 | 0 | 0 | B1 | */ |
| or t4, t4, t5 /* t4 = | 0 | 0 | G1 | B1 | */ |
| or t0, t0, t4 /* t0 = | ff | R1 | G1 | B1 | */ |
| |
| shll.ph t4, t2, 8 /* t4 = | 0 | 0 | R2 | 0 | */ |
| srl t5, t2, 8 /* t5 = | 0 | 0 | 0 | G2 | */ |
| or t4, t4, t5 /* t4 = | 0 | 0 | R2 | G2 | */ |
| sll t4, t4, 8 /* t4 = | 0 | R2 | G2 | 0 | */ |
| or t5, t3, t8 /* t5 = | ff | 0 | 0 | B2 | */ |
| or t2, t4, t5 /* t2 = | ff | R2 | G2 | B2 | */ |
| |
| sw t0, 0(a0) |
| addiu a1, a1, 3 |
| sw t2, 4(a0) |
| addiu a2, a2, -2 |
| bnez a2, 2b |
| addiu a0, a0, 8 /* delay slot: dst += 2 */ |
| b 4f |
| nop |
| 3: /* src is even: halfword first, then byte */ |
| lhu t0, 0(a1) /* t0 = | 0 | 0 | G1 | R1 | */ |
| lbu t1, 2(a1) /* t1 = | 0 | 0 | 0 | B1 | */ |
| addiu a1, a1, 3 |
| lbu t2, 0(a1) /* t2 = | 0 | 0 | 0 | R2 | */ |
| lhu t3, 1(a1) /* t3 = | 0 | 0 | B2 | G2 | */ |
| |
| srl t4, t0, 8 /* t4 = | 0 | 0 | 0 | G1 | */ |
| shll.ph t5, t0, 8 /* t5 = | 0 | 0 | R1 | 0 | */ |
| or t0, t4, t5 /* t0 = | 0 | 0 | R1 | G1 | */ |
| sll t6, t0, 8 /* t6 = | 0 | R1 | G1 | 0 | */ |
| or t4, t1, t8 /* t4 = | ff | 0 | 0 | B1 | */ |
| or t0, t6, t4 /* t0 = | ff | R1 | G1 | B1 | */ |
| |
| sll t2, t2, 16 /* t2 = | 0 | R2 | 0 | 0 | */ |
| srl t4, t3, 8 /* t4 = | 0 | 0 | 0 | B2 | */ |
| shll.ph t5, t3, 8 /* t5 = | 0 | 0 | G2 | 0 | */ |
| or t3, t4, t5 /* t3 = | 0 | 0 | G2 | B2 | */ |
| or t2, t2, t3 /* t2 = | 0 | R2 | G2 | B2 | */ |
| or t2, t2, t8 /* t2 = | ff | R2 | G2 | B2 | */ |
| |
| sw t0, 0(a0) |
| addiu a1, a1, 3 |
| sw t2, 4(a0) |
| addiu a2, a2, -2 |
| bnez a2, 3b |
| addiu a0, a0, 8 /* delay slot: dst += 2 */ |
| 4: |
| jr ra |
| nop |
| |
| END(fetchUntransformed_888_asm_mips_dsp) |
| |
| |
| LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp) |
| /* |
| * Expands 16-bit source pixels with 4-bit colour fields into 32-bit words. |
| * Each of the three low nibbles (bits 11-8, 7-4, 3-0) is duplicated into |
| * a full byte, bits 15-12 of the source are ignored and the top byte of |
| * the result is forced to 0xff. |
| * |
| * a0 - dst address (address of 32-bit aRGB value) |
| * a1 - src address |
| * a2 - length |
| * |
| * Flow: single pixels are converted at label 1 until the length is even |
| * and src is word aligned, then loop 3 converts two pixels per word load, |
| * and label 4 converts one trailing pixel if the remaining count is odd. |
| * |
| * t4 holds the constant 1 for the test at the end of label 1; loop 3 |
| * reuses t4 as scratch, which is fine because label 1 is never re-entered |
| * once loop 3 has started. |
| * NOTE(review): v0 is set to a0 on entry but reused as scratch by the |
| * single-pixel paths, so it does not reliably hold the buffer address on |
| * return - confirm callers ignore the return value. |
| * |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| lui t8, 0xff00 /* t8 = 0xff000000 (opaque alpha) */ |
| li t4, 0x1 |
| |
| beqz a2, 5f /* length == 0: return */ |
| move v0, a0 /* just return the address of buffer |
| * for storing returning values */ |
| andi t0, a2, 0x1 |
| beqz t0, 2f /* length is even: skip the single pixel |
| * step and check src word alignment */ |
| nop |
| 1: |
| lhu v0, 0(a1) /* v0 = source pixel */ |
| addiu a1, a1, 2 |
| addiu a2, a2, -1 |
| andi t0, v0, 0xf00 /* t0 = R << 8 */ |
| andi v1, v0, 0xf /* v1 = B */ |
| andi v0, v0, 0xf0 /* v0 = G << 4 */ |
| sra t3, t0, 0x4 /* t3 = R << 4 */ |
| sra t1, v0, 0x4 /* t1 = G */ |
| sra t0, t0, 0x8 /* t0 = R */ |
| sll t2, v1, 0x4 /* t2 = B << 4 */ |
| or t0, t0, t3 /* t0 = RR */ |
| or v0, t1, v0 /* v0 = GG */ |
| lui t1, 0xff00 |
| or v1, t2, v1 /* v1 = BB */ |
| sll t0, t0, 0x10 |
| or v1, v1, t1 |
| sll v0, v0, 0x8 |
| or v1, v1, t0 |
| or v0, v1, v0 /* v0 = | ff | RR | GG | BB | */ |
| sw v0, 0(a0) |
| addiu a0, a0, 4 |
| beqz a2, 5f /* no more pixels for processing */ |
| nop |
| beq a2, t4, 4f /* only one more pixel remained */ |
| nop |
| /* check if src memory address is word aligned */ |
| 2: |
| andi t0, a1, 0x3 |
| beqz t0, 3f /* memory is word aligned */ |
| andi a3, a2, 0x1 /* set the a3 register as the comparison |
| * value for ending the unrolled loop |
| * (1 if odd, 0 if even) */ |
| b 1b /* not word aligned, |
| * go another turn with |
| * just one pixel processing */ |
| nop |
| 3: |
| lw t0, 0(a1) /* t0 = two source pixels */ |
| addiu a2, a2, -2 |
| preceu.ph.qbr t1, t0 /* t1 = | 0 | aR1 | 0 | G1B1 | */ |
| preceu.ph.qbl t2, t0 /* t2 = | 0 | aR2 | 0 | G2B2 | */ |
| shll.qb t3, t1, 4 /* t3 = | 0 | R1 0 | 0 | B1 0 | */ |
| srl t4, t3, 4 /* t4 = | 0 | 0 R1 | 0 | 0 B1 | */ |
| or t0, t3, t4 /* t0 = | 0 | R1R1 | 0 | B1B1 | */ |
| andi t3, t1, 0xf0 /* t3 = | 0 | 0 | 0 | G1 0 | */ |
| sll t3, t3, 8 /* t3 = | 0 | 0 | G1 0 | 0 | */ |
| srl t4, t3, 4 /* t4 = | 0 | 0 | 0 G1 | 0 | */ |
| or t1, t3, t4 /* t1 = | 0 | 0 | G1G1 | 0 | */ |
| or t0, t0, t1 /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */ |
| or t0, t0, t8 /* t0 = | ff | R1R1 | G1G1 | B1B1 | */ |
| |
| shll.qb t3, t2, 4 /* t3 = | 0 | R2 0 | 0 | B2 0 | */ |
| srl t4, t3, 4 /* t4 = | 0 | 0 R2 | 0 | 0 B2 | */ |
| or t7, t3, t4 /* t7 = | 0 | R2R2 | 0 | B2B2 | */ |
| andi t3, t2, 0xf0 /* t3 = | 0 | 0 | 0 | G2 0 | */ |
| sll t3, t3, 8 /* t3 = | 0 | 0 | G2 0 | 0 | */ |
| srl t4, t3, 4 /* t4 = | 0 | 0 | 0 G2 | 0 | */ |
| or t1, t3, t4 /* t1 = | 0 | 0 | G2G2 | 0 | */ |
| or t2, t7, t1 /* t2 = | 0 | R2R2 | G2G2 | B2B2 | */ |
| or t2, t2, t8 /* t2 = | ff | R2R2 | G2G2 | B2B2 | */ |
| |
| sw t0, 0(a0) |
| addiu a1, a1, 4 |
| sw t2, 4(a0) |
| bne a2, a3, 3b /* loop until 0 (even) or 1 (odd) pixels remain */ |
| addiu a0, a0, 8 /* delay slot: dst += 2 */ |
| beqz a2, 5f /* no more pixels for processing */ |
| nop |
| 4: |
| /* one more pixel remained (after loop unrolling process finished) */ |
| lhu v0, 0(a1) /* same conversion as at label 1 */ |
| addiu a1, a1, 2 |
| addiu a2, a2, -1 |
| andi t0, v0, 0xf00 |
| andi v1, v0, 0xf |
| andi v0, v0, 0xf0 |
| sra t3, t0, 0x4 |
| sra t1, v0, 0x4 |
| sra t0, t0, 0x8 |
| sll t2, v1, 0x4 |
| or t0, t0, t3 |
| or v0, t1, v0 |
| lui t1, 0xff00 |
| or v1, t2, v1 |
| sll t0, t0, 0x10 |
| or v1, v1, t1 |
| sll v0, v0, 0x8 |
| or v1, v1, t0 |
| or v0, v1, v0 |
| sw v0, 0(a0) |
| addiu a0, a0, 4 |
| 5: |
| jr ra |
| nop |
| |
| END(fetchUntransformed_444_asm_mips_dsp) |
| |
| |
| LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp) |
| /* |
| * Expands 3-byte source pixels (a 16-bit r5g6b5 value in bytes 0-1 and an |
| * alpha byte at offset 2) into 32-bit aarrggbb words, one per iteration. |
| * The 5/6/5-bit fields are widened to 8 bits by appending their own top |
| * bits, and every byte of the result is then limited to the alpha value. |
| * |
| * a0 - dst address |
| * a1 - src address |
| * a2 - length |
| * |
| * The bit-pattern comments show t0 schematically after each step; they |
| * start from t1 = rrrrrggggggbbbbb, i.e. the halfword after wsbh. |
| * NOTE(review): the ulh + wsbh pair yields that layout on a little-endian |
| * target - confirm behaviour for big-endian builds. |
| * |
| * The instruction following each branch is written as its delay slot |
| * (assumes .set noreorder is in effect; it is not set inside this block). |
| */ |
| |
| beqz a2, 2f /* length == 0: return */ |
| nop |
| |
| 1: |
| ulh t1, 0(a1) /* unaligned halfword load of the colour bytes */ |
| lbu t2, 2(a1) /* t2 = alpha */ |
| addiu a2, a2, -1 |
| wsbh t1, t1 /* swap the two colour bytes */ |
| sll t0, t1, 8 /* t0 = 00000000rrrrrggggggbbbbb00000000 */ |
| ins t0, t1, 3, 16 /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */ |
| ins t0, t1, 5, 11 /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */ |
| srl t4, t1, 9 /* t4 = 0000000000000000000000000rrrrrgg */ |
| replv.qb t3, t2 /* t3 = alpha replicated into all four bytes */ |
| ins t0, t4, 8, 2 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */ |
| ins t0, t1, 3, 5 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */ |
| srl t4, t1, 2 /* t4 = 000000000000000000rrrrrggggggbbb */ |
| ins t0, t4, 0, 3 /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */ |
| ins t0, t2, 24, 8 /* t0 =aaaaaaaarrrrrrrrggggggggbbbbbbbb */ |
| cmpu.lt.qb t3, t0 /* per byte: alpha < component ? */ |
| pick.qb t0, t3, t0 /* per byte: take alpha where it is smaller */ |
| addiu a1, a1, 3 /* src += 3 bytes */ |
| sw t0, 0(a0) |
| bgtz a2, 1b |
| addiu a0, a0, 4 /* delay slot: dst++ */ |
| 2: |
| jr ra |
| nop |
| |
| END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp) |