diff options
| author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 18:29:04 -0800 |
|---|---|---|
| committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 18:29:04 -0800 |
| commit | e54eebbf1a908d65ee8cf80bab62821c05666d70 (patch) | |
| tree | 4b825dc642cb6eb9a060e54bf8d69288fbee4904 /libpixelflinger/codeflinger | |
| parent | a1e1c1b106423de09bc918502e7a51d4ffe5a4ae (diff) | |
| download | system_core-e54eebbf1a908d65ee8cf80bab62821c05666d70.tar.gz system_core-e54eebbf1a908d65ee8cf80bab62821c05666d70.tar.bz2 system_core-e54eebbf1a908d65ee8cf80bab62821c05666d70.zip | |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libpixelflinger/codeflinger')
| -rw-r--r-- | libpixelflinger/codeflinger/ARMAssembler.cpp | 428 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/ARMAssembler.h | 155 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/ARMAssemblerInterface.cpp | 173 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/ARMAssemblerInterface.h | 324 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/ARMAssemblerProxy.cpp | 200 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/ARMAssemblerProxy.h | 123 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/CodeCache.cpp | 151 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/CodeCache.h | 134 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/GGLAssembler.cpp | 1150 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/GGLAssembler.h | 554 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/armreg.h | 300 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/blending.cpp | 682 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/disassem.c | 702 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/disassem.h | 65 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/load_store.cpp | 378 | ||||
| -rw-r--r-- | libpixelflinger/codeflinger/texturing.cpp | 1251 |
16 files changed, 0 insertions, 6770 deletions
diff --git a/libpixelflinger/codeflinger/ARMAssembler.cpp b/libpixelflinger/codeflinger/ARMAssembler.cpp deleted file mode 100644 index ff7b0b3e..00000000 --- a/libpixelflinger/codeflinger/ARMAssembler.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* libs/pixelflinger/codeflinger/ARMAssembler.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -#define LOG_TAG "ARMAssembler" - -#include <stdio.h> -#include <stdlib.h> -#include <cutils/log.h> -#include <cutils/properties.h> - -#if defined(WITH_LIB_HARDWARE) -#include <hardware_legacy/qemu_tracing.h> -#endif - -#include <private/pixelflinger/ggl_context.h> - -#include "codeflinger/ARMAssembler.h" -#include "codeflinger/CodeCache.h" -#include "codeflinger/disassem.h" - -// ---------------------------------------------------------------------------- - -namespace android { - -// ---------------------------------------------------------------------------- -#if 0 -#pragma mark - -#pragma mark ARMAssembler... 
-#endif - -ARMAssembler::ARMAssembler(const sp<Assembly>& assembly) - : ARMAssemblerInterface(), - mAssembly(assembly) -{ - mBase = mPC = (uint32_t *)assembly->base(); - mDuration = ggl_system_time(); -#if defined(WITH_LIB_HARDWARE) - mQemuTracing = true; -#endif -} - -ARMAssembler::~ARMAssembler() -{ -} - -uint32_t* ARMAssembler::pc() const -{ - return mPC; -} - -uint32_t* ARMAssembler::base() const -{ - return mBase; -} - -void ARMAssembler::reset() -{ - mBase = mPC = (uint32_t *)mAssembly->base(); - mBranchTargets.clear(); - mLabels.clear(); - mLabelsInverseMapping.clear(); - mComments.clear(); -} - -// ---------------------------------------------------------------------------- - -void ARMAssembler::disassemble(const char* name) -{ - if (name) { - printf("%s:\n", name); - } - size_t count = pc()-base(); - uint32_t* i = base(); - while (count--) { - ssize_t label = mLabelsInverseMapping.indexOfKey(i); - if (label >= 0) { - printf("%s:\n", mLabelsInverseMapping.valueAt(label)); - } - ssize_t comment = mComments.indexOfKey(i); - if (comment >= 0) { - printf("; %s\n", mComments.valueAt(comment)); - } - printf("%08x: %08x ", int(i), int(i[0])); - ::disassemble((u_int)i); - i++; - } -} - -void ARMAssembler::comment(const char* string) -{ - mComments.add(mPC, string); -} - -void ARMAssembler::label(const char* theLabel) -{ - mLabels.add(theLabel, mPC); - mLabelsInverseMapping.add(mPC, theLabel); -} - -void ARMAssembler::B(int cc, const char* label) -{ - mBranchTargets.add(branch_target_t(label, mPC)); - *mPC++ = (cc<<28) | (0xA<<24) | 0; -} - -void ARMAssembler::BL(int cc, const char* label) -{ - mBranchTargets.add(branch_target_t(label, mPC)); - *mPC++ = (cc<<28) | (0xB<<24) | 0; -} - -#if 0 -#pragma mark - -#pragma mark Prolog/Epilog & Generate... 
-#endif - - -void ARMAssembler::prolog() -{ - // write dummy prolog code - mPrologPC = mPC; - STM(AL, FD, SP, 1, LSAVED); -} - -void ARMAssembler::epilog(uint32_t touched) -{ - touched &= LSAVED; - if (touched) { - // write prolog code - uint32_t* pc = mPC; - mPC = mPrologPC; - STM(AL, FD, SP, 1, touched | LLR); - mPC = pc; - // write epilog code - LDM(AL, FD, SP, 1, touched | LLR); - BX(AL, LR); - } else { // heh, no registers to save! - // write prolog code - uint32_t* pc = mPC; - mPC = mPrologPC; - MOV(AL, 0, R0, R0); // NOP - mPC = pc; - // write epilog code - BX(AL, LR); - } -} - -int ARMAssembler::generate(const char* name) -{ - // fixup all the branches - size_t count = mBranchTargets.size(); - while (count--) { - const branch_target_t& bt = mBranchTargets[count]; - uint32_t* target_pc = mLabels.valueFor(bt.label); - LOG_ALWAYS_FATAL_IF(!target_pc, - "error resolving branch targets, target_pc is null"); - int32_t offset = int32_t(target_pc - (bt.pc+2)); - *bt.pc |= offset & 0xFFFFFF; - } - - mAssembly->resize( int(pc()-base())*4 ); - - // the instruction cache is flushed by CodeCache - const int64_t duration = ggl_system_time() - mDuration; - const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n"; - LOGI(format, name, int(pc()-base()), base(), pc(), duration); - -#if defined(WITH_LIB_HARDWARE) - if (__builtin_expect(mQemuTracing, 0)) { - int err = qemu_add_mapping(int(base()), name); - mQemuTracing = (err >= 0); - } -#endif - - char value[PROPERTY_VALUE_MAX]; - property_get("debug.pf.disasm", value, "0"); - if (atoi(value) != 0) { - printf(format, name, int(pc()-base()), base(), pc(), duration); - disassemble(name); - } - - return NO_ERROR; -} - -uint32_t* ARMAssembler::pcForLabel(const char* label) -{ - return mLabels.valueFor(label); -} - -// ---------------------------------------------------------------------------- - -#if 0 -#pragma mark - -#pragma mark Data Processing... 
-#endif - -void ARMAssembler::dataProcessing(int opcode, int cc, - int s, int Rd, int Rn, uint32_t Op2) -{ - *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2; -} - -#if 0 -#pragma mark - -#pragma mark Multiply... -#endif - -// multiply... -void ARMAssembler::MLA(int cc, int s, - int Rd, int Rm, int Rs, int Rn) { - if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } - LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn); - *mPC++ = (cc<<28) | (1<<21) | (s<<20) | - (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm; -} -void ARMAssembler::MUL(int cc, int s, - int Rd, int Rm, int Rs) { - if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; } - LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs); - *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm; -} -void ARMAssembler::UMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, - "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); - *mPC++ = (cc<<28) | (1<<23) | (s<<20) | - (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; -} -void ARMAssembler::UMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, - "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); - *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) | - (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; -} -void ARMAssembler::SMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, - "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); - *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) | - (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; -} -void ARMAssembler::SMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, - "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); - *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) | - (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; -} - -#if 0 -#pragma mark - -#pragma mark Branches... -#endif - -// branches... 
-void ARMAssembler::B(int cc, uint32_t* pc) -{ - int32_t offset = int32_t(pc - (mPC+2)); - *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF); -} - -void ARMAssembler::BL(int cc, uint32_t* pc) -{ - int32_t offset = int32_t(pc - (mPC+2)); - *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF); -} - -void ARMAssembler::BX(int cc, int Rn) -{ - *mPC++ = (cc<<28) | 0x12FFF10 | Rn; -} - -#if 0 -#pragma mark - -#pragma mark Data Transfer... -#endif - -// data transfert... -void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; -} -void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset; -} -void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset; -} -void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset; -} - -void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; -} -void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset; -} -void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset; -} -void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) { - *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset; -} - -#if 0 -#pragma mark - -#pragma mark Block Data Transfer... -#endif - -// block data transfer... 
-void ARMAssembler::LDM(int cc, int dir, - int Rn, int W, uint32_t reg_list) -{ // ED FD EA FA IB IA DB DA - const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 }; - const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 }; - *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | - (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list; -} - -void ARMAssembler::STM(int cc, int dir, - int Rn, int W, uint32_t reg_list) -{ // FA EA FD ED IB IA DB DA - const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 }; - const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 }; - *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | - (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list; -} - -#if 0 -#pragma mark - -#pragma mark Special... -#endif - -// special... -void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) { - *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; -} -void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) { - *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; -} -void ARMAssembler::SWI(int cc, uint32_t comment) { - *mPC++ = (cc<<28) | (0xF<<24) | comment; -} - -#if 0 -#pragma mark - -#pragma mark DSP instructions... -#endif - -// DSP instructions... 
-void ARMAssembler::PLD(int Rn, uint32_t offset) { - LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))), - "PLD only P=1, W=0"); - *mPC++ = 0xF550F000 | (Rn<<16) | offset; -} - -void ARMAssembler::CLZ(int cc, int Rd, int Rm) -{ - *mPC++ = (cc<<28) | 0x16F0F10| (Rd<<12) | Rm; -} - -void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn) -{ - *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm; -} - -void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn) -{ - *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm; -} - -void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn) -{ - *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm; -} - -void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn) -{ - *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm; -} - -void ARMAssembler::SMUL(int cc, int xy, - int Rd, int Rm, int Rs) -{ - *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm; -} - -void ARMAssembler::SMULW(int cc, int y, - int Rd, int Rm, int Rs) -{ - *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm; -} - -void ARMAssembler::SMLA(int cc, int xy, - int Rd, int Rm, int Rs, int Rn) -{ - *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm; -} - -void ARMAssembler::SMLAL(int cc, int xy, - int RdHi, int RdLo, int Rs, int Rm) -{ - *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm; -} - -void ARMAssembler::SMLAW(int cc, int y, - int Rd, int Rm, int Rs, int Rn) -{ - *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm; -} - -}; // namespace android - diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h deleted file mode 100644 index 8837e07a..00000000 --- a/libpixelflinger/codeflinger/ARMAssembler.h +++ /dev/null @@ -1,155 +0,0 @@ -/* libs/pixelflinger/codeflinger/ARMAssembler.h -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 
(the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -#ifndef ANDROID_ARMASSEMBLER_H -#define ANDROID_ARMASSEMBLER_H - -#include <stdint.h> -#include <sys/types.h> - -#include <utils/Vector.h> -#include <utils/KeyedVector.h> - -#include "tinyutils/smartpointer.h" -#include "codeflinger/ARMAssemblerInterface.h" -#include "codeflinger/CodeCache.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -class ARMAssembler : public ARMAssemblerInterface -{ -public: - ARMAssembler(const sp<Assembly>& assembly); - virtual ~ARMAssembler(); - - uint32_t* base() const; - uint32_t* pc() const; - - - void disassemble(const char* name); - - // ------------------------------------------------------------------------ - // ARMAssemblerInterface... 
- // ------------------------------------------------------------------------ - - virtual void reset(); - - virtual int generate(const char* name); - - virtual void prolog(); - virtual void epilog(uint32_t touched); - virtual void comment(const char* string); - - virtual void dataProcessing(int opcode, int cc, int s, - int Rd, int Rn, - uint32_t Op2); - virtual void MLA(int cc, int s, - int Rd, int Rm, int Rs, int Rn); - virtual void MUL(int cc, int s, - int Rd, int Rm, int Rs); - virtual void UMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - virtual void UMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - virtual void SMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - virtual void SMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - - virtual void B(int cc, uint32_t* pc); - virtual void BL(int cc, uint32_t* pc); - virtual void BX(int cc, int Rn); - virtual void label(const char* theLabel); - virtual void B(int cc, const char* label); - virtual void BL(int cc, const char* label); - - virtual uint32_t* pcForLabel(const char* label); - - virtual void LDR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void LDRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void STR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void STRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void LDRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void LDRSB(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void LDRSH(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void STRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void LDM(int cc, int dir, - int Rn, int W, uint32_t reg_list); - virtual void STM(int cc, int dir, - int Rn, int W, uint32_t reg_list); - - virtual void SWP(int cc, int Rn, int Rd, int Rm); - virtual void SWPB(int cc, int Rn, int Rd, int Rm); - 
virtual void SWI(int cc, uint32_t comment); - - virtual void PLD(int Rn, uint32_t offset); - virtual void CLZ(int cc, int Rd, int Rm); - virtual void QADD(int cc, int Rd, int Rm, int Rn); - virtual void QDADD(int cc, int Rd, int Rm, int Rn); - virtual void QSUB(int cc, int Rd, int Rm, int Rn); - virtual void QDSUB(int cc, int Rd, int Rm, int Rn); - virtual void SMUL(int cc, int xy, - int Rd, int Rm, int Rs); - virtual void SMULW(int cc, int y, - int Rd, int Rm, int Rs); - virtual void SMLA(int cc, int xy, - int Rd, int Rm, int Rs, int Rn); - virtual void SMLAL(int cc, int xy, - int RdHi, int RdLo, int Rs, int Rm); - virtual void SMLAW(int cc, int y, - int Rd, int Rm, int Rs, int Rn); - -private: - ARMAssembler(const ARMAssembler& rhs); - ARMAssembler& operator = (const ARMAssembler& rhs); - - sp<Assembly> mAssembly; - uint32_t* mBase; - uint32_t* mPC; - uint32_t* mPrologPC; - int64_t mDuration; -#if defined(WITH_LIB_HARDWARE) - bool mQemuTracing; -#endif - - struct branch_target_t { - inline branch_target_t() : label(0), pc(0) { } - inline branch_target_t(const char* l, uint32_t* p) - : label(l), pc(p) { } - const char* label; - uint32_t* pc; - }; - - Vector<branch_target_t> mBranchTargets; - KeyedVector< const char*, uint32_t* > mLabels; - KeyedVector< uint32_t*, const char* > mLabelsInverseMapping; - KeyedVector< uint32_t*, const char* > mComments; -}; - -}; // namespace android - -#endif //ANDROID_ARMASSEMBLER_H diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp deleted file mode 100644 index 7fa0de0a..00000000 --- a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* libs/pixelflinger/codeflinger/ARMAssemblerInterface.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - - -#include <errno.h> -#include <stdlib.h> -#include <stdint.h> -#include <sys/types.h> - -#include <cutils/log.h> -#include "codeflinger/ARMAssemblerInterface.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -ARMAssemblerInterface::~ARMAssemblerInterface() -{ -} - -int ARMAssemblerInterface::buildImmediate( - uint32_t immediate, uint32_t& rot, uint32_t& imm) -{ - rot = 0; - imm = immediate; - if (imm > 0x7F) { // skip the easy cases - while (!(imm&3) || (imm&0xFC000000)) { - uint32_t newval; - newval = imm >> 2; - newval |= (imm&3) << 30; - imm = newval; - rot += 2; - if (rot == 32) { - rot = 0; - break; - } - } - } - rot = (16 - (rot>>1)) & 0xF; - - if (imm>=0x100) - return -EINVAL; - - if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate) - return -1; - - return 0; -} - -// shifters... 
- -bool ARMAssemblerInterface::isValidImmediate(uint32_t immediate) -{ - uint32_t rot, imm; - return buildImmediate(immediate, rot, imm) == 0; -} - -uint32_t ARMAssemblerInterface::imm(uint32_t immediate) -{ - uint32_t rot, imm; - int err = buildImmediate(immediate, rot, imm); - - LOG_ALWAYS_FATAL_IF(err==-EINVAL, - "immediate %08x cannot be encoded", - immediate); - - LOG_ALWAYS_FATAL_IF(err, - "immediate (%08x) encoding bogus!", - immediate); - - return (1<<25) | (rot<<8) | imm; -} - -uint32_t ARMAssemblerInterface::reg_imm(int Rm, int type, uint32_t shift) -{ - return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF); -} - -uint32_t ARMAssemblerInterface::reg_rrx(int Rm) -{ - return (ROR<<5) | (Rm&0xF); -} - -uint32_t ARMAssemblerInterface::reg_reg(int Rm, int type, int Rs) -{ - return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF); -} - -// addressing modes... -// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0) -uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W) -{ - LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, - "LDR(B)/STR(B)/PLD immediate too big (%08x)", - immed12); - return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) | - ((W&1)<<21) | (abs(immed12)&0x7FF); -} - -uint32_t ARMAssemblerInterface::immed12_post(int32_t immed12) -{ - LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, - "LDR(B)/STR(B)/PLD immediate too big (%08x)", - immed12); - - return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF); -} - -uint32_t ARMAssemblerInterface::reg_scale_pre(int Rm, int type, - uint32_t shift, int W) -{ - return (1<<25) | (1<<24) | - (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | - reg_imm(abs(Rm), type, shift); -} - -uint32_t ARMAssemblerInterface::reg_scale_post(int Rm, int type, uint32_t shift) -{ - return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift); -} - -// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0) -uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int 
W) -{ - uint32_t offset = abs(immed8); - - LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, - "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", - immed8); - - return (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | - ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); -} - -uint32_t ARMAssemblerInterface::immed8_post(int32_t immed8) -{ - uint32_t offset = abs(immed8); - - LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, - "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", - immed8); - - return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | - (((offset&0xF0)<<4) | (offset&0xF)); -} - -uint32_t ARMAssemblerInterface::reg_pre(int Rm, int W) -{ - return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF); -} - -uint32_t ARMAssemblerInterface::reg_post(int Rm) -{ - return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF); -} - - -}; // namespace android - diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h deleted file mode 100644 index 465b3bd9..00000000 --- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h +++ /dev/null @@ -1,324 +0,0 @@ -/* libs/pixelflinger/codeflinger/ARMAssemblerInterface.h -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ - - -#ifndef ANDROID_ARMASSEMBLER_INTERFACE_H -#define ANDROID_ARMASSEMBLER_INTERFACE_H - -#include <stdint.h> -#include <sys/types.h> - -namespace android { - -// ---------------------------------------------------------------------------- - -class ARMAssemblerInterface -{ -public: - virtual ~ARMAssemblerInterface(); - - enum { - EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV, - HS = CS, - LO = CC - }; - enum { - S = 1 - }; - enum { - LSL, LSR, ASR, ROR - }; - enum { - ED, FD, EA, FA, - IB, IA, DB, DA - }; - enum { - R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, - SP = R13, - LR = R14, - PC = R15 - }; - enum { - #define LIST(rr) L##rr=1<<rr - LIST(R0), LIST(R1), LIST(R2), LIST(R3), LIST(R4), LIST(R5), LIST(R6), - LIST(R7), LIST(R8), LIST(R9), LIST(R10), LIST(R11), LIST(R12), - LIST(R13), LIST(R14), LIST(R15), - LIST(SP), LIST(LR), LIST(PC), - #undef LIST - LSAVED = LR4|LR5|LR6|LR7|LR8|LR9|LR10|LR11 | LLR - }; - - // ----------------------------------------------------------------------- - // shifters and addressing modes - // ----------------------------------------------------------------------- - - // shifters... - static bool isValidImmediate(uint32_t immed); - static int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm); - - static uint32_t imm(uint32_t immediate); - static uint32_t reg_imm(int Rm, int type, uint32_t shift); - static uint32_t reg_rrx(int Rm); - static uint32_t reg_reg(int Rm, int type, int Rs); - - // addressing modes... 
- // LDR(B)/STR(B)/PLD - // (immediate and Rm can be negative, which indicates U=0) - static uint32_t immed12_pre(int32_t immed12, int W=0); - static uint32_t immed12_post(int32_t immed12); - static uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0); - static uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0); - - // LDRH/LDRSB/LDRSH/STRH - // (immediate and Rm can be negative, which indicates U=0) - static uint32_t immed8_pre(int32_t immed8, int W=0); - static uint32_t immed8_post(int32_t immed8); - static uint32_t reg_pre(int Rm, int W=0); - static uint32_t reg_post(int Rm); - - // ----------------------------------------------------------------------- - // basic instructions & code generation - // ----------------------------------------------------------------------- - - // generate the code - virtual void reset() = 0; - virtual int generate(const char* name) = 0; - virtual void disassemble(const char* name) = 0; - - // construct prolog and epilog - virtual void prolog() = 0; - virtual void epilog(uint32_t touched) = 0; - virtual void comment(const char* string) = 0; - - // data processing... - enum { - opAND, opEOR, opSUB, opRSB, opADD, opADC, opSBC, opRSC, - opTST, opTEQ, opCMP, opCMN, opORR, opMOV, opBIC, opMVN - }; - - virtual void - dataProcessing( int opcode, int cc, int s, - int Rd, int Rn, - uint32_t Op2) = 0; - - // multiply... - virtual void MLA(int cc, int s, - int Rd, int Rm, int Rs, int Rn) = 0; - virtual void MUL(int cc, int s, - int Rd, int Rm, int Rs) = 0; - virtual void UMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) = 0; - virtual void UMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) = 0; - virtual void SMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) = 0; - virtual void SMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) = 0; - - // branches... 
- virtual void B(int cc, uint32_t* pc) = 0; - virtual void BL(int cc, uint32_t* pc) = 0; - virtual void BX(int cc, int Rn) = 0; - - virtual void label(const char* theLabel) = 0; - virtual void B(int cc, const char* label) = 0; - virtual void BL(int cc, const char* label) = 0; - - // valid only after generate() has been called - virtual uint32_t* pcForLabel(const char* label) = 0; - - // data transfer... - virtual void LDR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; - virtual void LDRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; - virtual void STR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; - virtual void STRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; - - virtual void LDRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; - virtual void LDRSB(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; - virtual void LDRSH(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; - virtual void STRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; - - // block data transfer... - virtual void LDM(int cc, int dir, - int Rn, int W, uint32_t reg_list) = 0; - virtual void STM(int cc, int dir, - int Rn, int W, uint32_t reg_list) = 0; - - // special... - virtual void SWP(int cc, int Rn, int Rd, int Rm) = 0; - virtual void SWPB(int cc, int Rn, int Rd, int Rm) = 0; - virtual void SWI(int cc, uint32_t comment) = 0; - - // DSP instructions... 
- enum { - // B=0, T=1 - // yx - xyBB = 0, // 0000, - xyTB = 2, // 0010, - xyBT = 4, // 0100, - xyTT = 6, // 0110, - yB = 0, // 0000, - yT = 4, // 0100 - }; - - virtual void PLD(int Rn, uint32_t offset) = 0; - - virtual void CLZ(int cc, int Rd, int Rm) = 0; - - virtual void QADD(int cc, int Rd, int Rm, int Rn) = 0; - virtual void QDADD(int cc, int Rd, int Rm, int Rn) = 0; - virtual void QSUB(int cc, int Rd, int Rm, int Rn) = 0; - virtual void QDSUB(int cc, int Rd, int Rm, int Rn) = 0; - - virtual void SMUL(int cc, int xy, - int Rd, int Rm, int Rs) = 0; - virtual void SMULW(int cc, int y, - int Rd, int Rm, int Rs) = 0; - virtual void SMLA(int cc, int xy, - int Rd, int Rm, int Rs, int Rn) = 0; - virtual void SMLAL(int cc, int xy, - int RdHi, int RdLo, int Rs, int Rm) = 0; - virtual void SMLAW(int cc, int y, - int Rd, int Rm, int Rs, int Rn) = 0; - - // ----------------------------------------------------------------------- - // convenience... - // ----------------------------------------------------------------------- - inline void - ADC(int cc, int s, int Rd, int Rn, uint32_t Op2) { - dataProcessing(opADC, cc, s, Rd, Rn, Op2); - } - inline void - ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) { - dataProcessing(opADD, cc, s, Rd, Rn, Op2); - } - inline void - AND(int cc, int s, int Rd, int Rn, uint32_t Op2) { - dataProcessing(opAND, cc, s, Rd, Rn, Op2); - } - inline void - BIC(int cc, int s, int Rd, int Rn, uint32_t Op2) { - dataProcessing(opBIC, cc, s, Rd, Rn, Op2); - } - inline void - EOR(int cc, int s, int Rd, int Rn, uint32_t Op2) { - dataProcessing(opEOR, cc, s, Rd, Rn, Op2); - } - inline void - MOV(int cc, int s, int Rd, uint32_t Op2) { - dataProcessing(opMOV, cc, s, Rd, 0, Op2); - } - inline void - MVN(int cc, int s, int Rd, uint32_t Op2) { - dataProcessing(opMVN, cc, s, Rd, 0, Op2); - } - inline void - ORR(int cc, int s, int Rd, int Rn, uint32_t Op2) { - dataProcessing(opORR, cc, s, Rd, Rn, Op2); - } - inline void - RSB(int cc, int s, int Rd, int Rn, 
uint32_t Op2) {
        dataProcessing(opRSB, cc, s, Rd, Rn, Op2);
    }
    // The wrappers below are fixed-opcode shorthands: each one forwards its
    // arguments unchanged to dataProcessing() with the matching op* constant.
    inline void
            RSC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
        dataProcessing(opRSC, cc, s, Rd, Rn, Op2);
    }
    inline void
            SBC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
        dataProcessing(opSBC, cc, s, Rd, Rn, Op2);
    }
    inline void
            SUB(int cc, int s, int Rd, int Rn, uint32_t Op2) {
        dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
    }
    // Test/compare forms take no 's' or 'Rd' argument: they always pass
    // s = 1 and Rd = 0 to dataProcessing().
    inline void
            TEQ(int cc, int Rn, uint32_t Op2) {
        dataProcessing(opTEQ, cc, 1, 0, Rn, Op2);
    }
    inline void
            TST(int cc, int Rn, uint32_t Op2) {
        dataProcessing(opTST, cc, 1, 0, Rn, Op2);
    }
    inline void
            CMP(int cc, int Rn, uint32_t Op2) {
        dataProcessing(opCMP, cc, 1, 0, Rn, Op2);
    }
    inline void
            CMN(int cc, int Rn, uint32_t Op2) {
        dataProcessing(opCMN, cc, 1, 0, Rn, Op2);
    }

    // SMUL<x><y>: SMUL() with the xy selector fixed to xyBB/xyTB/xyBT/xyTT.
    // (presumably B/T select the bottom/top halfword of each operand, as in
    // the ARM mnemonics -- confirm against the xy* definitions)
    inline void SMULBB(int cc, int Rd, int Rm, int Rs) {
        SMUL(cc, xyBB, Rd, Rm, Rs);    }
    inline void SMULTB(int cc, int Rd, int Rm, int Rs) {
        SMUL(cc, xyTB, Rd, Rm, Rs);    }
    inline void SMULBT(int cc, int Rd, int Rm, int Rs) {
        SMUL(cc, xyBT, Rd, Rm, Rs);    }
    inline void SMULTT(int cc, int Rd, int Rm, int Rs) {
        SMUL(cc, xyTT, Rd, Rm, Rs);    }

    // SMULW<y>: SMULW() with the y selector fixed to yB / yT.
    inline void SMULWB(int cc, int Rd, int Rm, int Rs) {
        SMULW(cc, yB, Rd, Rm, Rs);    }
    inline void SMULWT(int cc, int Rd, int Rm, int Rs) {
        SMULW(cc, yT, Rd, Rm, Rs);    }

    // SMLA<x><y>: SMLA() with the xy selector fixed.
    inline void
            SMLABB(int cc, int Rd, int Rm, int Rs, int Rn) {
        SMLA(cc, xyBB, Rd, Rm, Rs, Rn);    }
    inline void
            SMLATB(int cc, int Rd, int Rm, int Rs, int Rn) {
        SMLA(cc, xyTB, Rd, Rm, Rs, Rn);    }
    inline void
            SMLABT(int cc, int Rd, int Rm, int Rs, int Rn) {
        SMLA(cc, xyBT, Rd, Rm, Rs, Rn);    }
    inline void
            SMLATT(int cc, int Rd, int Rm, int Rs, int Rn) {
        SMLA(cc, xyTT, Rd, Rm, Rs, Rn);    }

    // SMLAL<x><y>: SMLAL() with the xy selector fixed.
    // Note the argument order here is (RdHi, RdLo, Rs, Rm).
    inline void
            SMLALBB(int cc, int RdHi, int RdLo, int Rs, int Rm) {
        SMLAL(cc, xyBB, RdHi, RdLo, Rs, Rm);    }
    inline void
            SMLALTB(int cc, int RdHi, int RdLo, int Rs, int Rm) {
        SMLAL(cc, xyTB, RdHi, RdLo, Rs, Rm);    }
    inline void
            SMLALBT(int cc, int RdHi, int RdLo, int Rs, int Rm) {
        SMLAL(cc, xyBT, RdHi, RdLo, Rs, Rm);    }
    inline void
            SMLALTT(int cc, int RdHi, int RdLo, int Rs, int Rm) {
        SMLAL(cc, xyTT, RdHi, RdLo, Rs, Rm);    }

    // SMLAW<y>: SMLAW() with the y selector fixed to yB / yT.
    inline void
            SMLAWB(int cc, int Rd, int Rm, int Rs, int Rn) {
        SMLAW(cc, yB, Rd, Rm, Rs, Rn);    }
    inline void
            SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) {
        SMLAW(cc, yT, Rd, Rm, Rs, Rn);    }
};

}; // namespace android

#endif //ANDROID_ARMASSEMBLER_INTERFACE_H
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
deleted file mode 100644
index 18c46186..00000000
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
/* libs/pixelflinger/codeflinger/ARMAssemblerProxy.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
-*/ - - -#include <stdint.h> -#include <sys/types.h> - -#include "codeflinger/ARMAssemblerProxy.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -ARMAssemblerProxy::ARMAssemblerProxy() - : mTarget(0) -{ -} - -ARMAssemblerProxy::ARMAssemblerProxy(ARMAssemblerInterface* target) - : mTarget(target) -{ -} - -ARMAssemblerProxy::~ARMAssemblerProxy() -{ - delete mTarget; -} - -void ARMAssemblerProxy::setTarget(ARMAssemblerInterface* target) -{ - delete mTarget; - mTarget = target; -} - -void ARMAssemblerProxy::reset() { - mTarget->reset(); -} -int ARMAssemblerProxy::generate(const char* name) { - return mTarget->generate(name); -} -void ARMAssemblerProxy::disassemble(const char* name) { - return mTarget->disassemble(name); -} -void ARMAssemblerProxy::prolog() { - mTarget->prolog(); -} -void ARMAssemblerProxy::epilog(uint32_t touched) { - mTarget->epilog(touched); -} -void ARMAssemblerProxy::comment(const char* string) { - mTarget->comment(string); -} - - -void ARMAssemblerProxy::dataProcessing( int opcode, int cc, int s, - int Rd, int Rn, uint32_t Op2) -{ - mTarget->dataProcessing(opcode, cc, s, Rd, Rn, Op2); -} - -void ARMAssemblerProxy::MLA(int cc, int s, int Rd, int Rm, int Rs, int Rn) { - mTarget->MLA(cc, s, Rd, Rm, Rs, Rn); -} -void ARMAssemblerProxy::MUL(int cc, int s, int Rd, int Rm, int Rs) { - mTarget->MUL(cc, s, Rd, Rm, Rs); -} -void ARMAssemblerProxy::UMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - mTarget->UMULL(cc, s, RdLo, RdHi, Rm, Rs); -} -void ARMAssemblerProxy::UMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - mTarget->UMUAL(cc, s, RdLo, RdHi, Rm, Rs); -} -void ARMAssemblerProxy::SMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - mTarget->SMULL(cc, s, RdLo, RdHi, Rm, Rs); -} -void ARMAssemblerProxy::SMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs) { - mTarget->SMUAL(cc, s, RdLo, RdHi, Rm, Rs); -} - -void ARMAssemblerProxy::B(int cc, uint32_t* 
pc) { - mTarget->B(cc, pc); -} -void ARMAssemblerProxy::BL(int cc, uint32_t* pc) { - mTarget->BL(cc, pc); -} -void ARMAssemblerProxy::BX(int cc, int Rn) { - mTarget->BX(cc, Rn); -} -void ARMAssemblerProxy::label(const char* theLabel) { - mTarget->label(theLabel); -} -void ARMAssemblerProxy::B(int cc, const char* label) { - mTarget->B(cc, label); -} -void ARMAssemblerProxy::BL(int cc, const char* label) { - mTarget->BL(cc, label); -} - -uint32_t* ARMAssemblerProxy::pcForLabel(const char* label) { - return mTarget->pcForLabel(label); -} - -void ARMAssemblerProxy::LDR(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->LDR(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::LDRB(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->LDRB(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::STR(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->STR(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::STRB(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->STRB(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::LDRH(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->LDRH(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::LDRSB(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->LDRSB(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::LDRSH(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->LDRSH(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::STRH(int cc, int Rd, int Rn, uint32_t offset) { - mTarget->STRH(cc, Rd, Rn, offset); -} -void ARMAssemblerProxy::LDM(int cc, int dir, int Rn, int W, uint32_t reg_list) { - mTarget->LDM(cc, dir, Rn, W, reg_list); -} -void ARMAssemblerProxy::STM(int cc, int dir, int Rn, int W, uint32_t reg_list) { - mTarget->STM(cc, dir, Rn, W, reg_list); -} - -void ARMAssemblerProxy::SWP(int cc, int Rn, int Rd, int Rm) { - mTarget->SWP(cc, Rn, Rd, Rm); -} -void ARMAssemblerProxy::SWPB(int cc, int Rn, int Rd, int Rm) { - mTarget->SWPB(cc, Rn, Rd, Rm); -} -void ARMAssemblerProxy::SWI(int cc, uint32_t comment) { - mTarget->SWI(cc, comment); -} - - -void 
ARMAssemblerProxy::PLD(int Rn, uint32_t offset) { - mTarget->PLD(Rn, offset); -} -void ARMAssemblerProxy::CLZ(int cc, int Rd, int Rm) { - mTarget->CLZ(cc, Rd, Rm); -} -void ARMAssemblerProxy::QADD(int cc, int Rd, int Rm, int Rn) { - mTarget->QADD(cc, Rd, Rm, Rn); -} -void ARMAssemblerProxy::QDADD(int cc, int Rd, int Rm, int Rn) { - mTarget->QDADD(cc, Rd, Rm, Rn); -} -void ARMAssemblerProxy::QSUB(int cc, int Rd, int Rm, int Rn) { - mTarget->QSUB(cc, Rd, Rm, Rn); -} -void ARMAssemblerProxy::QDSUB(int cc, int Rd, int Rm, int Rn) { - mTarget->QDSUB(cc, Rd, Rm, Rn); -} -void ARMAssemblerProxy::SMUL(int cc, int xy, int Rd, int Rm, int Rs) { - mTarget->SMUL(cc, xy, Rd, Rm, Rs); -} -void ARMAssemblerProxy::SMULW(int cc, int y, int Rd, int Rm, int Rs) { - mTarget->SMULW(cc, y, Rd, Rm, Rs); -} -void ARMAssemblerProxy::SMLA(int cc, int xy, int Rd, int Rm, int Rs, int Rn) { - mTarget->SMLA(cc, xy, Rd, Rm, Rs, Rn); -} -void ARMAssemblerProxy::SMLAL( int cc, int xy, - int RdHi, int RdLo, int Rs, int Rm) { - mTarget->SMLAL(cc, xy, RdHi, RdLo, Rs, Rm); -} -void ARMAssemblerProxy::SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn) { - mTarget->SMLAW(cc, y, Rd, Rm, Rs, Rn); -} - - -}; // namespace android - diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h deleted file mode 100644 index 4bdca9cf..00000000 --- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h +++ /dev/null @@ -1,123 +0,0 @@ -/* libs/pixelflinger/codeflinger/ARMAssemblerProxy.h -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - - -#ifndef ANDROID_ARMASSEMBLER_PROXY_H -#define ANDROID_ARMASSEMBLER_PROXY_H - -#include <stdint.h> -#include <sys/types.h> - -#include "codeflinger/ARMAssemblerInterface.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -class ARMAssemblerProxy : public ARMAssemblerInterface -{ -public: - // ARMAssemblerProxy take ownership of the target - - ARMAssemblerProxy(); - ARMAssemblerProxy(ARMAssemblerInterface* target); - virtual ~ARMAssemblerProxy(); - - void setTarget(ARMAssemblerInterface* target); - - virtual void reset(); - virtual int generate(const char* name); - virtual void disassemble(const char* name); - - virtual void prolog(); - virtual void epilog(uint32_t touched); - virtual void comment(const char* string); - - virtual void dataProcessing(int opcode, int cc, int s, - int Rd, int Rn, - uint32_t Op2); - virtual void MLA(int cc, int s, - int Rd, int Rm, int Rs, int Rn); - virtual void MUL(int cc, int s, - int Rd, int Rm, int Rs); - virtual void UMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - virtual void UMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - virtual void SMULL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - virtual void SMUAL(int cc, int s, - int RdLo, int RdHi, int Rm, int Rs); - - virtual void B(int cc, uint32_t* pc); - virtual void BL(int cc, uint32_t* pc); - virtual void BX(int cc, int Rn); - virtual void label(const char* theLabel); - virtual void B(int cc, const char* label); - virtual void BL(int cc, const char* label); - 
- uint32_t* pcForLabel(const char* label); - - virtual void LDR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void LDRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void STR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void STRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); - virtual void LDRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void LDRSB(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void LDRSH(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void STRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); - virtual void LDM(int cc, int dir, - int Rn, int W, uint32_t reg_list); - virtual void STM(int cc, int dir, - int Rn, int W, uint32_t reg_list); - - virtual void SWP(int cc, int Rn, int Rd, int Rm); - virtual void SWPB(int cc, int Rn, int Rd, int Rm); - virtual void SWI(int cc, uint32_t comment); - - virtual void PLD(int Rn, uint32_t offset); - virtual void CLZ(int cc, int Rd, int Rm); - virtual void QADD(int cc, int Rd, int Rm, int Rn); - virtual void QDADD(int cc, int Rd, int Rm, int Rn); - virtual void QSUB(int cc, int Rd, int Rm, int Rn); - virtual void QDSUB(int cc, int Rd, int Rm, int Rn); - virtual void SMUL(int cc, int xy, - int Rd, int Rm, int Rs); - virtual void SMULW(int cc, int y, - int Rd, int Rm, int Rs); - virtual void SMLA(int cc, int xy, - int Rd, int Rm, int Rs, int Rn); - virtual void SMLAL(int cc, int xy, - int RdHi, int RdLo, int Rs, int Rm); - virtual void SMLAW(int cc, int y, - int Rd, int Rm, int Rs, int Rn); - -private: - ARMAssemblerInterface* mTarget; -}; - -}; // namespace android - -#endif //ANDROID_ARMASSEMBLER_PROXY_H diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp deleted file mode 100644 index 29410c8a..00000000 --- a/libpixelflinger/codeflinger/CodeCache.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* 
libs/pixelflinger/codeflinger/CodeCache.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> - -#include <cutils/log.h> -#include <cutils/atomic.h> - -#include "codeflinger/CodeCache.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -#if defined(__arm__) -#include <unistd.h> -#include <errno.h> -#endif - -// ---------------------------------------------------------------------------- - -Assembly::Assembly(size_t size) - : mCount(1), mSize(0) -{ - mBase = (uint32_t*)malloc(size); - if (mBase) { - mSize = size; - } -} - -Assembly::~Assembly() -{ - free(mBase); -} - -void Assembly::incStrong(const void*) const -{ - android_atomic_inc(&mCount); -} - -void Assembly::decStrong(const void*) const -{ - if (android_atomic_dec(&mCount) == 1) { - delete this; - } -} - -ssize_t Assembly::size() const -{ - if (!mBase) return NO_MEMORY; - return mSize; -} - -uint32_t* Assembly::base() const -{ - return mBase; -} - -ssize_t Assembly::resize(size_t newSize) -{ - mBase = (uint32_t*)realloc(mBase, newSize); - mSize = newSize; - return size(); -} - -// ---------------------------------------------------------------------------- - -CodeCache::CodeCache(size_t size) - : mCacheSize(size), mCacheInUse(0) -{ - pthread_mutex_init(&mLock, 0); -} - -CodeCache::~CodeCache() -{ - 
pthread_mutex_destroy(&mLock); -} - -sp<Assembly> CodeCache::lookup(const AssemblyKeyBase& keyBase) const -{ - pthread_mutex_lock(&mLock); - sp<Assembly> r; - ssize_t index = mCacheData.indexOfKey(key_t(keyBase)); - if (index >= 0) { - const cache_entry_t& e = mCacheData.valueAt(index); - e.when = mWhen++; - r = e.entry; - } - pthread_mutex_unlock(&mLock); - return r; -} - -int CodeCache::cache( const AssemblyKeyBase& keyBase, - const sp<Assembly>& assembly) -{ - pthread_mutex_lock(&mLock); - - const ssize_t assemblySize = assembly->size(); - while (mCacheInUse + assemblySize > mCacheSize) { - // evict the LRU - size_t lru = 0; - size_t count = mCacheData.size(); - for (size_t i=0 ; i<count ; i++) { - const cache_entry_t& e = mCacheData.valueAt(i); - if (e.when < mCacheData.valueAt(lru).when) { - lru = i; - } - } - const cache_entry_t& e = mCacheData.valueAt(lru); - mCacheInUse -= e.entry->size(); - mCacheData.removeItemsAt(lru); - } - - ssize_t err = mCacheData.add(key_t(keyBase), cache_entry_t(assembly, mWhen)); - if (err >= 0) { - mCacheInUse += assemblySize; - mWhen++; - // synchronize caches... -#if defined(__arm__) - const long base = long(assembly->base()); - const long curr = base + long(assembly->size()); - err = cacheflush(base, curr, 0); - LOGE_IF(err, "__ARM_NR_cacheflush error %s\n", - strerror(errno)); -#endif - } - - pthread_mutex_unlock(&mLock); - return err; -} - -// ---------------------------------------------------------------------------- - -}; // namespace android diff --git a/libpixelflinger/codeflinger/CodeCache.h b/libpixelflinger/codeflinger/CodeCache.h deleted file mode 100644 index 370ce175..00000000 --- a/libpixelflinger/codeflinger/CodeCache.h +++ /dev/null @@ -1,134 +0,0 @@ -/* libs/pixelflinger/codeflinger/CodeCache.h -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - - -#ifndef ANDROID_CODECACHE_H -#define ANDROID_CODECACHE_H - -#include <stdint.h> -#include <pthread.h> -#include <sys/types.h> - -#include <utils/KeyedVector.h> - -#include "tinyutils/smartpointer.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -class AssemblyKeyBase { -public: - virtual ~AssemblyKeyBase() { } - virtual int compare_type(const AssemblyKeyBase& key) const = 0; -}; - -template <typename T> -class AssemblyKey : public AssemblyKeyBase -{ -public: - AssemblyKey(const T& rhs) : mKey(rhs) { } - virtual int compare_type(const AssemblyKeyBase& key) const { - const T& rhs = static_cast<const AssemblyKey&>(key).mKey; - return android::compare_type(mKey, rhs); - } -private: - T mKey; -}; - -// ---------------------------------------------------------------------------- - -class Assembly -{ -public: - Assembly(size_t size); - virtual ~Assembly(); - - ssize_t size() const; - uint32_t* base() const; - ssize_t resize(size_t size); - - // protocol for sp<> - void incStrong(const void* id) const; - void decStrong(const void* id) const; - typedef void weakref_type; - -private: - mutable int32_t mCount; - uint32_t* mBase; - ssize_t mSize; -}; - -// ---------------------------------------------------------------------------- - -class CodeCache -{ -public: -// pretty simple cache API... 
- CodeCache(size_t size); - ~CodeCache(); - - sp<Assembly> lookup(const AssemblyKeyBase& key) const; - - int cache( const AssemblyKeyBase& key, - const sp<Assembly>& assembly); - -private: - // nothing to see here... - struct cache_entry_t { - inline cache_entry_t() { } - inline cache_entry_t(const sp<Assembly>& a, int64_t w) - : entry(a), when(w) { } - sp<Assembly> entry; - mutable int64_t when; - }; - - class key_t { - friend int compare_type( - const key_value_pair_t<key_t, cache_entry_t>&, - const key_value_pair_t<key_t, cache_entry_t>&); - const AssemblyKeyBase* mKey; - public: - key_t() { }; - key_t(const AssemblyKeyBase& k) : mKey(&k) { } - }; - - mutable pthread_mutex_t mLock; - mutable int64_t mWhen; - size_t mCacheSize; - size_t mCacheInUse; - KeyedVector<key_t, cache_entry_t> mCacheData; - - friend int compare_type( - const key_value_pair_t<key_t, cache_entry_t>&, - const key_value_pair_t<key_t, cache_entry_t>&); -}; - -// KeyedVector uses compare_type(), which is more efficient, than -// just using operator < () -inline int compare_type( - const key_value_pair_t<CodeCache::key_t, CodeCache::cache_entry_t>& lhs, - const key_value_pair_t<CodeCache::key_t, CodeCache::cache_entry_t>& rhs) -{ - return lhs.key.mKey->compare_type(*(rhs.key.mKey)); -} - -// ---------------------------------------------------------------------------- - -}; // namespace android - -#endif //ANDROID_CODECACHE_H diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp deleted file mode 100644 index 1cd189c9..00000000 --- a/libpixelflinger/codeflinger/GGLAssembler.cpp +++ /dev/null @@ -1,1150 +0,0 @@ -/* libs/pixelflinger/codeflinger/GGLAssembler.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

// Builds an assembler that emits through 'target' (handed to the
// ARMAssemblerProxy base). The optimization level starts at 7.
GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

// Resets both base classes (the proxied assembler and the register
// allocator) and records 'opt_level' as the current optimization level.
void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------

// Generates the scanline routine described by 'needs'.
//
// scanline_core() is attempted at the current mOptLevel and, on failure,
// retried at each lower level down to 0. Because reset() stores the level,
// mOptLevel is left at the last level that was tried.
//
// Returns -1 if every attempt failed (the name is logged and disassemble()
// is called); otherwise returns the result of generate().
int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    // NOTE(review): the labels are also queried when every attempt failed
    // (err != 0); scanline_core() can return before emitting them.
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    // distance between the two labels, in 32-bit words
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    sprintf(name,
            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        LOGE("Error while generating ""%s""\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}

// One code-generation attempt at the current optimization level.
//
// Decodes 'needs' into member state, fills mInfo[] for the four components,
// then emits, in order: the prolog, the per-scanline setup
// (build_scanline_prolog), the "fragment_loop" body, the iterator updates
// and loop branch, the "epilog" and, when alpha or depth testing is
// enabled, the "discard_*" paths.
//
// Returns registerFile().status(); scanline() treats 0 as success. A
// non-zero status is returned early at several points, before the
// remaining code (and labels) are emitted.
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    // NOTE(review): 'duration' is not read again in this function.
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    // the test functions are stored in 'needs' relative to GGL_NEVER
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // When the color buffer's alpha component has .h == 0 (presumably: the
    // format has no alpha bits -- confirm against GGLFormat), every blend
    // factor that reads destination alpha is replaced by GGL_ONE.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending =    blending_codes(mBlendSrc, mBlendDst) |
                            blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    // (both branches below are intentionally empty placeholders)
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Per-component setup. Bit i of 'masking', mBlending, mMasking and
    // fbComponents refers to component i.
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    // Every component present in the color buffer is masked out: no color
    // is written, so dithering is switched off as well.
    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            // (rotate, add 1 at bit (32 - GGL_DITHER_ORDER_SHIFT), then
            // rotate by the complementary amount)
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // parts.dither = byte loaded from
                //   context + (count & (GGL_DITHER_SIZE-1))
                //           + offset of ditherMatrix
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            // alpha is built first: the deferred z-write below must come
            // after the alpha component (and its test) has been emitted
            build_component(pixel, parts, GGLFormat::ALPHA,    regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED,      regs);
            build_component(pixel, parts, GGLFormat::GREEN,    regs);
            build_component(pixel, parts, GGLFormat::BLUE,     regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // The pixel count lives in the upper 16 bits of parts.count.reg (see
    // build_scanline_prolog): subtract one with flags set and loop while
    // the result is not negative.
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // Discard paths, emitted after the epilog so they are only reached
    // through branches to the "discard_*" labels (presumably emitted by the
    // depth / alpha test builders -- not visible here). They advance the
    // iterators, and the color-buffer pointer by cbPtr.size>>3, without
    // writing a pixel, then rejoin the loop.
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------

// Emits the per-scanline setup code and fills 'parts' with the registers
// that the fragment loop uses:
//  - parts.count : pixel count minus one in the upper 16 bits (plus the
//                  dither offset in the low bits when dithering)
//  - parts.cbPtr : color-buffer pointer (skipped when mAllMasked)
//  - the initial fog value, stored to generated_vars.f (when fog is needed)
//  - parts.z     : initial Z; the z-buffer base goes to generated_vars.zbase
//  - texture coordinates and iterated color
//  - parts.covPtr: coverage pointer (when anti-aliasing)
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    // presumably referenced by the CONTEXT_LOAD / CONTEXT_STORE macros --
    // confirm against their definition
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        // (this Scratch shadows the function-level 'scratches')
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;      // 'f' reuses the ydfdy register
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;    // loaded straight into the Z register
        CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl-count + stride*y)*2
        int Rs = dzdx;              // the dzdx register is reused from here on
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}

// ---------------------------------------------------------------------------

// Emits the code for one color component of the output pixel.
//
// The incoming value is produced by build_incoming_component(). If the
// component is written to the destination (mInfo[component].inDest) it is
// then blended against mDstPixel and merged into 'pixel' by downshift().
//
// 'regs' is the caller's scratch set, passed on to
// build_incoming_component(); other temporaries come from a local Scratch.
void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    // assumes 'component' is 0..3, indexing this table in the order
    // alpha, red, green, blue -- confirm against the GGLFormat enum
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
            parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}

void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
            (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environement (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
- build_fog( fragment, component, regs ); - - temp = fragment; - } else { - if (mInfo[component].inDest) { - // extraction not needed and replace - // we just select the right component - if ((mTextureMachine.replaced & component_mask) == 0) { - // component wasn't replaced, so use it! - temp = component_t(parts.iterated, component); - } - for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { - const texture_unit_t& tmu = mTextureMachine.tmu[i]; - if ((tmu.mask & component_mask) && - ((tmu.replaced & component_mask) == 0)) { - temp = component_t(parts.texel[i], component); - } - } - } - } -} - -bool GGLAssembler::isAlphaSourceNeeded() const -{ - // XXX: also needed for alpha-test - const int bs = mBlendSrc; - const int bd = mBlendDst; - return bs==GGL_SRC_ALPHA_SATURATE || - bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA || - bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ; -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts) -{ - if (mSmooth && !parts.iterated_packed) { - // update the iterated color in a pipelined way... 
- comment("update iterated color"); - Scratch scratches(registerFile()); - - const int reload = parts.reload; - for (int i=0 ; i<4 ; i++) { - if (!mInfo[i].iterated) - continue; - - int c = parts.argb[i].reg; - int dx = parts.argb_dx[i].reg; - - if (reload & 1) { - c = scratches.obtain(); - CONTEXT_LOAD(c, generated_vars.argb[i].c); - } - if (reload & 2) { - dx = scratches.obtain(); - CONTEXT_LOAD(dx, generated_vars.argb[i].dx); - } - - if (mSmooth) { - ADD(AL, 0, c, c, dx); - } - - if (reload & 1) { - CONTEXT_STORE(c, generated_vars.argb[i].c); - scratches.recycle(c); - } - if (reload & 2) { - scratches.recycle(dx); - } - } - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::build_coverage_application(component_t& fragment, - const fragment_parts_t& parts, Scratch& regs) -{ - // here fragment.l is guarenteed to be 0 - if (mAA) { - // coverages are 1.15 fixed-point numbers - comment("coverage application"); - - component_t incoming(fragment); - modify(fragment, regs); - - Scratch scratches(registerFile()); - int cf = scratches.obtain(); - LDRH(AL, cf, parts.covPtr.reg, immed8_post(2)); - if (fragment.h > 31) { - fragment.h--; - SMULWB(AL, fragment.reg, incoming.reg, cf); - } else { - MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1)); - SMULWB(AL, fragment.reg, fragment.reg, cf); - } - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::build_alpha_test(component_t& fragment, - const fragment_parts_t& parts) -{ - if (mAlphaTest != GGL_ALWAYS) { - comment("Alpha Test"); - Scratch scratches(registerFile()); - int ref = scratches.obtain(); - const int shift = GGL_COLOR_BITS-fragment.size(); - CONTEXT_LOAD(ref, state.alpha_test.ref); - if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift)); - else CMP(AL, fragment.reg, ref); - int cc = NV; - switch (mAlphaTest) { - case GGL_NEVER: cc = NV; break; - case GGL_LESS: cc = LT; break; - case GGL_EQUAL: 
cc = EQ; break; - case GGL_LEQUAL: cc = LS; break; - case GGL_GREATER: cc = HI; break; - case GGL_NOTEQUAL: cc = NE; break; - case GGL_GEQUAL: cc = HS; break; - } - B(cc^1, "discard_after_textures"); - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::build_depth_test( - const fragment_parts_t& parts, uint32_t mask) -{ - mask &= Z_TEST|Z_WRITE; - const needs_t& needs = mBuilderContext.needs; - const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p); - Scratch scratches(registerFile()); - - if (mDepthTest != GGL_ALWAYS || zmask) { - int cc=AL, ic=AL; - switch (mDepthTest) { - case GGL_LESS: ic = HI; break; - case GGL_EQUAL: ic = EQ; break; - case GGL_LEQUAL: ic = HS; break; - case GGL_GREATER: ic = LT; break; - case GGL_NOTEQUAL: ic = NE; break; - case GGL_GEQUAL: ic = LS; break; - case GGL_NEVER: - // this never happens, because it's taken care of when - // computing the needs. but we keep it for completness. - comment("Depth Test (NEVER)"); - B(AL, "discard_before_textures"); - return; - case GGL_ALWAYS: - // we're here because zmask is enabled - mask &= ~Z_TEST; // test always passes. 
- break; - } - - // inverse the condition - cc = ic^1; - - if ((mask & Z_WRITE) && !zmask) { - mask &= ~Z_WRITE; - } - - if (!mask) - return; - - comment("Depth Test"); - - int zbase = scratches.obtain(); - int depth = scratches.obtain(); - int z = parts.z.reg; - - CONTEXT_LOAD(zbase, generated_vars.zbase); // stall - SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); - // above does zbase = zbase + ((count >> 16) << 1) - - if (mask & Z_TEST) { - LDRH(AL, depth, zbase); // stall - CMP(AL, depth, reg_imm(z, LSR, 16)); - B(cc, "discard_before_textures"); - } - if (mask & Z_WRITE) { - if (mask == Z_WRITE) { - // only z-write asked, cc is meaningless - ic = AL; - } - MOV(AL, 0, depth, reg_imm(z, LSR, 16)); - STRH(ic, depth, zbase); - } - } -} - -void GGLAssembler::build_iterate_z(const fragment_parts_t& parts) -{ - const needs_t& needs = mBuilderContext.needs; - if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { - Scratch scratches(registerFile()); - int dzdx = scratches.obtain(); - CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall - ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx); - } -} - -void GGLAssembler::build_iterate_f(const fragment_parts_t& parts) -{ - const needs_t& needs = mBuilderContext.needs; - if (GGL_READ_NEEDS(P_FOG, needs.p)) { - Scratch scratches(registerFile()); - int dfdx = scratches.obtain(); - int f = scratches.obtain(); - CONTEXT_LOAD(f, generated_vars.f); - CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall - ADD(AL, 0, f, f, dfdx); - CONTEXT_STORE(f, generated_vars.f); - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs) -{ - const needs_t& needs = mBuilderContext.needs; - const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; - if (opcode == GGL_COPY) - return; - - comment("logic operation"); - - pixel_t s(pixel); - if (!(pixel.flags & CORRUPTIBLE)) { - pixel.reg = regs.obtain(); - pixel.flags |= 
CORRUPTIBLE; - } - - pixel_t d(mDstPixel); - switch(opcode) { - case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break; - case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break; - case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break; - case GGL_COPY: break; - case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break; - case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break; - case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break; - case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break; - case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg); - MVN(AL, 0, pixel.reg, pixel.reg); break; - case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg); - MVN(AL, 0, pixel.reg, pixel.reg); break; - case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break; - case GGL_OR_REVERSE: // s | ~d == ~(~s & d) - BIC(AL, 0, pixel.reg, d.reg, s.reg); - MVN(AL, 0, pixel.reg, pixel.reg); break; - case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break; - case GGL_OR_INVERTED: // ~s | d == ~(s & ~d) - BIC(AL, 0, pixel.reg, s.reg, d.reg); - MVN(AL, 0, pixel.reg, pixel.reg); break; - case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg); - MVN(AL, 0, pixel.reg, pixel.reg); break; - case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break; - }; -} - -// --------------------------------------------------------------------------- - -static uint32_t find_bottom(uint32_t val) -{ - uint32_t i = 0; - while (!(val & (3<<i))) - i+= 2; - return i; -} - -static void normalize(uint32_t& val, uint32_t& rot) -{ - rot = 0; - while (!(val&3) || (val & 0xFC000000)) { - uint32_t newval; - newval = val >> 2; - newval |= (val&3) << 30; - val = newval; - rot += 2; - if (rot == 32) { - rot = 0; - break; - } - } -} - -void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits) -{ - uint32_t rot; - uint32_t size = ((bits>=32) ? 
0 : (1LU << bits)) - 1; - mask &= size; - - if (mask == size) { - if (d != s) - MOV( AL, 0, d, s); - return; - } - - int negative_logic = !isValidImmediate(mask); - if (negative_logic) { - mask = ~mask & size; - } - normalize(mask, rot); - - if (mask) { - while (mask) { - uint32_t bitpos = find_bottom(mask); - int shift = rot + bitpos; - uint32_t m = mask & (0xff << bitpos); - mask &= ~m; - m >>= bitpos; - int32_t newMask = (m<<shift) | (m>>(32-shift)); - if (!negative_logic) { - AND( AL, 0, d, s, imm(newMask) ); - } else { - BIC( AL, 0, d, s, imm(newMask) ); - } - s = d; - } - } else { - MOV( AL, 0, d, imm(0)); - } -} - -void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs) -{ - if (!mMasking || mAllMasked) { - return; - } - - comment("color mask"); - - pixel_t fb(mDstPixel); - pixel_t s(pixel); - if (!(pixel.flags & CORRUPTIBLE)) { - pixel.reg = regs.obtain(); - pixel.flags |= CORRUPTIBLE; - } - - int mask = 0; - for (int i=0 ; i<4 ; i++) { - const int component_mask = 1<<i; - const int h = fb.format.c[i].h; - const int l = fb.format.c[i].l; - if (h && (!(mMasking & component_mask))) { - mask |= ((1<<(h-l))-1) << l; - } - } - - // There is no need to clear the masked components of the source - // (unless we applied a logic op), because they're already zeroed - // by construction (masked components are not computed) - - if (mLogicOp) { - const needs_t& needs = mBuilderContext.needs; - const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; - if (opcode != GGL_CLEAR) { - // clear masked component of source - build_and_immediate(pixel.reg, s.reg, mask, fb.size()); - s = pixel; - } - } - - // clear non masked components of destination - build_and_immediate(fb.reg, fb.reg, ~mask, fb.size()); - - // or back the channels that were masked - if (s.reg == fb.reg) { - // this is in fact a MOV - if (s.reg == pixel.reg) { - // ugh. 
this in in fact a nop - } else { - MOV(AL, 0, pixel.reg, fb.reg); - } - } else { - ORR(AL, 0, pixel.reg, s.reg, fb.reg); - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::base_offset( - const pointer_t& d, const pointer_t& b, const reg_t& o) -{ - switch (b.size) { - case 32: - ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); - break; - case 24: - if (d.reg == b.reg) { - ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); - ADD(AL, 0, d.reg, d.reg, o.reg); - } else { - ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); - ADD(AL, 0, d.reg, d.reg, b.reg); - } - break; - case 16: - ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); - break; - case 8: - ADD(AL, 0, d.reg, b.reg, o.reg); - break; - } -} - -// ---------------------------------------------------------------------------- -// cheezy register allocator... -// ---------------------------------------------------------------------------- - -void RegisterAllocator::reset() -{ - mRegs.reset(); -} - -int RegisterAllocator::reserveReg(int reg) -{ - return mRegs.reserve(reg); -} - -int RegisterAllocator::obtainReg() -{ - return mRegs.obtain(); -} - -void RegisterAllocator::recycleReg(int reg) -{ - mRegs.recycle(reg); -} - -RegisterAllocator::RegisterFile& RegisterAllocator::registerFile() -{ - return mRegs; -} - -// ---------------------------------------------------------------------------- - -RegisterAllocator::RegisterFile::RegisterFile() - : mRegs(0), mTouched(0), mStatus(0) -{ - reserve(ARMAssemblerInterface::SP); - reserve(ARMAssemblerInterface::PC); -} - -RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs) - : mRegs(rhs.mRegs), mTouched(rhs.mTouched) -{ -} - -RegisterAllocator::RegisterFile::~RegisterFile() -{ -} - -bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const -{ - return (mRegs == rhs.mRegs); -} - -void RegisterAllocator::RegisterFile::reset() -{ - mRegs = mTouched = mStatus = 0; - 
reserve(ARMAssemblerInterface::SP); - reserve(ARMAssemblerInterface::PC); -} - -int RegisterAllocator::RegisterFile::reserve(int reg) -{ - LOG_ALWAYS_FATAL_IF(isUsed(reg), - "reserving register %d, but already in use", - reg); - mRegs |= (1<<reg); - mTouched |= mRegs; - return reg; -} - -void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) -{ - mRegs |= regMask; - mTouched |= regMask; -} - -int RegisterAllocator::RegisterFile::isUsed(int reg) const -{ - LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg); - return mRegs & (1<<reg); -} - -int RegisterAllocator::RegisterFile::obtain() -{ - const char priorityList[14] = { 0, 1, 2, 3, - 12, 14, 4, 5, - 6, 7, 8, 9, - 10, 11 }; - const int nbreg = sizeof(priorityList); - int i, r; - for (i=0 ; i<nbreg ; i++) { - r = priorityList[i]; - if (!isUsed(r)) { - break; - } - } - // this is not an error anymore because, we'll try again with - // a lower optimization level. - //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n"); - if (i >= nbreg) { - mStatus |= OUT_OF_REGISTERS; - // we return SP so we can more easily debug things - // the code will never be run anyway. - return ARMAssemblerInterface::SP; - } - reserve(r); - return r; -} - -bool RegisterAllocator::RegisterFile::hasFreeRegs() const -{ - return ((mRegs & 0xFFFF) == 0xFFFF) ? 
false : true; -} - -int RegisterAllocator::RegisterFile::countFreeRegs() const -{ - int f = ~mRegs & 0xFFFF; - // now count number of 1 - f = (f & 0x5555) + ((f>>1) & 0x5555); - f = (f & 0x3333) + ((f>>2) & 0x3333); - f = (f & 0x0F0F) + ((f>>4) & 0x0F0F); - f = (f & 0x00FF) + ((f>>8) & 0x00FF); - return f; -} - -void RegisterAllocator::RegisterFile::recycle(int reg) -{ - LOG_FATAL_IF(!isUsed(reg), - "recycling unallocated register %d", - reg); - mRegs &= ~(1<<reg); -} - -void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask) -{ - LOG_FATAL_IF((mRegs & regMask)!=regMask, - "recycling unallocated registers " - "(recycle=%08x, allocated=%08x, unallocated=%08x)", - regMask, mRegs, mRegs®Mask); - mRegs &= ~regMask; -} - -uint32_t RegisterAllocator::RegisterFile::touched() const -{ - return mTouched; -} - -// ---------------------------------------------------------------------------- - -}; // namespace android - diff --git a/libpixelflinger/codeflinger/GGLAssembler.h b/libpixelflinger/codeflinger/GGLAssembler.h deleted file mode 100644 index d1d29f0b..00000000 --- a/libpixelflinger/codeflinger/GGLAssembler.h +++ /dev/null @@ -1,554 +0,0 @@ -/* libs/pixelflinger/codeflinger/GGLAssembler.h -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ - - -#ifndef ANDROID_GGLASSEMBLER_H -#define ANDROID_GGLASSEMBLER_H - -#include <stdint.h> -#include <sys/types.h> - -#include <private/pixelflinger/ggl_context.h> - -#include "codeflinger/ARMAssemblerProxy.h" - - -namespace android { - -// ---------------------------------------------------------------------------- - -#define CONTEXT_LOAD(REG, FIELD) \ - LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD))) - -#define CONTEXT_STORE(REG, FIELD) \ - STR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD))) - - -class RegisterAllocator -{ -public: - class RegisterFile; - - RegisterFile& registerFile(); - int reserveReg(int reg); - int obtainReg(); - void recycleReg(int reg); - void reset(); - - class RegisterFile - { - public: - RegisterFile(); - RegisterFile(const RegisterFile& rhs); - ~RegisterFile(); - - void reset(); - - bool operator == (const RegisterFile& rhs) const; - bool operator != (const RegisterFile& rhs) const { - return !operator == (rhs); - } - - int reserve(int reg); - void reserveSeveral(uint32_t regMask); - - void recycle(int reg); - void recycleSeveral(uint32_t regMask); - - int obtain(); - inline int isUsed(int reg) const; - - bool hasFreeRegs() const; - int countFreeRegs() const; - - uint32_t touched() const; - inline uint32_t status() const { return mStatus; } - - enum { - OUT_OF_REGISTERS = 0x1 - }; - - private: - uint32_t mRegs; - uint32_t mTouched; - uint32_t mStatus; - }; - - class Scratch - { - public: - Scratch(RegisterFile& regFile) - : mRegFile(regFile), mScratch(0) { - } - ~Scratch() { - mRegFile.recycleSeveral(mScratch); - } - int obtain() { - int reg = mRegFile.obtain(); - mScratch |= 1<<reg; - return reg; - } - void recycle(int reg) { - mRegFile.recycle(reg); - mScratch &= ~(1<<reg); - } - bool isUsed(int reg) { - return (mScratch & (1<<reg)); - } - int countFreeRegs() { - return mRegFile.countFreeRegs(); - } - private: - RegisterFile& mRegFile; - uint32_t mScratch; - }; - - class Spill - { - public: - 
Spill(RegisterFile& regFile, ARMAssemblerInterface& gen, uint32_t reglist) - : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0) - { - if (reglist) { - int count = 0; - while (reglist) { - count++; - reglist &= ~(1 << (31 - __builtin_clz(reglist))); - } - if (count == 1) { - int reg = 31 - __builtin_clz(mRegList); - mGen.STR(mGen.AL, reg, mGen.SP, mGen.immed12_pre(-4, 1)); - } else { - mGen.STM(mGen.AL, mGen.DB, mGen.SP, 1, mRegList); - } - mRegFile.recycleSeveral(mRegList); - mCount = count; - } - } - ~Spill() { - if (mRegList) { - if (mCount == 1) { - int reg = 31 - __builtin_clz(mRegList); - mGen.LDR(mGen.AL, reg, mGen.SP, mGen.immed12_post(4)); - } else { - mGen.LDM(mGen.AL, mGen.IA, mGen.SP, 1, mRegList); - } - mRegFile.reserveSeveral(mRegList); - } - } - private: - RegisterFile& mRegFile; - ARMAssemblerInterface& mGen; - uint32_t mRegList; - int mCount; - }; - -private: - RegisterFile mRegs; -}; - -// ---------------------------------------------------------------------------- - -class GGLAssembler : public ARMAssemblerProxy, public RegisterAllocator -{ -public: - - GGLAssembler(ARMAssemblerInterface* target); - virtual ~GGLAssembler(); - - uint32_t* base() const { return 0; } // XXX - uint32_t* pc() const { return 0; } // XXX - - void reset(int opt_level); - - virtual void prolog(); - virtual void epilog(uint32_t touched); - - // generate scanline code for given needs - int scanline(const needs_t& needs, context_t const* c); - int scanline_core(const needs_t& needs, context_t const* c); - - enum { - CLEAR_LO = 0x0001, - CLEAR_HI = 0x0002, - CORRUPTIBLE = 0x0004, - FIRST = 0x0008 - }; - - enum { //load/store flags - WRITE_BACK = 0x0001 - }; - - struct reg_t { - reg_t() : reg(-1), flags(0) { - } - reg_t(int r, int f=0) - : reg(r), flags(f) { - } - void setTo(int r, int f=0) { - reg=r; flags=f; - } - int reg; - uint16_t flags; - }; - - struct integer_t : public reg_t { - integer_t() : reg_t(), s(0) { - } - integer_t(int r, int sz=32, int f=0) - : 
reg_t(r, f), s(sz) { - } - void setTo(int r, int sz=32, int f=0) { - reg_t::setTo(r, f); s=sz; - } - int8_t s; - inline int size() const { return s; } - }; - - struct pixel_t : public reg_t { - pixel_t() : reg_t() { - memset(&format, 0, sizeof(GGLFormat)); - } - pixel_t(int r, const GGLFormat* fmt, int f=0) - : reg_t(r, f), format(*fmt) { - } - void setTo(int r, const GGLFormat* fmt, int f=0) { - reg_t::setTo(r, f); format = *fmt; - } - GGLFormat format; - inline int hi(int c) const { return format.c[c].h; } - inline int low(int c) const { return format.c[c].l; } - inline int mask(int c) const { return ((1<<size(c))-1) << low(c); } - inline int size() const { return format.size*8; } - inline int size(int c) const { return component_size(c); } - inline int component_size(int c) const { return hi(c) - low(c); } - }; - - struct component_t : public reg_t { - component_t() : reg_t(), h(0), l(0) { - } - component_t(int r, int f=0) - : reg_t(r, f), h(0), l(0) { - } - component_t(int r, int lo, int hi, int f=0) - : reg_t(r, f), h(hi), l(lo) { - } - explicit component_t(const integer_t& rhs) - : reg_t(rhs.reg, rhs.flags), h(rhs.s), l(0) { - } - explicit component_t(const pixel_t& rhs, int component) { - setTo( rhs.reg, - rhs.format.c[component].l, - rhs.format.c[component].h, - rhs.flags|CLEAR_LO|CLEAR_HI); - } - void setTo(int r, int lo=0, int hi=0, int f=0) { - reg_t::setTo(r, f); h=hi; l=lo; - } - int8_t h; - int8_t l; - inline int size() const { return h-l; } - }; - - struct pointer_t : public reg_t { - pointer_t() : reg_t(), size(0) { - } - pointer_t(int r, int s, int f=0) - : reg_t(r, f), size(s) { - } - void setTo(int r, int s, int f=0) { - reg_t::setTo(r, f); size=s; - } - int8_t size; - }; - - -private: - struct tex_coord_t { - reg_t s; - reg_t t; - pointer_t ptr; - }; - - struct fragment_parts_t { - uint32_t packed : 1; - uint32_t reload : 2; - uint32_t iterated_packed : 1; - pixel_t iterated; - pointer_t cbPtr; - pointer_t covPtr; - reg_t count; - reg_t argb[4]; 
- reg_t argb_dx[4]; - reg_t z; - reg_t dither; - pixel_t texel[GGL_TEXTURE_UNIT_COUNT]; - tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT]; - }; - - struct texture_unit_t { - int format_idx; - GGLFormat format; - int bits; - int swrap; - int twrap; - int env; - int pot; - int linear; - uint8_t mask; - uint8_t replaced; - }; - - struct texture_machine_t { - texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT]; - uint8_t mask; - uint8_t replaced; - uint8_t directTexture; - uint8_t activeUnits; - }; - - struct component_info_t { - bool masked : 1; - bool inDest : 1; - bool needed : 1; - bool replaced : 1; - bool iterated : 1; - bool smooth : 1; - bool blend : 1; - bool fog : 1; - }; - - struct builder_context_t { - context_t const* c; - needs_t needs; - int Rctx; - }; - - template <typename T> - void modify(T& r, Scratch& regs) - { - if (!(r.flags & CORRUPTIBLE)) { - r.reg = regs.obtain(); - r.flags |= CORRUPTIBLE; - } - } - - // helpers - void base_offset(const pointer_t& d, const pointer_t& b, const reg_t& o); - - // texture environement - void modulate( component_t& dest, - const component_t& incoming, - const pixel_t& texel, int component); - - void decal( component_t& dest, - const component_t& incoming, - const pixel_t& texel, int component); - - void blend( component_t& dest, - const component_t& incoming, - const pixel_t& texel, int component, int tmu); - - void add( component_t& dest, - const component_t& incoming, - const pixel_t& texel, int component); - - // load/store stuff - void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0); - void load(const pointer_t& addr, const pixel_t& dest, uint32_t flags=0); - void extract(integer_t& d, const pixel_t& s, int component); - void extract(component_t& d, const pixel_t& s, int component); - void extract(integer_t& d, int s, int h, int l, int bits=32); - void expand(integer_t& d, const integer_t& s, int dbits); - void expand(integer_t& d, const component_t& s, int dbits); - void expand(component_t& d, const 
component_t& s, int dbits); - void downshift(pixel_t& d, int component, component_t s, const reg_t& dither); - - - void mul_factor( component_t& d, - const integer_t& v, - const integer_t& f); - - void mul_factor_add( component_t& d, - const integer_t& v, - const integer_t& f, - const component_t& a); - - void component_add( component_t& d, - const integer_t& dst, - const integer_t& src); - - void component_sat( const component_t& v); - - - void build_scanline_prolog( fragment_parts_t& parts, - const needs_t& needs); - - void build_smooth_shade(const fragment_parts_t& parts); - - void build_component( pixel_t& pixel, - const fragment_parts_t& parts, - int component, - Scratch& global_scratches); - - void build_incoming_component( - component_t& temp, - int dst_size, - const fragment_parts_t& parts, - int component, - Scratch& scratches, - Scratch& global_scratches); - - void init_iterated_color(fragment_parts_t& parts, const reg_t& x); - - void build_iterated_color( component_t& fragment, - const fragment_parts_t& parts, - int component, - Scratch& regs); - - void decodeLogicOpNeeds(const needs_t& needs); - - void decodeTMUNeeds(const needs_t& needs, context_t const* c); - - void init_textures( tex_coord_t* coords, - const reg_t& x, - const reg_t& y); - - void build_textures( fragment_parts_t& parts, - Scratch& regs); - - void filter8( const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS); - - void filter16( const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS); - - void filter24( const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS); - - void filter32( const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS); - - void build_texture_environment( component_t& fragment, - const 
fragment_parts_t& parts, - int component, - Scratch& regs); - - void wrapping( int d, - int coord, int size, - int tx_wrap, int tx_linear); - - void build_fog( component_t& temp, - int component, - Scratch& parent_scratches); - - void build_blending( component_t& in_out, - const pixel_t& pixel, - int component, - Scratch& parent_scratches); - - void build_blend_factor( - integer_t& factor, int f, int component, - const pixel_t& dst_pixel, - integer_t& fragment, - integer_t& fb, - Scratch& scratches); - - void build_blendFOneMinusF( component_t& temp, - const integer_t& factor, - const integer_t& fragment, - const integer_t& fb); - - void build_blendOneMinusFF( component_t& temp, - const integer_t& factor, - const integer_t& fragment, - const integer_t& fb); - - void build_coverage_application(component_t& fragment, - const fragment_parts_t& parts, - Scratch& regs); - - void build_alpha_test(component_t& fragment, const fragment_parts_t& parts); - - enum { Z_TEST=1, Z_WRITE=2 }; - void build_depth_test(const fragment_parts_t& parts, uint32_t mask); - void build_iterate_z(const fragment_parts_t& parts); - void build_iterate_f(const fragment_parts_t& parts); - void build_iterate_texture_coordinates(const fragment_parts_t& parts); - - void build_logic_op(pixel_t& pixel, Scratch& regs); - - void build_masking(pixel_t& pixel, Scratch& regs); - - void build_and_immediate(int d, int s, uint32_t mask, int bits); - - bool isAlphaSourceNeeded() const; - - enum { - FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8 - }; - - enum { - LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4 - }; - - static int blending_codes(int fs, int fd); - - builder_context_t mBuilderContext; - texture_machine_t mTextureMachine; - component_info_t mInfo[4]; - int mBlending; - int mMasking; - int mAllMasked; - int mLogicOp; - int mAlphaTest; - int mAA; - int mDithering; - int mDepthTest; - - int mSmooth; - int mFog; - pixel_t mDstPixel; - - GGLFormat mCbFormat; - - int mBlendFactorCached; - integer_t 
mAlphaSource; - - int mBaseRegister; - - int mBlendSrc; - int mBlendDst; - int mBlendSrcA; - int mBlendDstA; - - int mOptLevel; -}; - -// ---------------------------------------------------------------------------- - -}; // namespace android - -#endif // ANDROID_GGLASSEMBLER_H diff --git a/libpixelflinger/codeflinger/armreg.h b/libpixelflinger/codeflinger/armreg.h deleted file mode 100644 index fde81ba8..00000000 --- a/libpixelflinger/codeflinger/armreg.h +++ /dev/null @@ -1,300 +0,0 @@ -/* $NetBSD: armreg.h,v 1.28 2003/10/31 16:30:15 scw Exp $ */ - -/*- - * Copyright (c) 1998, 2001 Ben Harris - * Copyright (c) 1994-1996 Mark Brinicombe. - * Copyright (c) 1994 Brini. - * All rights reserved. - * - * This code is derived from software written for Brini by Mark Brinicombe - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Brini. - * 4. The name of the company nor the name of the author may be used to - * endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: /repoman/r/ncvs/src/sys/arm/include/armreg.h,v 1.3 2005/11/21 19:06:25 cognet Exp $ - */ - -#ifndef MACHINE_ARMREG_H -#define MACHINE_ARMREG_H -#define INSN_SIZE 4 -#define INSN_COND_MASK 0xf0000000 /* Condition mask */ -#define PSR_MODE 0x0000001f /* mode mask */ -#define PSR_USR26_MODE 0x00000000 -#define PSR_FIQ26_MODE 0x00000001 -#define PSR_IRQ26_MODE 0x00000002 -#define PSR_SVC26_MODE 0x00000003 -#define PSR_USR32_MODE 0x00000010 -#define PSR_FIQ32_MODE 0x00000011 -#define PSR_IRQ32_MODE 0x00000012 -#define PSR_SVC32_MODE 0x00000013 -#define PSR_ABT32_MODE 0x00000017 -#define PSR_UND32_MODE 0x0000001b -#define PSR_SYS32_MODE 0x0000001f -#define PSR_32_MODE 0x00000010 -#define PSR_FLAGS 0xf0000000 /* flags */ - -#define PSR_C_bit (1 << 29) /* carry */ - -/* The high-order byte is always the implementor */ -#define CPU_ID_IMPLEMENTOR_MASK 0xff000000 -#define CPU_ID_ARM_LTD 0x41000000 /* 'A' */ -#define CPU_ID_DEC 0x44000000 /* 'D' */ -#define CPU_ID_INTEL 0x69000000 /* 'i' */ -#define CPU_ID_TI 0x54000000 /* 'T' */ - -/* How to decide what format the CPUID is in. */ -#define CPU_ID_ISOLD(x) (((x) & 0x0000f000) == 0x00000000) -#define CPU_ID_IS7(x) (((x) & 0x0000f000) == 0x00007000) -#define CPU_ID_ISNEW(x) (!CPU_ID_ISOLD(x) && !CPU_ID_IS7(x)) - -/* On ARM3 and ARM6, this byte holds the foundry ID. 
*/ -#define CPU_ID_FOUNDRY_MASK 0x00ff0000 -#define CPU_ID_FOUNDRY_VLSI 0x00560000 - -/* On ARM7 it holds the architecture and variant (sub-model) */ -#define CPU_ID_7ARCH_MASK 0x00800000 -#define CPU_ID_7ARCH_V3 0x00000000 -#define CPU_ID_7ARCH_V4T 0x00800000 -#define CPU_ID_7VARIANT_MASK 0x007f0000 - -/* On more recent ARMs, it does the same, but in a different format */ -#define CPU_ID_ARCH_MASK 0x000f0000 -#define CPU_ID_ARCH_V3 0x00000000 -#define CPU_ID_ARCH_V4 0x00010000 -#define CPU_ID_ARCH_V4T 0x00020000 -#define CPU_ID_ARCH_V5 0x00030000 -#define CPU_ID_ARCH_V5T 0x00040000 -#define CPU_ID_ARCH_V5TE 0x00050000 -#define CPU_ID_VARIANT_MASK 0x00f00000 - -/* Next three nybbles are part number */ -#define CPU_ID_PARTNO_MASK 0x0000fff0 - -/* Intel XScale has sub fields in part number */ -#define CPU_ID_XSCALE_COREGEN_MASK 0x0000e000 /* core generation */ -#define CPU_ID_XSCALE_COREREV_MASK 0x00001c00 /* core revision */ -#define CPU_ID_XSCALE_PRODUCT_MASK 0x000003f0 /* product number */ - -/* And finally, the revision number. */ -#define CPU_ID_REVISION_MASK 0x0000000f - -/* Individual CPUs are probably best IDed by everything but the revision. */ -#define CPU_ID_CPU_MASK 0xfffffff0 - -/* Fake CPU IDs for ARMs without CP15 */ -#define CPU_ID_ARM2 0x41560200 -#define CPU_ID_ARM250 0x41560250 - -/* Pre-ARM7 CPUs -- [15:12] == 0 */ -#define CPU_ID_ARM3 0x41560300 -#define CPU_ID_ARM600 0x41560600 -#define CPU_ID_ARM610 0x41560610 -#define CPU_ID_ARM620 0x41560620 - -/* ARM7 CPUs -- [15:12] == 7 */ -#define CPU_ID_ARM700 0x41007000 /* XXX This is a guess. */ -#define CPU_ID_ARM710 0x41007100 -#define CPU_ID_ARM7500 0x41027100 /* XXX This is a guess. 
*/ -#define CPU_ID_ARM710A 0x41047100 /* inc ARM7100 */ -#define CPU_ID_ARM7500FE 0x41077100 -#define CPU_ID_ARM710T 0x41807100 -#define CPU_ID_ARM720T 0x41807200 -#define CPU_ID_ARM740T8K 0x41807400 /* XXX no MMU, 8KB cache */ -#define CPU_ID_ARM740T4K 0x41817400 /* XXX no MMU, 4KB cache */ - -/* Post-ARM7 CPUs */ -#define CPU_ID_ARM810 0x41018100 -#define CPU_ID_ARM920T 0x41129200 -#define CPU_ID_ARM920T_ALT 0x41009200 -#define CPU_ID_ARM922T 0x41029220 -#define CPU_ID_ARM940T 0x41029400 /* XXX no MMU */ -#define CPU_ID_ARM946ES 0x41049460 /* XXX no MMU */ -#define CPU_ID_ARM966ES 0x41049660 /* XXX no MMU */ -#define CPU_ID_ARM966ESR1 0x41059660 /* XXX no MMU */ -#define CPU_ID_ARM1020E 0x4115a200 /* (AKA arm10 rev 1) */ -#define CPU_ID_ARM1022ES 0x4105a220 -#define CPU_ID_SA110 0x4401a100 -#define CPU_ID_SA1100 0x4401a110 -#define CPU_ID_TI925T 0x54029250 -#define CPU_ID_SA1110 0x6901b110 -#define CPU_ID_IXP1200 0x6901c120 -#define CPU_ID_80200 0x69052000 -#define CPU_ID_PXA250 0x69052100 /* sans core revision */ -#define CPU_ID_PXA210 0x69052120 -#define CPU_ID_PXA250A 0x69052100 /* 1st version Core */ -#define CPU_ID_PXA210A 0x69052120 /* 1st version Core */ -#define CPU_ID_PXA250B 0x69052900 /* 3rd version Core */ -#define CPU_ID_PXA210B 0x69052920 /* 3rd version Core */ -#define CPU_ID_PXA250C 0x69052d00 /* 4th version Core */ -#define CPU_ID_PXA210C 0x69052d20 /* 4th version Core */ -#define CPU_ID_80321_400 0x69052420 -#define CPU_ID_80321_600 0x69052430 -#define CPU_ID_80321_400_B0 0x69052c20 -#define CPU_ID_80321_600_B0 0x69052c30 -#define CPU_ID_IXP425_533 0x690541c0 -#define CPU_ID_IXP425_400 0x690541d0 -#define CPU_ID_IXP425_266 0x690541f0 - -/* ARM3-specific coprocessor 15 registers */ -#define ARM3_CP15_FLUSH 1 -#define ARM3_CP15_CONTROL 2 -#define ARM3_CP15_CACHEABLE 3 -#define ARM3_CP15_UPDATEABLE 4 -#define ARM3_CP15_DISRUPTIVE 5 - -/* ARM3 Control register bits */ -#define ARM3_CTL_CACHE_ON 0x00000001 -#define ARM3_CTL_SHARED 0x00000002 -#define 
ARM3_CTL_MONITOR 0x00000004 - -/* - * Post-ARM3 CP15 registers: - * - * 1 Control register - * - * 2 Translation Table Base - * - * 3 Domain Access Control - * - * 4 Reserved - * - * 5 Fault Status - * - * 6 Fault Address - * - * 7 Cache/write-buffer Control - * - * 8 TLB Control - * - * 9 Cache Lockdown - * - * 10 TLB Lockdown - * - * 11 Reserved - * - * 12 Reserved - * - * 13 Process ID (for FCSE) - * - * 14 Reserved - * - * 15 Implementation Dependent - */ - -/* Some of the definitions below need cleaning up for V3/V4 architectures */ - -/* CPU control register (CP15 register 1) */ -#define CPU_CONTROL_MMU_ENABLE 0x00000001 /* M: MMU/Protection unit enable */ -#define CPU_CONTROL_AFLT_ENABLE 0x00000002 /* A: Alignment fault enable */ -#define CPU_CONTROL_DC_ENABLE 0x00000004 /* C: IDC/DC enable */ -#define CPU_CONTROL_WBUF_ENABLE 0x00000008 /* W: Write buffer enable */ -#define CPU_CONTROL_32BP_ENABLE 0x00000010 /* P: 32-bit exception handlers */ -#define CPU_CONTROL_32BD_ENABLE 0x00000020 /* D: 32-bit addressing */ -#define CPU_CONTROL_LABT_ENABLE 0x00000040 /* L: Late abort enable */ -#define CPU_CONTROL_BEND_ENABLE 0x00000080 /* B: Big-endian mode */ -#define CPU_CONTROL_SYST_ENABLE 0x00000100 /* S: System protection bit */ -#define CPU_CONTROL_ROM_ENABLE 0x00000200 /* R: ROM protection bit */ -#define CPU_CONTROL_CPCLK 0x00000400 /* F: Implementation defined */ -#define CPU_CONTROL_BPRD_ENABLE 0x00000800 /* Z: Branch prediction enable */ -#define CPU_CONTROL_IC_ENABLE 0x00001000 /* I: IC enable */ -#define CPU_CONTROL_VECRELOC 0x00002000 /* V: Vector relocation */ -#define CPU_CONTROL_ROUNDROBIN 0x00004000 /* RR: Predictable replacement */ -#define CPU_CONTROL_V4COMPAT 0x00008000 /* L4: ARMv4 compat LDR R15 etc */ - -#define CPU_CONTROL_IDC_ENABLE CPU_CONTROL_DC_ENABLE - -/* XScale Auxillary Control Register (CP15 register 1, opcode2 1) */ -#define XSCALE_AUXCTL_K 0x00000001 /* dis. 
write buffer coalescing */ -#define XSCALE_AUXCTL_P 0x00000002 /* ECC protect page table access */ -#define XSCALE_AUXCTL_MD_WB_RA 0x00000000 /* mini-D$ wb, read-allocate */ -#define XSCALE_AUXCTL_MD_WB_RWA 0x00000010 /* mini-D$ wb, read/write-allocate */ -#define XSCALE_AUXCTL_MD_WT 0x00000020 /* mini-D$ wt, read-allocate */ -#define XSCALE_AUXCTL_MD_MASK 0x00000030 - -/* Cache type register definitions */ -#define CPU_CT_ISIZE(x) ((x) & 0xfff) /* I$ info */ -#define CPU_CT_DSIZE(x) (((x) >> 12) & 0xfff) /* D$ info */ -#define CPU_CT_S (1U << 24) /* split cache */ -#define CPU_CT_CTYPE(x) (((x) >> 25) & 0xf) /* cache type */ - -#define CPU_CT_CTYPE_WT 0 /* write-through */ -#define CPU_CT_CTYPE_WB1 1 /* write-back, clean w/ read */ -#define CPU_CT_CTYPE_WB2 2 /* w/b, clean w/ cp15,7 */ -#define CPU_CT_CTYPE_WB6 6 /* w/b, cp15,7, lockdown fmt A */ -#define CPU_CT_CTYPE_WB7 7 /* w/b, cp15,7, lockdown fmt B */ - -#define CPU_CT_xSIZE_LEN(x) ((x) & 0x3) /* line size */ -#define CPU_CT_xSIZE_M (1U << 2) /* multiplier */ -#define CPU_CT_xSIZE_ASSOC(x) (((x) >> 3) & 0x7) /* associativity */ -#define CPU_CT_xSIZE_SIZE(x) (((x) >> 6) & 0x7) /* size */ - -/* Fault status register definitions */ - -#define FAULT_TYPE_MASK 0x0f -#define FAULT_USER 0x10 - -#define FAULT_WRTBUF_0 0x00 /* Vector Exception */ -#define FAULT_WRTBUF_1 0x02 /* Terminal Exception */ -#define FAULT_BUSERR_0 0x04 /* External Abort on Linefetch -- Section */ -#define FAULT_BUSERR_1 0x06 /* External Abort on Linefetch -- Page */ -#define FAULT_BUSERR_2 0x08 /* External Abort on Non-linefetch -- Section */ -#define FAULT_BUSERR_3 0x0a /* External Abort on Non-linefetch -- Page */ -#define FAULT_BUSTRNL1 0x0c /* External abort on Translation -- Level 1 */ -#define FAULT_BUSTRNL2 0x0e /* External abort on Translation -- Level 2 */ -#define FAULT_ALIGN_0 0x01 /* Alignment */ -#define FAULT_ALIGN_1 0x03 /* Alignment */ -#define FAULT_TRANS_S 0x05 /* Translation -- Section */ -#define FAULT_TRANS_P 0x07 /* 
Translation -- Page */ -#define FAULT_DOMAIN_S 0x09 /* Domain -- Section */ -#define FAULT_DOMAIN_P 0x0b /* Domain -- Page */ -#define FAULT_PERM_S 0x0d /* Permission -- Section */ -#define FAULT_PERM_P 0x0f /* Permission -- Page */ - -#define FAULT_IMPRECISE 0x400 /* Imprecise exception (XSCALE) */ - -/* - * Address of the vector page, low and high versions. - */ -#define ARM_VECTORS_LOW 0x00000000U -#define ARM_VECTORS_HIGH 0xffff0000U - -/* - * ARM Instructions - * - * 3 3 2 2 2 - * 1 0 9 8 7 0 - * +-------+-------------------------------------------------------+ - * | cond | instruction dependant | - * |c c c c| | - * +-------+-------------------------------------------------------+ - */ - -#define INSN_SIZE 4 /* Always 4 bytes */ -#define INSN_COND_MASK 0xf0000000 /* Condition mask */ -#define INSN_COND_AL 0xe0000000 /* Always condition */ - -#endif /* !MACHINE_ARMREG_H */ diff --git a/libpixelflinger/codeflinger/blending.cpp b/libpixelflinger/codeflinger/blending.cpp deleted file mode 100644 index f10217b7..00000000 --- a/libpixelflinger/codeflinger/blending.cpp +++ /dev/null @@ -1,682 +0,0 @@ -/* libs/pixelflinger/codeflinger/blending.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ - -#include <assert.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <sys/types.h> - -#include <cutils/log.h> - -#include "codeflinger/GGLAssembler.h" - - -namespace android { - -void GGLAssembler::build_fog( - component_t& temp, // incomming fragment / output - int component, - Scratch& regs) -{ - if (mInfo[component].fog) { - Scratch scratches(registerFile()); - comment("fog"); - - integer_t fragment(temp.reg, temp.h, temp.flags); - if (!(temp.flags & CORRUPTIBLE)) { - temp.reg = regs.obtain(); - temp.flags |= CORRUPTIBLE; - } - - integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE); - LDRB(AL, fogColor.reg, mBuilderContext.Rctx, - immed12_pre(GGL_OFFSETOF(state.fog.color[component]))); - - integer_t factor(scratches.obtain(), 16, CORRUPTIBLE); - CONTEXT_LOAD(factor.reg, generated_vars.f); - - // clamp fog factor (TODO: see if there is a way to guarantee - // we won't overflow, when setting the iterators) - BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31)); - CMP(AL, factor.reg, imm( 0x10000 )); - MOV(HS, 0, factor.reg, imm( 0x10000 )); - - build_blendFOneMinusF(temp, factor, fragment, fogColor); - } -} - -void GGLAssembler::build_blending( - component_t& temp, // incomming fragment / output - const pixel_t& pixel, // framebuffer - int component, - Scratch& regs) -{ - if (!mInfo[component].blend) - return; - - int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; - int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; - if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) - fs = GGL_ONE; - const int blending = blending_codes(fs, fd); - if (!temp.size()) { - // here, blending will produce something which doesn't depend on - // that component (eg: GL_ZERO:GL_*), so the register has not been - // allocated yet. Will never be used as a source. - temp = component_t(regs.obtain(), CORRUPTIBLE); - } - - // we are doing real blending... 
- // fb: extracted dst - // fragment: extracted src - // temp: component_t(fragment) and result - - // scoped register allocator - Scratch scratches(registerFile()); - comment("blending"); - - // we can optimize these cases a bit... - // (1) saturation is not needed - // (2) we can use only one multiply instead of 2 - // (3) we can reduce the register pressure - // R = S*f + D*(1-f) = (S-D)*f + D - // R = S*(1-f) + D*f = (D-S)*f + S - - const bool same_factor_opt1 = - (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) || - (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) || - (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) || - (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA); - - const bool same_factor_opt2 = - (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) || - (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) || - (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) || - (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA); - - - // XXX: we could also optimize these cases: - // R = S*f + D*f = (S+D)*f - // R = S*(1-f) + D*(1-f) = (S+D)*(1-f) - // R = S*D + D*S = 2*S*D - - - // see if we need to extract 'component' from the destination (fb) - integer_t fb; - if (blending & (BLEND_DST|FACTOR_DST)) { - fb.setTo(scratches.obtain(), 32); - extract(fb, pixel, component); - if (mDithering) { - // XXX: maybe what we should do instead, is simply - // expand fb -or- fragment to the larger of the two - if (fb.size() < temp.size()) { - // for now we expand 'fb' to min(fragment, 8) - int new_size = temp.size() < 8 ? 
temp.size() : 8; - expand(fb, fb, new_size); - } - } - } - - - // convert input fragment to integer_t - if (temp.l && (temp.flags & CORRUPTIBLE)) { - MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l)); - temp.h -= temp.l; - temp.l = 0; - } - integer_t fragment(temp.reg, temp.size(), temp.flags); - - // if not done yet, convert input fragment to integer_t - if (temp.l) { - // here we know temp is not CORRUPTIBLE - fragment.reg = scratches.obtain(); - MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l)); - fragment.flags |= CORRUPTIBLE; - } - - if (!(temp.flags & CORRUPTIBLE)) { - // temp is not corruptible, but since it's the destination it - // will be modified, so we need to allocate a new register. - temp.reg = regs.obtain(); - temp.flags &= ~CORRUPTIBLE; - fragment.flags &= ~CORRUPTIBLE; - } - - if ((blending & BLEND_SRC) && !same_factor_opt1) { - // source (fragment) is needed for the blending stage - // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1) - fragment.flags &= ~CORRUPTIBLE; - } - - - if (same_factor_opt1) { - // R = S*f + D*(1-f) = (S-D)*f + D - integer_t factor; - build_blend_factor(factor, fs, - component, pixel, fragment, fb, scratches); - // fb is always corruptible from this point - fb.flags |= CORRUPTIBLE; - build_blendFOneMinusF(temp, factor, fragment, fb); - } else if (same_factor_opt2) { - // R = S*(1-f) + D*f = (D-S)*f + S - integer_t factor; - // fb is always corrruptible here - fb.flags |= CORRUPTIBLE; - build_blend_factor(factor, fd, - component, pixel, fragment, fb, scratches); - build_blendOneMinusFF(temp, factor, fragment, fb); - } else { - integer_t src_factor; - integer_t dst_factor; - - // if destination (fb) is not needed for the blending stage, - // then it can be marked as CORRUPTIBLE - if (!(blending & BLEND_DST)) { - fb.flags |= CORRUPTIBLE; - } - - // XXX: try to mark some registers as CORRUPTIBLE - // in most case we could make those corruptible - // when we're processing the last component - // but not 
always, for instance - // when fragment is constant and not reloaded - // when fb is needed for logic-ops or masking - // when a register is aliased (for instance with mAlphaSource) - - // blend away... - if (fs==GGL_ZERO) { - if (fd==GGL_ZERO) { // R = 0 - // already taken care of - } else if (fd==GGL_ONE) { // R = D - // already taken care of - } else { // R = D*fd - // compute fd - build_blend_factor(dst_factor, fd, - component, pixel, fragment, fb, scratches); - mul_factor(temp, fb, dst_factor); - } - } else if (fs==GGL_ONE) { - if (fd==GGL_ZERO) { // R = S - // NOP, taken care of - } else if (fd==GGL_ONE) { // R = S + D - component_add(temp, fb, fragment); // args order matters - component_sat(temp); - } else { // R = S + D*fd - // compute fd - build_blend_factor(dst_factor, fd, - component, pixel, fragment, fb, scratches); - mul_factor_add(temp, fb, dst_factor, component_t(fragment)); - if (fd==GGL_ONE_MINUS_SRC_ALPHA) { - // XXX: in theory this is not correct, we should - // saturate here. However, this mode is often - // used for displaying alpha-premultiplied graphics, - // in which case, saturation is not necessary. - // unfortunatelly, we have no way to know. - // This is a case, where we sacrifice correctness for - // performance. we should probably have some heuristics. 
- } else { - component_sat(temp); - } - } - } else { - // compute fs - build_blend_factor(src_factor, fs, - component, pixel, fragment, fb, scratches); - if (fd==GGL_ZERO) { // R = S*fs - mul_factor(temp, fragment, src_factor); - } else if (fd==GGL_ONE) { // R = S*fs + D - mul_factor_add(temp, fragment, src_factor, component_t(fb)); - component_sat(temp); - } else { // R = S*fs + D*fd - mul_factor(temp, fragment, src_factor); - if (scratches.isUsed(src_factor.reg)) - scratches.recycle(src_factor.reg); - // compute fd - build_blend_factor(dst_factor, fd, - component, pixel, fragment, fb, scratches); - mul_factor_add(temp, fb, dst_factor, temp); - if (!same_factor_opt1 && !same_factor_opt2) { - component_sat(temp); - } - } - } - } - - // now we can be corrupted (it's the dest) - temp.flags |= CORRUPTIBLE; -} - -void GGLAssembler::build_blend_factor( - integer_t& factor, int f, int component, - const pixel_t& dst_pixel, - integer_t& fragment, - integer_t& fb, - Scratch& scratches) -{ - integer_t src_alpha(fragment); - - // src_factor/dst_factor won't be used after blending, - // so it's fine to mark them as CORRUPTIBLE (if not aliased) - factor.flags |= CORRUPTIBLE; - - switch(f) { - case GGL_ONE_MINUS_SRC_ALPHA: - case GGL_SRC_ALPHA: - if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) { - // we're processing alpha, so we already have - // src-alpha in fragment, and we need src-alpha just this time. - } else { - // alpha-src will be needed for other components - if (!mBlendFactorCached || mBlendFactorCached==f) { - src_alpha = mAlphaSource; - factor = mAlphaSource; - factor.flags &= ~CORRUPTIBLE; - // we already computed the blend factor before, nothing to do. - if (mBlendFactorCached) - return; - // this is the first time, make sure to compute the blend - // factor properly. 
- mBlendFactorCached = f; - break; - } else { - // we have a cached alpha blend factor, but we want another one, - // this should really not happen because by construction, - // we cannot have BOTH source and destination - // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because - // the blending stage uses the f/(1-f) optimization - - // for completeness, we handle this case though. Since there - // are only 2 choices, this meens we want "the other one" - // (1-factor) - factor = mAlphaSource; - factor.flags &= ~CORRUPTIBLE; - RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s))); - mBlendFactorCached = f; - return; - } - } - // fall-through... - case GGL_ONE_MINUS_DST_COLOR: - case GGL_DST_COLOR: - case GGL_ONE_MINUS_SRC_COLOR: - case GGL_SRC_COLOR: - case GGL_ONE_MINUS_DST_ALPHA: - case GGL_DST_ALPHA: - case GGL_SRC_ALPHA_SATURATE: - // help us find out what register we can use for the blend-factor - // CORRUPTIBLE registers are chosen first, or a new one is allocated. - if (fragment.flags & CORRUPTIBLE) { - factor.setTo(fragment.reg, 32, CORRUPTIBLE); - fragment.flags &= ~CORRUPTIBLE; - } else if (fb.flags & CORRUPTIBLE) { - factor.setTo(fb.reg, 32, CORRUPTIBLE); - fb.flags &= ~CORRUPTIBLE; - } else { - factor.setTo(scratches.obtain(), 32, CORRUPTIBLE); - } - break; - } - - // XXX: doesn't work if size==1 - - switch(f) { - case GGL_ONE_MINUS_DST_COLOR: - case GGL_DST_COLOR: - factor.s = fb.s; - ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1)); - break; - case GGL_ONE_MINUS_SRC_COLOR: - case GGL_SRC_COLOR: - factor.s = fragment.s; - ADD(AL, 0, factor.reg, fragment.reg, - reg_imm(fragment.reg, LSR, fragment.s-1)); - break; - case GGL_ONE_MINUS_SRC_ALPHA: - case GGL_SRC_ALPHA: - factor.s = src_alpha.s; - ADD(AL, 0, factor.reg, src_alpha.reg, - reg_imm(src_alpha.reg, LSR, src_alpha.s-1)); - break; - case GGL_ONE_MINUS_DST_ALPHA: - case GGL_DST_ALPHA: - // XXX: should be precomputed - extract(factor, dst_pixel, GGLFormat::ALPHA); - ADD(AL, 0, 
factor.reg, factor.reg, - reg_imm(factor.reg, LSR, factor.s-1)); - break; - case GGL_SRC_ALPHA_SATURATE: - // XXX: should be precomputed - // XXX: f = min(As, 1-Ad) - // btw, we're guaranteed that Ad's size is <= 8, because - // it's extracted from the framebuffer - break; - } - - switch(f) { - case GGL_ONE_MINUS_DST_COLOR: - case GGL_ONE_MINUS_SRC_COLOR: - case GGL_ONE_MINUS_DST_ALPHA: - case GGL_ONE_MINUS_SRC_ALPHA: - RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s))); - } - - // don't need more than 8-bits for the blend factor - // and this will prevent overflows in the multiplies later - if (factor.s > 8) { - MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8)); - factor.s = 8; - } -} - -int GGLAssembler::blending_codes(int fs, int fd) -{ - int blending = 0; - switch(fs) { - case GGL_ONE: - blending |= BLEND_SRC; - break; - - case GGL_ONE_MINUS_DST_COLOR: - case GGL_DST_COLOR: - blending |= FACTOR_DST|BLEND_SRC; - break; - case GGL_ONE_MINUS_DST_ALPHA: - case GGL_DST_ALPHA: - // no need to extract 'component' from the destination - // for the blend factor, because we need ALPHA only. - blending |= BLEND_SRC; - break; - - case GGL_ONE_MINUS_SRC_COLOR: - case GGL_SRC_COLOR: - blending |= FACTOR_SRC|BLEND_SRC; - break; - case GGL_ONE_MINUS_SRC_ALPHA: - case GGL_SRC_ALPHA: - case GGL_SRC_ALPHA_SATURATE: - blending |= FACTOR_SRC|BLEND_SRC; - break; - } - switch(fd) { - case GGL_ONE: - blending |= BLEND_DST; - break; - - case GGL_ONE_MINUS_DST_COLOR: - case GGL_DST_COLOR: - blending |= FACTOR_DST|BLEND_DST; - break; - case GGL_ONE_MINUS_DST_ALPHA: - case GGL_DST_ALPHA: - blending |= FACTOR_DST|BLEND_DST; - break; - - case GGL_ONE_MINUS_SRC_COLOR: - case GGL_SRC_COLOR: - blending |= FACTOR_SRC|BLEND_DST; - break; - case GGL_ONE_MINUS_SRC_ALPHA: - case GGL_SRC_ALPHA: - // no need to extract 'component' from the source - // for the blend factor, because we need ALPHA only. 
- blending |= BLEND_DST; - break; - } - return blending; -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::build_blendFOneMinusF( - component_t& temp, - const integer_t& factor, - const integer_t& fragment, - const integer_t& fb) -{ - // R = S*f + D*(1-f) = (S-D)*f + D - Scratch scratches(registerFile()); - // compute S-D - integer_t diff(fragment.flags & CORRUPTIBLE ? - fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); - const int shift = fragment.size() - fb.size(); - if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift)); - else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift)); - else RSB(AL, 0, diff.reg, fb.reg, fragment.reg); - mul_factor_add(temp, diff, factor, component_t(fb)); -} - -void GGLAssembler::build_blendOneMinusFF( - component_t& temp, - const integer_t& factor, - const integer_t& fragment, - const integer_t& fb) -{ - // R = S*f + D*(1-f) = (S-D)*f + D - Scratch scratches(registerFile()); - // compute D-S - integer_t diff(fb.flags & CORRUPTIBLE ? - fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); - const int shift = fragment.size() - fb.size(); - if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift)); - else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift)); - else SUB(AL, 0, diff.reg, fb.reg, fragment.reg); - mul_factor_add(temp, diff, factor, component_t(fragment)); -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::mul_factor( component_t& d, - const integer_t& v, - const integer_t& f) -{ - int vs = v.size(); - int fs = f.size(); - int ms = vs+fs; - - // XXX: we could have special cases for 1 bit mul - - // all this code below to use the best multiply instruction - // wrt the parameters size. 
We take advantage of the fact - // that the 16-bits multiplies allow a 16-bit shift - // The trick is that we just make sure that we have at least 8-bits - // per component (which is enough for a 8 bits display). - - int xy; - int vshift = 0; - int fshift = 0; - int smulw = 0; - - if (vs<16) { - if (fs<16) { - xy = xyBB; - } else if (GGL_BETWEEN(fs, 24, 31)) { - ms -= 16; - xy = xyTB; - } else { - // eg: 15 * 18 -> 15 * 15 - fshift = fs - 15; - ms -= fshift; - xy = xyBB; - } - } else if (GGL_BETWEEN(vs, 24, 31)) { - if (fs<16) { - ms -= 16; - xy = xyTB; - } else if (GGL_BETWEEN(fs, 24, 31)) { - ms -= 32; - xy = xyTT; - } else { - // eg: 24 * 18 -> 8 * 18 - fshift = fs - 15; - ms -= 16 + fshift; - xy = xyTB; - } - } else { - if (fs<16) { - // eg: 18 * 15 -> 15 * 15 - vshift = vs - 15; - ms -= vshift; - xy = xyBB; - } else if (GGL_BETWEEN(fs, 24, 31)) { - // eg: 18 * 24 -> 15 * 8 - vshift = vs - 15; - ms -= 16 + vshift; - xy = xyBT; - } else { - // eg: 18 * 18 -> (15 * 18)>>16 - fshift = fs - 15; - ms -= 16 + fshift; - xy = yB; //XXX SMULWB - smulw = 1; - } - } - - LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs); - - int vreg = v.reg; - int freg = f.reg; - if (vshift) { - MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift)); - vreg = d.reg; - } - if (fshift) { - MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift)); - freg = d.reg; - } - if (smulw) SMULW(AL, xy, d.reg, vreg, freg); - else SMUL(AL, xy, d.reg, vreg, freg); - - - d.h = ms; - if (mDithering) { - d.l = 0; - } else { - d.l = fs; - d.flags |= CLEAR_LO; - } -} - -void GGLAssembler::mul_factor_add( component_t& d, - const integer_t& v, - const integer_t& f, - const component_t& a) -{ - // XXX: we could have special cases for 1 bit mul - Scratch scratches(registerFile()); - - int vs = v.size(); - int fs = f.size(); - int as = a.h; - int ms = vs+fs; - - LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as); - - integer_t add(a.reg, a.h, a.flags); - - // 'a' is a component_t but it is 
guaranteed to have - // its high bits set to 0. However in the dithering case, - // we can't get away with truncating the potentially bad bits - // so extraction is needed. - - if ((mDithering) && (a.size() < ms)) { - // we need to expand a - if (!(a.flags & CORRUPTIBLE)) { - // ... but it's not corruptible, so we need to pick a - // temporary register. - // Try to uses the destination register first (it's likely - // to be usable, unless it aliases an input). - if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) { - add.reg = d.reg; - } else { - add.reg = scratches.obtain(); - } - } - expand(add, a, ms); // extracts and expands - as = ms; - } - - if (ms == as) { - if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg); - else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg); - } else { - int temp = d.reg; - if (temp == add.reg) { - // the mul will modify add.reg, we need an intermediary reg - if (v.flags & CORRUPTIBLE) temp = v.reg; - else if (f.flags & CORRUPTIBLE) temp = f.reg; - else temp = scratches.obtain(); - } - - if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg); - else MUL(AL, 0, temp, v.reg, f.reg); - - if (ms>as) { - ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as)); - } else if (ms<as) { - // not sure if we should expand the mul instead? - ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms)); - } - } - - d.h = ms; - if (mDithering) { - d.l = a.l; - } else { - d.l = fs>a.l ? 
fs : a.l; - d.flags |= CLEAR_LO; - } -} - -void GGLAssembler::component_add(component_t& d, - const integer_t& dst, const integer_t& src) -{ - // here we're guaranteed that fragment.size() >= fb.size() - const int shift = src.size() - dst.size(); - if (!shift) { - ADD(AL, 0, d.reg, src.reg, dst.reg); - } else { - ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift)); - } - - d.h = src.size(); - if (mDithering) { - d.l = 0; - } else { - d.l = shift; - d.flags |= CLEAR_LO; - } -} - -void GGLAssembler::component_sat(const component_t& v) -{ - const int one = ((1<<v.size())-1)<<v.l; - CMP(AL, v.reg, imm( 1<<v.h )); - if (isValidImmediate(one)) { - MOV(HS, 0, v.reg, imm( one )); - } else if (isValidImmediate(~one)) { - MVN(HS, 0, v.reg, imm( ~one )); - } else { - MOV(HS, 0, v.reg, imm( 1<<v.h )); - SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l )); - } -} - -// ---------------------------------------------------------------------------- - -}; // namespace android - diff --git a/libpixelflinger/codeflinger/disassem.c b/libpixelflinger/codeflinger/disassem.c deleted file mode 100644 index 4676da0d..00000000 --- a/libpixelflinger/codeflinger/disassem.c +++ /dev/null @@ -1,702 +0,0 @@ -/* $NetBSD: disassem.c,v 1.14 2003/03/27 16:58:36 mycroft Exp $ */ - -/*- - * Copyright (c) 1996 Mark Brinicombe. - * Copyright (c) 1996 Brini. - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Brini. - * 4. The name of the company nor the name of the author may be used to - * endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * RiscBSD kernel project - * - * db_disasm.c - * - * Kernel disassembler - * - * Created : 10/02/96 - * - * Structured after the sparc/sparc/db_disasm.c by David S. Miller & - * Paul Kranenburg - * - * This code is not complete. Not all instructions are disassembled. - */ - -#include <sys/cdefs.h> -//__FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/arm/arm/disassem.c,v 1.2 2005/01/05 21:58:47 imp Exp $"); -#include <sys/param.h> -#include <stdio.h> - -#include "disassem.h" -#include "armreg.h" -//#include <ddb/ddb.h> - -/* - * General instruction format - * - * insn[cc][mod] [operands] - * - * Those fields with an uppercase format code indicate that the field - * follows directly after the instruction before the separator i.e. - * they modify the instruction rather than just being an operand to - * the instruction. 
The only exception is the writeback flag which - * follows a operand. - * - * - * 2 - print Operand 2 of a data processing instruction - * d - destination register (bits 12-15) - * n - n register (bits 16-19) - * s - s register (bits 8-11) - * o - indirect register rn (bits 16-19) (used by swap) - * m - m register (bits 0-3) - * a - address operand of ldr/str instruction - * e - address operand of ldrh/strh instruction - * l - register list for ldm/stm instruction - * f - 1st fp operand (register) (bits 12-14) - * g - 2nd fp operand (register) (bits 16-18) - * h - 3rd fp operand (register/immediate) (bits 0-4) - * b - branch address - * t - thumb branch address (bits 24, 0-23) - * k - breakpoint comment (bits 0-3, 8-19) - * X - block transfer type - * Y - block transfer type (r13 base) - * c - comment field bits(0-23) - * p - saved or current status register - * F - PSR transfer fields - * D - destination-is-r15 (P) flag on TST, TEQ, CMP, CMN - * L - co-processor transfer size - * S - set status flag - * P - fp precision - * Q - fp precision (for ldf/stf) - * R - fp rounding - * v - co-processor data transfer registers + addressing mode - * W - writeback flag - * x - instruction in hex - * # - co-processor number - * y - co-processor data processing registers - * z - co-processor register transfer registers - */ - -struct arm32_insn { - u_int mask; - u_int pattern; - char* name; - char* format; -}; - -static const struct arm32_insn arm32_i[] = { - { 0x0fffffff, 0x0ff00000, "imb", "c" }, /* Before swi */ - { 0x0fffffff, 0x0ff00001, "imbrange", "c" }, /* Before swi */ - { 0x0f000000, 0x0f000000, "swi", "c" }, - { 0xfe000000, 0xfa000000, "blx", "t" }, /* Before b and bl */ - { 0x0f000000, 0x0a000000, "b", "b" }, - { 0x0f000000, 0x0b000000, "bl", "b" }, - { 0x0fe000f0, 0x00000090, "mul", "Snms" }, - { 0x0fe000f0, 0x00200090, "mla", "Snmsd" }, - { 0x0fe000f0, 0x00800090, "umull", "Sdnms" }, - { 0x0fe000f0, 0x00c00090, "smull", "Sdnms" }, - { 0x0fe000f0, 0x00a00090, 
"umlal", "Sdnms" }, - { 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" }, - { 0x0d700000, 0x04200000, "strt", "daW" }, - { 0x0d700000, 0x04300000, "ldrt", "daW" }, - { 0x0d700000, 0x04600000, "strbt", "daW" }, - { 0x0d700000, 0x04700000, "ldrbt", "daW" }, - { 0x0c500000, 0x04000000, "str", "daW" }, - { 0x0c500000, 0x04100000, "ldr", "daW" }, - { 0x0c500000, 0x04400000, "strb", "daW" }, - { 0x0c500000, 0x04500000, "ldrb", "daW" }, - { 0x0e1f0000, 0x080d0000, "stm", "YnWl" },/* separate out r13 base */ - { 0x0e1f0000, 0x081d0000, "ldm", "YnWl" },/* separate out r13 base */ - { 0x0e100000, 0x08000000, "stm", "XnWl" }, - { 0x0e100000, 0x08100000, "ldm", "XnWl" }, - { 0x0e1000f0, 0x00100090, "ldrb", "deW" }, - { 0x0e1000f0, 0x00000090, "strb", "deW" }, - { 0x0e1000f0, 0x001000d0, "ldrsb", "deW" }, - { 0x0e1000f0, 0x001000b0, "ldrh", "deW" }, - { 0x0e1000f0, 0x000000b0, "strh", "deW" }, - { 0x0e1000f0, 0x001000f0, "ldrsh", "deW" }, - { 0x0f200090, 0x00200090, "und", "x" }, /* Before data processing */ - { 0x0e1000d0, 0x000000d0, "und", "x" }, /* Before data processing */ - { 0x0ff00ff0, 0x01000090, "swp", "dmo" }, - { 0x0ff00ff0, 0x01400090, "swpb", "dmo" }, - { 0x0fbf0fff, 0x010f0000, "mrs", "dp" }, /* Before data processing */ - { 0x0fb0fff0, 0x0120f000, "msr", "pFm" },/* Before data processing */ - { 0x0fb0f000, 0x0320f000, "msr", "pF2" },/* Before data processing */ - { 0x0ffffff0, 0x012fff10, "bx", "m" }, - { 0x0fff0ff0, 0x016f0f10, "clz", "dm" }, - { 0x0ffffff0, 0x012fff30, "blx", "m" }, - { 0xfff000f0, 0xe1200070, "bkpt", "k" }, - { 0x0de00000, 0x00000000, "and", "Sdn2" }, - { 0x0de00000, 0x00200000, "eor", "Sdn2" }, - { 0x0de00000, 0x00400000, "sub", "Sdn2" }, - { 0x0de00000, 0x00600000, "rsb", "Sdn2" }, - { 0x0de00000, 0x00800000, "add", "Sdn2" }, - { 0x0de00000, 0x00a00000, "adc", "Sdn2" }, - { 0x0de00000, 0x00c00000, "sbc", "Sdn2" }, - { 0x0de00000, 0x00e00000, "rsc", "Sdn2" }, - { 0x0df00000, 0x01100000, "tst", "Dn2" }, - { 0x0df00000, 0x01300000, "teq", "Dn2" }, - 
{ 0x0df00000, 0x01500000, "cmp", "Dn2" }, - { 0x0df00000, 0x01700000, "cmn", "Dn2" }, - { 0x0de00000, 0x01800000, "orr", "Sdn2" }, - { 0x0de00000, 0x01a00000, "mov", "Sd2" }, - { 0x0de00000, 0x01c00000, "bic", "Sdn2" }, - { 0x0de00000, 0x01e00000, "mvn", "Sd2" }, - { 0x0ff08f10, 0x0e000100, "adf", "PRfgh" }, - { 0x0ff08f10, 0x0e100100, "muf", "PRfgh" }, - { 0x0ff08f10, 0x0e200100, "suf", "PRfgh" }, - { 0x0ff08f10, 0x0e300100, "rsf", "PRfgh" }, - { 0x0ff08f10, 0x0e400100, "dvf", "PRfgh" }, - { 0x0ff08f10, 0x0e500100, "rdf", "PRfgh" }, - { 0x0ff08f10, 0x0e600100, "pow", "PRfgh" }, - { 0x0ff08f10, 0x0e700100, "rpw", "PRfgh" }, - { 0x0ff08f10, 0x0e800100, "rmf", "PRfgh" }, - { 0x0ff08f10, 0x0e900100, "fml", "PRfgh" }, - { 0x0ff08f10, 0x0ea00100, "fdv", "PRfgh" }, - { 0x0ff08f10, 0x0eb00100, "frd", "PRfgh" }, - { 0x0ff08f10, 0x0ec00100, "pol", "PRfgh" }, - { 0x0f008f10, 0x0e000100, "fpbop", "PRfgh" }, - { 0x0ff08f10, 0x0e008100, "mvf", "PRfh" }, - { 0x0ff08f10, 0x0e108100, "mnf", "PRfh" }, - { 0x0ff08f10, 0x0e208100, "abs", "PRfh" }, - { 0x0ff08f10, 0x0e308100, "rnd", "PRfh" }, - { 0x0ff08f10, 0x0e408100, "sqt", "PRfh" }, - { 0x0ff08f10, 0x0e508100, "log", "PRfh" }, - { 0x0ff08f10, 0x0e608100, "lgn", "PRfh" }, - { 0x0ff08f10, 0x0e708100, "exp", "PRfh" }, - { 0x0ff08f10, 0x0e808100, "sin", "PRfh" }, - { 0x0ff08f10, 0x0e908100, "cos", "PRfh" }, - { 0x0ff08f10, 0x0ea08100, "tan", "PRfh" }, - { 0x0ff08f10, 0x0eb08100, "asn", "PRfh" }, - { 0x0ff08f10, 0x0ec08100, "acs", "PRfh" }, - { 0x0ff08f10, 0x0ed08100, "atn", "PRfh" }, - { 0x0f008f10, 0x0e008100, "fpuop", "PRfh" }, - { 0x0e100f00, 0x0c000100, "stf", "QLv" }, - { 0x0e100f00, 0x0c100100, "ldf", "QLv" }, - { 0x0ff00f10, 0x0e000110, "flt", "PRgd" }, - { 0x0ff00f10, 0x0e100110, "fix", "PRdh" }, - { 0x0ff00f10, 0x0e200110, "wfs", "d" }, - { 0x0ff00f10, 0x0e300110, "rfs", "d" }, - { 0x0ff00f10, 0x0e400110, "wfc", "d" }, - { 0x0ff00f10, 0x0e500110, "rfc", "d" }, - { 0x0ff0ff10, 0x0e90f110, "cmf", "PRgh" }, - { 0x0ff0ff10, 
0x0eb0f110, "cnf", "PRgh" }, - { 0x0ff0ff10, 0x0ed0f110, "cmfe", "PRgh" }, - { 0x0ff0ff10, 0x0ef0f110, "cnfe", "PRgh" }, - { 0xff100010, 0xfe000010, "mcr2", "#z" }, - { 0x0f100010, 0x0e000010, "mcr", "#z" }, - { 0xff100010, 0xfe100010, "mrc2", "#z" }, - { 0x0f100010, 0x0e100010, "mrc", "#z" }, - { 0xff000010, 0xfe000000, "cdp2", "#y" }, - { 0x0f000010, 0x0e000000, "cdp", "#y" }, - { 0xfe100090, 0xfc100000, "ldc2", "L#v" }, - { 0x0e100090, 0x0c100000, "ldc", "L#v" }, - { 0xfe100090, 0xfc000000, "stc2", "L#v" }, - { 0x0e100090, 0x0c000000, "stc", "L#v" }, - { 0xf550f000, 0xf550f000, "pld", "ne" }, - { 0x0ff00ff0, 0x01000050, "qaad", "dmn" }, - { 0x0ff00ff0, 0x01400050, "qdaad", "dmn" }, - { 0x0ff00ff0, 0x01600050, "qdsub", "dmn" }, - { 0x0ff00ff0, 0x01200050, "dsub", "dmn" }, - { 0x0ff000f0, 0x01000080, "smlabb", "nmsd" }, // d & n inverted!! - { 0x0ff000f0, 0x010000a0, "smlatb", "nmsd" }, // d & n inverted!! - { 0x0ff000f0, 0x010000c0, "smlabt", "nmsd" }, // d & n inverted!! - { 0x0ff000f0, 0x010000e0, "smlatt", "nmsd" }, // d & n inverted!! - { 0x0ff000f0, 0x01400080, "smlalbb","ndms" }, // d & n inverted!! - { 0x0ff000f0, 0x014000a0, "smlaltb","ndms" }, // d & n inverted!! - { 0x0ff000f0, 0x014000c0, "smlalbt","ndms" }, // d & n inverted!! - { 0x0ff000f0, 0x014000e0, "smlaltt","ndms" }, // d & n inverted!! - { 0x0ff000f0, 0x01200080, "smlawb", "nmsd" }, // d & n inverted!! - { 0x0ff0f0f0, 0x012000a0, "smulwb","nms" }, // d & n inverted!! - { 0x0ff000f0, 0x012000c0, "smlawt", "nmsd" }, // d & n inverted!! - { 0x0ff0f0f0, 0x012000e0, "smulwt","nms" }, // d & n inverted!! - { 0x0ff0f0f0, 0x01600080, "smulbb","nms" }, // d & n inverted!! - { 0x0ff0f0f0, 0x016000a0, "smultb","nms" }, // d & n inverted!! - { 0x0ff0f0f0, 0x016000c0, "smulbt","nms" }, // d & n inverted!! - { 0x0ff0f0f0, 0x016000e0, "smultt","nms" }, // d & n inverted!! 
- { 0x00000000, 0x00000000, NULL, NULL } -}; - -static char const arm32_insn_conditions[][4] = { - "eq", "ne", "cs", "cc", - "mi", "pl", "vs", "vc", - "hi", "ls", "ge", "lt", - "gt", "le", "", "nv" -}; - -static char const insn_block_transfers[][4] = { - "da", "ia", "db", "ib" -}; - -static char const insn_stack_block_transfers[][4] = { - "ed", "ea", "fd", "fa" -}; - -static char const op_shifts[][4] = { - "lsl", "lsr", "asr", "ror" -}; - -static char const insn_fpa_rounding[][2] = { - "", "p", "m", "z" -}; - -static char const insn_fpa_precision[][2] = { - "s", "d", "e", "p" -}; - -static char const insn_fpaconstants[][8] = { - "0.0", "1.0", "2.0", "3.0", - "4.0", "5.0", "0.5", "10.0" -}; - -#define insn_condition(x) arm32_insn_conditions[(x >> 28) & 0x0f] -#define insn_blktrans(x) insn_block_transfers[(x >> 23) & 3] -#define insn_stkblktrans(x) insn_stack_block_transfers[(x >> 23) & 3] -#define op2_shift(x) op_shifts[(x >> 5) & 3] -#define insn_fparnd(x) insn_fpa_rounding[(x >> 5) & 0x03] -#define insn_fpaprec(x) insn_fpa_precision[(((x >> 18) & 2)|(x >> 7)) & 1] -#define insn_fpaprect(x) insn_fpa_precision[(((x >> 21) & 2)|(x >> 15)) & 1] -#define insn_fpaimm(x) insn_fpaconstants[x & 0x07] - -/* Local prototypes */ -static void disasm_register_shift(const disasm_interface_t *di, u_int insn); -static void disasm_print_reglist(const disasm_interface_t *di, u_int insn); -static void disasm_insn_ldrstr(const disasm_interface_t *di, u_int insn, - u_int loc); -static void disasm_insn_ldrhstrh(const disasm_interface_t *di, u_int insn, - u_int loc); -static void disasm_insn_ldcstc(const disasm_interface_t *di, u_int insn, - u_int loc); -static u_int disassemble_readword(u_int address); -static void disassemble_printaddr(u_int address); - -u_int -disasm(const disasm_interface_t *di, u_int loc, int altfmt) -{ - const struct arm32_insn *i_ptr = &arm32_i[0]; - - u_int insn; - int matchp; - int branch; - char* f_ptr; - int fmt; - - fmt = 0; - matchp = 0; - insn = 
di->di_readword(loc); - -/* di->di_printf("loc=%08x insn=%08x : ", loc, insn);*/ - - while (i_ptr->name) { - if ((insn & i_ptr->mask) == i_ptr->pattern) { - matchp = 1; - break; - } - i_ptr++; - } - - if (!matchp) { - di->di_printf("und%s\t%08x\n", insn_condition(insn), insn); - return(loc + INSN_SIZE); - } - - /* If instruction forces condition code, don't print it. */ - if ((i_ptr->mask & 0xf0000000) == 0xf0000000) - di->di_printf("%s", i_ptr->name); - else - di->di_printf("%s%s", i_ptr->name, insn_condition(insn)); - - f_ptr = i_ptr->format; - - /* Insert tab if there are no instruction modifiers */ - - if (*(f_ptr) < 'A' || *(f_ptr) > 'Z') { - ++fmt; - di->di_printf("\t"); - } - - while (*f_ptr) { - switch (*f_ptr) { - /* 2 - print Operand 2 of a data processing instruction */ - case '2': - if (insn & 0x02000000) { - int rotate= ((insn >> 7) & 0x1e); - - di->di_printf("#0x%08x", - (insn & 0xff) << (32 - rotate) | - (insn & 0xff) >> rotate); - } else { - disasm_register_shift(di, insn); - } - break; - /* d - destination register (bits 12-15) */ - case 'd': - di->di_printf("r%d", ((insn >> 12) & 0x0f)); - break; - /* D - insert 'p' if Rd is R15 */ - case 'D': - if (((insn >> 12) & 0x0f) == 15) - di->di_printf("p"); - break; - /* n - n register (bits 16-19) */ - case 'n': - di->di_printf("r%d", ((insn >> 16) & 0x0f)); - break; - /* s - s register (bits 8-11) */ - case 's': - di->di_printf("r%d", ((insn >> 8) & 0x0f)); - break; - /* o - indirect register rn (bits 16-19) (used by swap) */ - case 'o': - di->di_printf("[r%d]", ((insn >> 16) & 0x0f)); - break; - /* m - m register (bits 0-4) */ - case 'm': - di->di_printf("r%d", ((insn >> 0) & 0x0f)); - break; - /* a - address operand of ldr/str instruction */ - case 'a': - disasm_insn_ldrstr(di, insn, loc); - break; - /* e - address operand of ldrh/strh instruction */ - case 'e': - disasm_insn_ldrhstrh(di, insn, loc); - break; - /* l - register list for ldm/stm instruction */ - case 'l': - disasm_print_reglist(di, 
insn); - break; - /* f - 1st fp operand (register) (bits 12-14) */ - case 'f': - di->di_printf("f%d", (insn >> 12) & 7); - break; - /* g - 2nd fp operand (register) (bits 16-18) */ - case 'g': - di->di_printf("f%d", (insn >> 16) & 7); - break; - /* h - 3rd fp operand (register/immediate) (bits 0-4) */ - case 'h': - if (insn & (1 << 3)) - di->di_printf("#%s", insn_fpaimm(insn)); - else - di->di_printf("f%d", insn & 7); - break; - /* b - branch address */ - case 'b': - branch = ((insn << 2) & 0x03ffffff); - if (branch & 0x02000000) - branch |= 0xfc000000; - di->di_printaddr(loc + 8 + branch); - break; - /* t - blx address */ - case 't': - branch = ((insn << 2) & 0x03ffffff) | - (insn >> 23 & 0x00000002); - if (branch & 0x02000000) - branch |= 0xfc000000; - di->di_printaddr(loc + 8 + branch); - break; - /* X - block transfer type */ - case 'X': - di->di_printf("%s", insn_blktrans(insn)); - break; - /* Y - block transfer type (r13 base) */ - case 'Y': - di->di_printf("%s", insn_stkblktrans(insn)); - break; - /* c - comment field bits(0-23) */ - case 'c': - di->di_printf("0x%08x", (insn & 0x00ffffff)); - break; - /* k - breakpoint comment (bits 0-3, 8-19) */ - case 'k': - di->di_printf("0x%04x", - (insn & 0x000fff00) >> 4 | (insn & 0x0000000f)); - break; - /* p - saved or current status register */ - case 'p': - if (insn & 0x00400000) - di->di_printf("spsr"); - else - di->di_printf("cpsr"); - break; - /* F - PSR transfer fields */ - case 'F': - di->di_printf("_"); - if (insn & (1 << 16)) - di->di_printf("c"); - if (insn & (1 << 17)) - di->di_printf("x"); - if (insn & (1 << 18)) - di->di_printf("s"); - if (insn & (1 << 19)) - di->di_printf("f"); - break; - /* B - byte transfer flag */ - case 'B': - if (insn & 0x00400000) - di->di_printf("b"); - break; - /* L - co-processor transfer size */ - case 'L': - if (insn & (1 << 22)) - di->di_printf("l"); - break; - /* S - set status flag */ - case 'S': - if (insn & 0x00100000) - di->di_printf("s"); - break; - /* P - fp precision 
*/ - case 'P': - di->di_printf("%s", insn_fpaprec(insn)); - break; - /* Q - fp precision (for ldf/stf) */ - case 'Q': - break; - /* R - fp rounding */ - case 'R': - di->di_printf("%s", insn_fparnd(insn)); - break; - /* W - writeback flag */ - case 'W': - if (insn & (1 << 21)) - di->di_printf("!"); - break; - /* # - co-processor number */ - case '#': - di->di_printf("p%d", (insn >> 8) & 0x0f); - break; - /* v - co-processor data transfer registers+addressing mode */ - case 'v': - disasm_insn_ldcstc(di, insn, loc); - break; - /* x - instruction in hex */ - case 'x': - di->di_printf("0x%08x", insn); - break; - /* y - co-processor data processing registers */ - case 'y': - di->di_printf("%d, ", (insn >> 20) & 0x0f); - - di->di_printf("c%d, c%d, c%d", (insn >> 12) & 0x0f, - (insn >> 16) & 0x0f, insn & 0x0f); - - di->di_printf(", %d", (insn >> 5) & 0x07); - break; - /* z - co-processor register transfer registers */ - case 'z': - di->di_printf("%d, ", (insn >> 21) & 0x07); - di->di_printf("r%d, c%d, c%d, %d", - (insn >> 12) & 0x0f, (insn >> 16) & 0x0f, - insn & 0x0f, (insn >> 5) & 0x07); - -/* if (((insn >> 5) & 0x07) != 0) - di->di_printf(", %d", (insn >> 5) & 0x07);*/ - break; - default: - di->di_printf("[%c - unknown]", *f_ptr); - break; - } - if (*(f_ptr+1) >= 'A' && *(f_ptr+1) <= 'Z') - ++f_ptr; - else if (*(++f_ptr)) { - ++fmt; - if (fmt == 1) - di->di_printf("\t"); - else - di->di_printf(", "); - } - }; - - di->di_printf("\n"); - - return(loc + INSN_SIZE); -} - - -static void -disasm_register_shift(const disasm_interface_t *di, u_int insn) -{ - di->di_printf("r%d", (insn & 0x0f)); - if ((insn & 0x00000ff0) == 0) - ; - else if ((insn & 0x00000ff0) == 0x00000060) - di->di_printf(", rrx"); - else { - if (insn & 0x10) - di->di_printf(", %s r%d", op2_shift(insn), - (insn >> 8) & 0x0f); - else - di->di_printf(", %s #%d", op2_shift(insn), - (insn >> 7) & 0x1f); - } -} - - -static void -disasm_print_reglist(const disasm_interface_t *di, u_int insn) -{ - int loop; - int 
start; - int comma; - - di->di_printf("{"); - start = -1; - comma = 0; - - for (loop = 0; loop < 17; ++loop) { - if (start != -1) { - if (loop == 16 || !(insn & (1 << loop))) { - if (comma) - di->di_printf(", "); - else - comma = 1; - if (start == loop - 1) - di->di_printf("r%d", start); - else - di->di_printf("r%d-r%d", start, loop - 1); - start = -1; - } - } else { - if (insn & (1 << loop)) - start = loop; - } - } - di->di_printf("}"); - - if (insn & (1 << 22)) - di->di_printf("^"); -} - -static void -disasm_insn_ldrstr(const disasm_interface_t *di, u_int insn, u_int loc) -{ - int offset; - - offset = insn & 0xfff; - if ((insn & 0x032f0000) == 0x010f0000) { - /* rA = pc, immediate index */ - if (insn & 0x00800000) - loc += offset; - else - loc -= offset; - di->di_printaddr(loc + 8); - } else { - di->di_printf("[r%d", (insn >> 16) & 0x0f); - if ((insn & 0x03000fff) != 0x01000000) { - di->di_printf("%s, ", (insn & (1 << 24)) ? "" : "]"); - if (!(insn & 0x00800000)) - di->di_printf("-"); - if (insn & (1 << 25)) - disasm_register_shift(di, insn); - else - di->di_printf("#0x%03x", offset); - } - if (insn & (1 << 24)) - di->di_printf("]"); - } -} - -static void -disasm_insn_ldrhstrh(const disasm_interface_t *di, u_int insn, u_int loc) -{ - int offset; - - offset = ((insn & 0xf00) >> 4) | (insn & 0xf); - if ((insn & 0x004f0000) == 0x004f0000) { - /* rA = pc, immediate index */ - if (insn & 0x00800000) - loc += offset; - else - loc -= offset; - di->di_printaddr(loc + 8); - } else { - di->di_printf("[r%d", (insn >> 16) & 0x0f); - if ((insn & 0x01400f0f) != 0x01400000) { - di->di_printf("%s, ", (insn & (1 << 24)) ? 
"" : "]"); - if (!(insn & 0x00800000)) - di->di_printf("-"); - if (insn & (1 << 22)) - di->di_printf("#0x%02x", offset); - else - di->di_printf("r%d", (insn & 0x0f)); - } - if (insn & (1 << 24)) - di->di_printf("]"); - } -} - -static void -disasm_insn_ldcstc(const disasm_interface_t *di, u_int insn, u_int loc) -{ - if (((insn >> 8) & 0xf) == 1) - di->di_printf("f%d, ", (insn >> 12) & 0x07); - else - di->di_printf("c%d, ", (insn >> 12) & 0x0f); - - di->di_printf("[r%d", (insn >> 16) & 0x0f); - - di->di_printf("%s, ", (insn & (1 << 24)) ? "" : "]"); - - if (!(insn & (1 << 23))) - di->di_printf("-"); - - di->di_printf("#0x%03x", (insn & 0xff) << 2); - - if (insn & (1 << 24)) - di->di_printf("]"); - - if (insn & (1 << 21)) - di->di_printf("!"); -} - -static u_int -disassemble_readword(u_int address) -{ - return(*((u_int *)address)); -} - -static void -disassemble_printaddr(u_int address) -{ - printf("0x%08x", address); -} - -static const disasm_interface_t disassemble_di = { - disassemble_readword, disassemble_printaddr, printf -}; - -void -disassemble(u_int address) -{ - - (void)disasm(&disassemble_di, address, 0); -} - -/* End of disassem.c */ diff --git a/libpixelflinger/codeflinger/disassem.h b/libpixelflinger/codeflinger/disassem.h deleted file mode 100644 index 02747cd0..00000000 --- a/libpixelflinger/codeflinger/disassem.h +++ /dev/null @@ -1,65 +0,0 @@ -/* $NetBSD: disassem.h,v 1.4 2001/03/04 04:15:58 matt Exp $ */ - -/*- - * Copyright (c) 1997 Mark Brinicombe. - * Copyright (c) 1997 Causality Limited. - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Mark Brinicombe. - * 4. The name of the company nor the name of the author may be used to - * endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Define the interface structure required by the disassembler. 
- * - * $FreeBSD: /repoman/r/ncvs/src/sys/arm/include/disassem.h,v 1.2 2005/01/05 21:58:48 imp Exp $ - */ - -#ifndef ANDROID_MACHINE_DISASSEM_H -#define ANDROID_MACHINE_DISASSEM_H - -#include <sys/types.h> - -#if __cplusplus -extern "C" { -#endif - -typedef struct { - u_int (*di_readword)(u_int); - void (*di_printaddr)(u_int); - void (*di_printf)(const char *, ...); -} disasm_interface_t; - -/* Prototypes for callable functions */ - -u_int disasm(const disasm_interface_t *, u_int, int); -void disassemble(u_int); - -#if __cplusplus -} -#endif - -#endif /* !ANDROID_MACHINE_DISASSEM_H */ diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp deleted file mode 100644 index 93c58257..00000000 --- a/libpixelflinger/codeflinger/load_store.cpp +++ /dev/null @@ -1,378 +0,0 @@ -/* libs/pixelflinger/codeflinger/load_store.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ - -#include <assert.h> -#include <stdio.h> -#include <cutils/log.h> - -#include "codeflinger/GGLAssembler.h" - -namespace android { - -// ---------------------------------------------------------------------------- - -void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags) -{ - const int bits = addr.size; - const int inc = (flags & WRITE_BACK)?1:0; - switch (bits) { - case 32: - if (inc) STR(AL, s.reg, addr.reg, immed12_post(4)); - else STR(AL, s.reg, addr.reg); - break; - case 24: - // 24 bits formats are a little special and used only for RGB - // 0x00BBGGRR is unpacked as R,G,B - STRB(AL, s.reg, addr.reg, immed12_pre(0)); - MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8)); - STRB(AL, s.reg, addr.reg, immed12_pre(1)); - MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8)); - STRB(AL, s.reg, addr.reg, immed12_pre(2)); - if (!(s.flags & CORRUPTIBLE)) { - MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16)); - } - if (inc) - ADD(AL, 0, addr.reg, addr.reg, imm(3)); - break; - case 16: - if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2)); - else STRH(AL, s.reg, addr.reg); - break; - case 8: - if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1)); - else STRB(AL, s.reg, addr.reg); - break; - } -} - -void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags) -{ - Scratch scratches(registerFile()); - int s0; - - const int bits = addr.size; - const int inc = (flags & WRITE_BACK)?1:0; - switch (bits) { - case 32: - if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4)); - else LDR(AL, s.reg, addr.reg); - break; - case 24: - // 24 bits formats are a little special and used only for RGB - // R,G,B is packed as 0x00BBGGRR - s0 = scratches.obtain(); - if (s.reg != addr.reg) { - LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R - LDRB(AL, s0, addr.reg, immed12_pre(1)); // G - ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8)); - LDRB(AL, s0, addr.reg, immed12_pre(2)); // B - ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16)); - } else { - int s1 = scratches.obtain(); - 
LDRB(AL, s1, addr.reg, immed12_pre(0)); // R - LDRB(AL, s0, addr.reg, immed12_pre(1)); // G - ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8)); - LDRB(AL, s0, addr.reg, immed12_pre(2)); // B - ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16)); - } - if (inc) - ADD(AL, 0, addr.reg, addr.reg, imm(3)); - break; - case 16: - if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2)); - else LDRH(AL, s.reg, addr.reg); - break; - case 8: - if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1)); - else LDRB(AL, s.reg, addr.reg); - break; - } -} - -void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits) -{ - const int maskLen = h-l; - - assert(maskLen<=8); - assert(h); - - if (h != bits) { - const int mask = ((1<<maskLen)-1) << l; - if (isValidImmediate(mask)) { - AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; - } else if (isValidImmediate(~mask)) { - BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; - } else { - MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h)); - l += 32-h; - h = 32; - } - s = d.reg; - } - - if (l) { - MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; - s = d.reg; - } - - if (s != d.reg) { - MOV(AL, 0, d.reg, s); - } - - d.s = maskLen; -} - -void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component) -{ - extract(d, s.reg, - s.format.c[component].h, - s.format.c[component].l, - s.size()); -} - -void GGLAssembler::extract(component_t& d, const pixel_t& s, int component) -{ - integer_t r(d.reg, 32, d.flags); - extract(r, s.reg, - s.format.c[component].h, - s.format.c[component].l, - s.size()); - d = component_t(r); -} - - -void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits) -{ - if (s.l || (s.flags & CLEAR_HI)) { - extract(d, s.reg, s.h, s.l, 32); - expand(d, d, dbits); - } else { - expand(d, integer_t(s.reg, s.size(), s.flags), dbits); - } -} - -void GGLAssembler::expand(component_t& d, const component_t& s, int dbits) -{ - integer_t r(d.reg, 32, d.flags); - expand(r, s, dbits); - d = 
component_t(r); -} - -void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits) -{ - assert(src.size()); - - int sbits = src.size(); - int s = src.reg; - int d = dst.reg; - - // be sure to set 'dst' after we read 'src' as they may be identical - dst.s = dbits; - dst.flags = 0; - - if (dbits<=sbits) { - if (s != d) { - MOV(AL, 0, d, s); - } - return; - } - - if (sbits == 1) { - RSB(AL, 0, d, s, reg_imm(s, LSL, dbits)); - // d = (s<<dbits) - s; - return; - } - - if (dbits % sbits) { - MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits)); - // d = s << (dbits-sbits); - dbits -= sbits; - do { - ORR(AL, 0, d, d, reg_imm(d, LSR, sbits)); - // d |= d >> sbits; - dbits -= sbits; - sbits *= 2; - } while(dbits>0); - return; - } - - dbits -= sbits; - do { - ORR(AL, 0, d, s, reg_imm(s, LSL, sbits)); - // d |= d<<sbits; - s = d; - dbits -= sbits; - if (sbits*2 < dbits) { - sbits *= 2; - } - } while(dbits>0); -} - -void GGLAssembler::downshift( - pixel_t& d, int component, component_t s, const reg_t& dither) -{ - const needs_t& needs = mBuilderContext.needs; - Scratch scratches(registerFile()); - - int sh = s.h; - int sl = s.l; - int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; - int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; - int sbits = sh - sl; - - int dh = d.format.c[component].h; - int dl = d.format.c[component].l; - int dbits = dh - dl; - int dithering = 0; - - LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits); - - if (sbits>dbits) { - // see if we need to dither - dithering = mDithering; - } - - int ireg = d.reg; - if (!(d.flags & FIRST)) { - if (s.flags & CORRUPTIBLE) { - ireg = s.reg; - } else { - ireg = scratches.obtain(); - } - } - d.flags &= ~FIRST; - - if (maskHiBits) { - // we need to mask the high bits (and possibly the lowbits too) - // and we might be able to use immediate mask. 
- if (!dithering) { - // we don't do this if we only have maskLoBits because we can - // do it more efficiently below (in the case where dl=0) - const int offset = sh - dbits; - if (dbits<=8 && offset >= 0) { - const uint32_t mask = ((1<<dbits)-1) << offset; - if (isValidImmediate(mask) || isValidImmediate(~mask)) { - build_and_immediate(ireg, s.reg, mask, 32); - sl = offset; - s.reg = ireg; - sbits = dbits; - maskLoBits = maskHiBits = 0; - } - } - } else { - // in the dithering case though, we need to preserve the lower bits - const uint32_t mask = ((1<<sbits)-1) << sl; - if (isValidImmediate(mask) || isValidImmediate(~mask)) { - build_and_immediate(ireg, s.reg, mask, 32); - s.reg = ireg; - maskLoBits = maskHiBits = 0; - } - } - } - - // XXX: we could special case (maskHiBits & !maskLoBits) - // like we do for maskLoBits below, but it happens very rarely - // that we have maskHiBits only and the conditions necessary to lead - // to better code (like doing d |= s << 24) - - if (maskHiBits) { - MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh)); - sl += 32-sh; - sh = 32; - s.reg = ireg; - maskHiBits = 0; - } - - // Downsampling should be performed as follows: - // V * ((1<<dbits)-1) / ((1<<sbits)-1) - // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)] - // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)] - // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits - // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits)) - // - // By approximating (1>>dbits) and (1>>sbits) to 0: - // - // V>>(sbits-dbits) - V>>sbits - // - // A good approximation is V>>(sbits-dbits), - // but better one (needed for dithering) is: - // - // (V>>(sbits-dbits)<<sbits - V)>>sbits - // (V<<dbits - V)>>sbits - // (V - V>>dbits)>>(sbits-dbits) - - // Dithering is done here - if (dithering) { - comment("dithering"); - if (sl) { - MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl)); - sh -= sl; - sl = 0; - s.reg = ireg; - } - // scaling (V-V>>dbits) - SUB(AL, 0, ireg, s.reg, 
reg_imm(s.reg, LSR, dbits)); - const int shift = (GGL_DITHER_BITS - (sbits-dbits)); - if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift)); - else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift)); - else ADD(AL, 0, ireg, ireg, dither.reg); - s.reg = ireg; - } - - if ((maskLoBits|dithering) && (sh > dbits)) { - int shift = sh-dbits; - if (dl) { - MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift)); - if (ireg == d.reg) { - MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl)); - } else { - ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl)); - } - } else { - if (ireg == d.reg) { - MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); - } else { - ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); - } - } - } else { - int shift = sh-dh; - if (shift>0) { - if (ireg == d.reg) { - MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); - } else { - ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); - } - } else if (shift<0) { - if (ireg == d.reg) { - MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift)); - } else { - ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift)); - } - } else { - if (ireg == d.reg) { - if (s.reg != d.reg) { - MOV(AL, 0, d.reg, s.reg); - } - } else { - ORR(AL, 0, d.reg, d.reg, s.reg); - } - } - } -} - -}; // namespace android diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp deleted file mode 100644 index 90e65840..00000000 --- a/libpixelflinger/codeflinger/texturing.cpp +++ /dev/null @@ -1,1251 +0,0 @@ -/* libs/pixelflinger/codeflinger/texturing.cpp -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -#include <assert.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <sys/types.h> - -#include <cutils/log.h> - -#include "codeflinger/GGLAssembler.h" - - -namespace android { - -// --------------------------------------------------------------------------- - -// iterators are initialized like this: -// (intToFixedCenter(x) * dx)>>16 + x0 -// ((x<<16 + 0x8000) * dx)>>16 + x0 -// ((x<<16)*dx + (0x8000*dx))>>16 + x0 -// ( (x*dx) + dx>>1 ) + x0 -// (x*dx) + (dx>>1 + x0) - -void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x) -{ - context_t const* c = mBuilderContext.c; - const needs_t& needs = mBuilderContext.needs; - - if (mSmooth) { - // NOTE: we could take this case in the mDithering + !mSmooth case, - // but this would use up to 4 more registers for the color components - // for only a little added quality. 
- // Currently, this causes the system to run out of registers in - // some case (see issue #719496) - - comment("compute initial iterated color (smooth and/or dither case)"); - - parts.iterated_packed = 0; - parts.packed = 0; - - // 0x1: color component - // 0x2: iterators - const int optReload = mOptLevel >> 1; - if (optReload >= 3) parts.reload = 0; // reload nothing - else if (optReload == 2) parts.reload = 2; // reload iterators - else if (optReload == 1) parts.reload = 1; // reload colors - else if (optReload <= 0) parts.reload = 3; // reload both - - if (!mSmooth) { - // we're not smoothing (just dithering), we never have to - // reload the iterators - parts.reload &= ~2; - } - - Scratch scratches(registerFile()); - const int t0 = (parts.reload & 1) ? scratches.obtain() : 0; - const int t1 = (parts.reload & 2) ? scratches.obtain() : 0; - for (int i=0 ; i<4 ; i++) { - if (!mInfo[i].iterated) - continue; - - // this component exists in the destination and is not replaced - // by a texture unit. - const int c = (parts.reload & 1) ? t0 : obtainReg(); - if (i==0) CONTEXT_LOAD(c, iterators.ydady); - if (i==1) CONTEXT_LOAD(c, iterators.ydrdy); - if (i==2) CONTEXT_LOAD(c, iterators.ydgdy); - if (i==3) CONTEXT_LOAD(c, iterators.ydbdy); - parts.argb[i].reg = c; - - if (mInfo[i].smooth) { - parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg(); - const int dvdx = parts.argb_dx[i].reg; - CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx); - MLA(AL, 0, c, x.reg, dvdx, c); - - // adjust the color iterator to make sure it won't overflow - if (!mAA) { - // this is not needed when we're using anti-aliasing - // because we will (have to) clamp the components - // anyway. 
- int end = scratches.obtain(); - MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16)); - MLA(AL, 1, end, dvdx, end, c); - SUB(MI, 0, c, c, end); - BIC(AL, 0, c, c, reg_imm(c, ASR, 31)); - scratches.recycle(end); - } - } - - if (parts.reload & 1) { - CONTEXT_STORE(c, generated_vars.argb[i].c); - } - } - } else { - // We're not smoothed, so we can - // just use a packed version of the color and extract the - // components as needed (or not at all if we don't blend) - - // figure out if we need the iterated color - int load = 0; - for (int i=0 ; i<4 ; i++) { - component_info_t& info = mInfo[i]; - if ((info.inDest || info.needed) && !info.replaced) - load |= 1; - } - - parts.iterated_packed = 1; - parts.packed = (!mTextureMachine.mask && !mBlending - && !mFog && !mDithering); - parts.reload = 0; - if (load || parts.packed) { - if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) { - comment("load initial iterated color (8888 packed)"); - parts.iterated.setTo(obtainReg(), - &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888])); - CONTEXT_LOAD(parts.iterated.reg, packed8888); - } else { - comment("load initial iterated color (dest format packed)"); - - parts.iterated.setTo(obtainReg(), &mCbFormat); - - // pre-mask the iterated color - const int bits = parts.iterated.size(); - const uint32_t size = ((bits>=32) ? 
0 : (1LU << bits)) - 1; - uint32_t mask = 0; - if (mMasking) { - for (int i=0 ; i<4 ; i++) { - const int component_mask = 1<<i; - const int h = parts.iterated.format.c[i].h; - const int l = parts.iterated.format.c[i].l; - if (h && (!(mMasking & component_mask))) { - mask |= ((1<<(h-l))-1) << l; - } - } - } - - if (mMasking && ((mask & size)==0)) { - // none of the components are present in the mask - } else { - CONTEXT_LOAD(parts.iterated.reg, packed); - if (mCbFormat.size == 1) { - AND(AL, 0, parts.iterated.reg, - parts.iterated.reg, imm(0xFF)); - } else if (mCbFormat.size == 2) { - MOV(AL, 0, parts.iterated.reg, - reg_imm(parts.iterated.reg, LSR, 16)); - } - } - - // pre-mask the iterated color - if (mMasking) { - build_and_immediate(parts.iterated.reg, parts.iterated.reg, - mask, bits); - } - } - } - } -} - -void GGLAssembler::build_iterated_color( - component_t& fragment, - const fragment_parts_t& parts, - int component, - Scratch& regs) -{ - fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE); - - if (!mInfo[component].iterated) - return; - - if (parts.iterated_packed) { - // iterated colors are packed, extract the one we need - extract(fragment, parts.iterated, component); - } else { - fragment.h = GGL_COLOR_BITS; - fragment.l = GGL_COLOR_BITS - 8; - fragment.flags |= CLEAR_LO; - // iterated colors are held in their own register, - // (smooth and/or dithering case) - if (parts.reload==3) { - // this implies mSmooth - Scratch scratches(registerFile()); - int dx = scratches.obtain(); - CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); - CONTEXT_LOAD(dx, generated_vars.argb[component].dx); - ADD(AL, 0, dx, fragment.reg, dx); - CONTEXT_STORE(dx, generated_vars.argb[component].c); - } else if (parts.reload & 1) { - CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); - } else { - // we don't reload, so simply rename the register and mark as - // non CORRUPTIBLE so that the texture env or blending code - // won't modify this (renamed) register - 
regs.recycle(fragment.reg); - fragment.reg = parts.argb[component].reg; - fragment.flags &= ~CORRUPTIBLE; - } - if (mInfo[component].smooth && mAA) { - // when using smooth shading AND anti-aliasing, we need to clamp - // the iterators because there is always an extra pixel on the - // edges, which most of the time will cause an overflow - // (since technically its outside of the domain). - BIC(AL, 0, fragment.reg, fragment.reg, - reg_imm(fragment.reg, ASR, 31)); - component_sat(fragment); - } - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs) -{ - // gather some informations about the components we need to process... - const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; - switch(opcode) { - case GGL_COPY: - mLogicOp = 0; - break; - case GGL_CLEAR: - case GGL_SET: - mLogicOp = LOGIC_OP; - break; - case GGL_AND: - case GGL_AND_REVERSE: - case GGL_AND_INVERTED: - case GGL_XOR: - case GGL_OR: - case GGL_NOR: - case GGL_EQUIV: - case GGL_OR_REVERSE: - case GGL_OR_INVERTED: - case GGL_NAND: - mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST; - break; - case GGL_NOOP: - case GGL_INVERT: - mLogicOp = LOGIC_OP|LOGIC_OP_DST; - break; - case GGL_COPY_INVERTED: - mLogicOp = LOGIC_OP|LOGIC_OP_SRC; - break; - }; -} - -void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c) -{ - uint8_t replaced=0; - mTextureMachine.mask = 0; - mTextureMachine.activeUnits = 0; - for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) { - texture_unit_t& tmu = mTextureMachine.tmu[i]; - if (replaced == 0xF) { - // all components are replaced, skip this TMU. 
- tmu.format_idx = 0; - tmu.mask = 0; - tmu.replaced = replaced; - continue; - } - tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]); - tmu.format = c->formats[tmu.format_idx]; - tmu.bits = tmu.format.size*8; - tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]); - tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]); - tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i])); - tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]); - tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i]) - && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now - - // 5551 linear filtering is not supported - if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551) - tmu.linear = 0; - - tmu.mask = 0; - tmu.replaced = replaced; - - if (tmu.format_idx) { - mTextureMachine.activeUnits++; - if (tmu.format.c[0].h) tmu.mask |= 0x1; - if (tmu.format.c[1].h) tmu.mask |= 0x2; - if (tmu.format.c[2].h) tmu.mask |= 0x4; - if (tmu.format.c[3].h) tmu.mask |= 0x8; - if (tmu.env == GGL_REPLACE) { - replaced |= tmu.mask; - } else if (tmu.env == GGL_DECAL) { - if (!tmu.format.c[GGLFormat::ALPHA].h) { - // if we don't have alpha, decal does nothing - tmu.mask = 0; - } else { - // decal always ignores At - tmu.mask &= ~(1<<GGLFormat::ALPHA); - } - } - } - mTextureMachine.mask |= tmu.mask; - //printf("%d: mask=%08lx, replaced=%08lx\n", - // i, int(tmu.mask), int(tmu.replaced)); - } - mTextureMachine.replaced = replaced; - mTextureMachine.directTexture = 0; - //printf("replaced=%08lx\n", mTextureMachine.replaced); -} - - -void GGLAssembler::init_textures( - tex_coord_t* coords, - const reg_t& x, const reg_t& y) -{ - context_t const* c = mBuilderContext.c; - const needs_t& needs = mBuilderContext.needs; - int Rctx = mBuilderContext.Rctx; - int Rx = x.reg; - int Ry = y.reg; - - if (mTextureMachine.mask) { - comment("compute texture coordinates"); - } - - // init texture coordinates for each tmu - const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n); - const bool multiTexture = mTextureMachine.activeUnits > 1; 
- for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { - const texture_unit_t& tmu = mTextureMachine.tmu[i]; - if (tmu.format_idx == 0) - continue; - if ((tmu.swrap == GGL_NEEDS_WRAP_11) && - (tmu.twrap == GGL_NEEDS_WRAP_11)) - { - // 1:1 texture - pointer_t& txPtr = coords[i].ptr; - txPtr.setTo(obtainReg(), tmu.bits); - CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy); - ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16)); // x += (s>>16) - CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy); - ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16)); // y += (t>>16) - // merge base & offset - CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride); - SMLABB(AL, Rx, Ry, txPtr.reg, Rx); // x+y*stride - CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); - base_offset(txPtr, txPtr, Rx); - } else { - Scratch scratches(registerFile()); - reg_t& s = coords[i].s; - reg_t& t = coords[i].t; - // s = (x * dsdx)>>16 + ydsdy - // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0 - // t = (x * dtdx)>>16 + ydtdy - // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0 - s.setTo(obtainReg()); - t.setTo(obtainReg()); - const int need_w = GGL_READ_NEEDS(W, needs.n); - if (need_w) { - CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy); - CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy); - } else { - int ydsdy = scratches.obtain(); - int ydtdy = scratches.obtain(); - CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx); - CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy); - CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx); - CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy); - MLA(AL, 0, s.reg, Rx, s.reg, ydsdy); - MLA(AL, 0, t.reg, Rx, t.reg, ydtdy); - } - - if ((mOptLevel&1)==0) { - CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); - CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); - recycleReg(s.reg); - recycleReg(t.reg); - } - } - - // direct texture? 
- if (!multiTexture && !mBlending && !mDithering && !mFog && - cb_format_idx == tmu.format_idx && !tmu.linear && - mTextureMachine.replaced == tmu.mask) - { - mTextureMachine.directTexture = i + 1; - } - } -} - -void GGLAssembler::build_textures( fragment_parts_t& parts, - Scratch& regs) -{ - context_t const* c = mBuilderContext.c; - const needs_t& needs = mBuilderContext.needs; - int Rctx = mBuilderContext.Rctx; - - // We don't have a way to spill registers automatically - // spill depth and AA regs, when we know we may have to. - // build the spill list... - uint32_t spill_list = 0; - for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { - const texture_unit_t& tmu = mTextureMachine.tmu[i]; - if (tmu.format_idx == 0) - continue; - if (tmu.linear) { - // we may run out of register if we have linear filtering - // at 1 or 4 bytes / pixel on any texture unit. - if (tmu.format.size == 1) { - // if depth and AA enabled, we'll run out of 1 register - if (parts.z.reg > 0 && parts.covPtr.reg > 0) - spill_list |= 1<<parts.covPtr.reg; - } - if (tmu.format.size == 4) { - // if depth or AA enabled, we'll run out of 1 or 2 registers - if (parts.z.reg > 0) - spill_list |= 1<<parts.z.reg; - if (parts.covPtr.reg > 0) - spill_list |= 1<<parts.covPtr.reg; - } - } - } - - Spill spill(registerFile(), *this, spill_list); - - const bool multiTexture = mTextureMachine.activeUnits > 1; - for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { - const texture_unit_t& tmu = mTextureMachine.tmu[i]; - if (tmu.format_idx == 0) - continue; - - pointer_t& txPtr = parts.coords[i].ptr; - pixel_t& texel = parts.texel[i]; - - // repeat... 
- if ((tmu.swrap == GGL_NEEDS_WRAP_11) && - (tmu.twrap == GGL_NEEDS_WRAP_11)) - { // 1:1 textures - comment("fetch texel"); - texel.setTo(regs.obtain(), &tmu.format); - load(txPtr, texel, WRITE_BACK); - } else { - Scratch scratches(registerFile()); - reg_t& s = parts.coords[i].s; - reg_t& t = parts.coords[i].t; - if ((mOptLevel&1)==0) { - comment("reload s/t (multitexture or linear filtering)"); - s.reg = scratches.obtain(); - t.reg = scratches.obtain(); - CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]); - CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); - } - - comment("compute repeat/clamp"); - int u = scratches.obtain(); - int v = scratches.obtain(); - int width = scratches.obtain(); - int height = scratches.obtain(); - int U = 0; - int V = 0; - - CONTEXT_LOAD(width, generated_vars.texture[i].width); - CONTEXT_LOAD(height, generated_vars.texture[i].height); - - int FRAC_BITS = 0; - if (tmu.linear) { - // linear interpolation - if (tmu.format.size == 1) { - // for 8-bits textures, we can afford - // 7 bits of fractional precision at no - // additional cost (we can't do 8 bits - // because filter8 uses signed 16 bits muls) - FRAC_BITS = 7; - } else if (tmu.format.size == 2) { - // filter16() is internally limited to 4 bits, so: - // FRAC_BITS=2 generates less instructions, - // FRAC_BITS=3,4,5 creates unpleasant artifacts, - // FRAC_BITS=6+ looks good - FRAC_BITS = 6; - } else if (tmu.format.size == 4) { - // filter32() is internally limited to 8 bits, so: - // FRAC_BITS=4 looks good - // FRAC_BITS=5+ looks better, but generates 3 extra ipp - FRAC_BITS = 6; - } else { - // for all other cases we use 4 bits. 
- FRAC_BITS = 4; - } - } - wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS); - wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS); - - if (tmu.linear) { - comment("compute linear filtering offsets"); - // pixel size scale - const int shift = 31 - gglClz(tmu.format.size); - U = scratches.obtain(); - V = scratches.obtain(); - - // sample the texel center - SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1))); - SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1))); - - // get the fractionnal part of U,V - AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1)); - AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1)); - - // compute width-1 and height-1 - SUB(AL, 0, width, width, imm(1)); - SUB(AL, 0, height, height, imm(1)); - - // get the integer part of U,V and clamp/wrap - // and compute offset to the next texel - if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) { - // u has already been REPEATed - MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS)); - MOV(MI, 0, u, width); - CMP(AL, u, width); - MOV(LT, 0, width, imm(1 << shift)); - if (shift) - MOV(GE, 0, width, reg_imm(width, LSL, shift)); - RSB(GE, 0, width, width, imm(0)); - } else { - // u has not been CLAMPed yet - // algorithm: - // if ((u>>4) >= width) - // u = width<<4 - // width = 0 - // else - // width = 1<<shift - // u = u>>4; // get integer part - // if (u<0) - // u = 0 - // width = 0 - // generated_vars.rt = width - - CMP(AL, width, reg_imm(u, ASR, FRAC_BITS)); - MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS)); - MOV(LE, 0, width, imm(0)); - MOV(GT, 0, width, imm(1 << shift)); - MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS)); - MOV(MI, 0, u, imm(0)); - MOV(MI, 0, width, imm(0)); - } - CONTEXT_STORE(width, generated_vars.rt); - - const int stride = width; - CONTEXT_LOAD(stride, generated_vars.texture[i].stride); - if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) { - // v has already been REPEATed - MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS)); - MOV(MI, 0, v, height); - CMP(AL, v, height); - MOV(LT, 0, height, imm(1 << shift)); - if (shift) - MOV(GE, 0, height, reg_imm(height, LSL, shift)); - 
RSB(GE, 0, height, height, imm(0)); - MUL(AL, 0, height, stride, height); - } else { - // u has not been CLAMPed yet - CMP(AL, height, reg_imm(v, ASR, FRAC_BITS)); - MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS)); - MOV(LE, 0, height, imm(0)); - if (shift) { - MOV(GT, 0, height, reg_imm(stride, LSL, shift)); - } else { - MOV(GT, 0, height, stride); - } - MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS)); - MOV(MI, 0, v, imm(0)); - MOV(MI, 0, height, imm(0)); - } - CONTEXT_STORE(height, generated_vars.lb); - } - - scratches.recycle(width); - scratches.recycle(height); - - // iterate texture coordinates... - comment("iterate s,t"); - int dsdx = scratches.obtain(); - int dtdx = scratches.obtain(); - CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); - CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); - ADD(AL, 0, s.reg, s.reg, dsdx); - ADD(AL, 0, t.reg, t.reg, dtdx); - if ((mOptLevel&1)==0) { - CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); - CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); - scratches.recycle(s.reg); - scratches.recycle(t.reg); - } - scratches.recycle(dsdx); - scratches.recycle(dtdx); - - // merge base & offset... 
- comment("merge base & offset"); - texel.setTo(regs.obtain(), &tmu.format); - txPtr.setTo(texel.reg, tmu.bits); - int stride = scratches.obtain(); - CONTEXT_LOAD(stride, generated_vars.texture[i].stride); - CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); - SMLABB(AL, u, v, stride, u); // u+v*stride - base_offset(txPtr, txPtr, u); - - // load texel - if (!tmu.linear) { - comment("fetch texel"); - load(txPtr, texel, 0); - } else { - // recycle registers we don't need anymore - scratches.recycle(u); - scratches.recycle(v); - scratches.recycle(stride); - - comment("fetch texel, bilinear"); - switch (tmu.format.size) { - case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; - case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; - case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; - case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; - } - } - } - } -} - -void GGLAssembler::build_iterate_texture_coordinates( - const fragment_parts_t& parts) -{ - const bool multiTexture = mTextureMachine.activeUnits > 1; - for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { - const texture_unit_t& tmu = mTextureMachine.tmu[i]; - if (tmu.format_idx == 0) - continue; - - if ((tmu.swrap == GGL_NEEDS_WRAP_11) && - (tmu.twrap == GGL_NEEDS_WRAP_11)) - { // 1:1 textures - const pointer_t& txPtr = parts.coords[i].ptr; - ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3)); - } else { - Scratch scratches(registerFile()); - int s = parts.coords[i].s.reg; - int t = parts.coords[i].t.reg; - if ((mOptLevel&1)==0) { - s = scratches.obtain(); - t = scratches.obtain(); - CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]); - CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]); - } - int dsdx = scratches.obtain(); - int dtdx = scratches.obtain(); - CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); - CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); - ADD(AL, 0, s, s, dsdx); - ADD(AL, 0, t, t, dtdx); - if ((mOptLevel&1)==0) { - 
CONTEXT_STORE(s, generated_vars.texture[i].spill[0]); - CONTEXT_STORE(t, generated_vars.texture[i].spill[1]); - } - } - } -} - -void GGLAssembler::filter8( - const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS) -{ - if (tmu.format.components != GGL_ALPHA && - tmu.format.components != GGL_LUMINANCE) - { - // this is a packed format, and we don't support - // linear filtering (it's probably RGB 332) - // Should not happen with OpenGL|ES - LDRB(AL, texel.reg, txPtr.reg); - return; - } - - // ------------------------ - // about ~22 cycles / pixel - Scratch scratches(registerFile()); - - int pixel= scratches.obtain(); - int d = scratches.obtain(); - int u = scratches.obtain(); - int k = scratches.obtain(); - int rt = scratches.obtain(); - int lb = scratches.obtain(); - - // RB -> U * V - - CONTEXT_LOAD(rt, generated_vars.rt); - CONTEXT_LOAD(lb, generated_vars.lb); - - int offset = pixel; - ADD(AL, 0, offset, lb, rt); - LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset)); - SMULBB(AL, u, U, V); - SMULBB(AL, d, pixel, u); - RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2))); - - // LB -> (1-U) * V - RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); - LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb)); - SMULBB(AL, u, U, V); - SMLABB(AL, d, pixel, u, d); - SUB(AL, 0, k, k, u); - - // LT -> (1-U)*(1-V) - RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); - LDRB(AL, pixel, txPtr.reg); - SMULBB(AL, u, U, V); - SMLABB(AL, d, pixel, u, d); - - // RT -> U*(1-V) - LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt)); - SUB(AL, 0, u, k, u); - SMLABB(AL, texel.reg, pixel, u, d); - - for (int i=0 ; i<4 ; i++) { - if (!texel.format.c[i].h) continue; - texel.format.c[i].h = FRAC_BITS*2+8; - texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits in enough - } - texel.format.size = 4; - texel.format.bitsPerPixel = 32; - texel.flags |= CLEAR_LO; -} - -void GGLAssembler::filter16( - const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, 
int V, pointer_t& txPtr, - int FRAC_BITS) -{ - // compute the mask - // XXX: it would be nice if the mask below could be computed - // automatically. - uint32_t mask = 0; - int shift = 0; - int prec = 0; - switch (tmu.format_idx) { - case GGL_PIXEL_FORMAT_RGB_565: - // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb - // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb - mask = 0x07E0F81F; - shift = 16; - prec = 5; - break; - case GGL_PIXEL_FORMAT_RGBA_4444: - // 0000,1111,0000,1111 | 0000,1111,0000,1111 - mask = 0x0F0F0F0F; - shift = 12; - prec = 4; - break; - case GGL_PIXEL_FORMAT_LA_88: - // 0000,0000,1111,1111 | 0000,0000,1111,1111 - // AALL -> 00AA | 00LL - mask = 0x00FF00FF; - shift = 8; - prec = 8; - break; - default: - // unsupported format, do something sensical... - LOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx); - LDRH(AL, texel.reg, txPtr.reg); - return; - } - - const int adjust = FRAC_BITS*2 - prec; - const int round = 0; - - // update the texel format - texel.format.size = 4; - texel.format.bitsPerPixel = 32; - texel.flags |= CLEAR_HI|CLEAR_LO; - for (int i=0 ; i<4 ; i++) { - if (!texel.format.c[i].h) continue; - const uint32_t offset = (mask & tmu.format.mask(i)) ? 
0 : shift; - texel.format.c[i].h = tmu.format.c[i].h + offset + prec; - texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec); - } - - // ------------------------ - // about ~40 cycles / pixel - Scratch scratches(registerFile()); - - int pixel= scratches.obtain(); - int d = scratches.obtain(); - int u = scratches.obtain(); - int k = scratches.obtain(); - - // RB -> U * V - int offset = pixel; - CONTEXT_LOAD(offset, generated_vars.rt); - CONTEXT_LOAD(u, generated_vars.lb); - ADD(AL, 0, offset, offset, u); - - LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); - SMULBB(AL, u, U, V); - ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); - build_and_immediate(pixel, pixel, mask, 32); - if (adjust) { - if (round) - ADD(AL, 0, u, u, imm(1<<(adjust-1))); - MOV(AL, 0, u, reg_imm(u, LSR, adjust)); - } - MUL(AL, 0, d, pixel, u); - RSB(AL, 0, k, u, imm(1<<prec)); - - // LB -> (1-U) * V - CONTEXT_LOAD(offset, generated_vars.lb); - RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); - LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); - SMULBB(AL, u, U, V); - ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); - build_and_immediate(pixel, pixel, mask, 32); - if (adjust) { - if (round) - ADD(AL, 0, u, u, imm(1<<(adjust-1))); - MOV(AL, 0, u, reg_imm(u, LSR, adjust)); - } - MLA(AL, 0, d, pixel, u, d); - SUB(AL, 0, k, k, u); - - // LT -> (1-U)*(1-V) - RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); - LDRH(AL, pixel, txPtr.reg); - SMULBB(AL, u, U, V); - ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); - build_and_immediate(pixel, pixel, mask, 32); - if (adjust) { - if (round) - ADD(AL, 0, u, u, imm(1<<(adjust-1))); - MOV(AL, 0, u, reg_imm(u, LSR, adjust)); - } - MLA(AL, 0, d, pixel, u, d); - - // RT -> U*(1-V) - CONTEXT_LOAD(offset, generated_vars.rt); - LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); - SUB(AL, 0, u, k, u); - ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); - build_and_immediate(pixel, pixel, mask, 32); - MLA(AL, 0, texel.reg, pixel, u, d); -} - -void GGLAssembler::filter24( 
- const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS) -{ - // not supported yet (currently disabled) - load(txPtr, texel, 0); -} - -void GGLAssembler::filter32( - const fragment_parts_t& parts, - pixel_t& texel, const texture_unit_t& tmu, - int U, int V, pointer_t& txPtr, - int FRAC_BITS) -{ - const int adjust = FRAC_BITS*2 - 8; - const int round = 0; - - // ------------------------ - // about ~38 cycles / pixel - Scratch scratches(registerFile()); - - int pixel= scratches.obtain(); - int dh = scratches.obtain(); - int u = scratches.obtain(); - int k = scratches.obtain(); - - int temp = scratches.obtain(); - int dl = scratches.obtain(); - int mask = scratches.obtain(); - - MOV(AL, 0, mask, imm(0xFF)); - ORR(AL, 0, mask, mask, imm(0xFF0000)); - - // RB -> U * V - int offset = pixel; - CONTEXT_LOAD(offset, generated_vars.rt); - CONTEXT_LOAD(u, generated_vars.lb); - ADD(AL, 0, offset, offset, u); - - LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); - SMULBB(AL, u, U, V); - AND(AL, 0, temp, mask, pixel); - if (adjust) { - if (round) - ADD(AL, 0, u, u, imm(1<<(adjust-1))); - MOV(AL, 0, u, reg_imm(u, LSR, adjust)); - } - MUL(AL, 0, dh, temp, u); - AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); - MUL(AL, 0, dl, temp, u); - RSB(AL, 0, k, u, imm(0x100)); - - // LB -> (1-U) * V - CONTEXT_LOAD(offset, generated_vars.lb); - RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); - LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); - SMULBB(AL, u, U, V); - AND(AL, 0, temp, mask, pixel); - if (adjust) { - if (round) - ADD(AL, 0, u, u, imm(1<<(adjust-1))); - MOV(AL, 0, u, reg_imm(u, LSR, adjust)); - } - MLA(AL, 0, dh, temp, u, dh); - AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); - MLA(AL, 0, dl, temp, u, dl); - SUB(AL, 0, k, k, u); - - // LT -> (1-U)*(1-V) - RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); - LDR(AL, pixel, txPtr.reg); - SMULBB(AL, u, U, V); - AND(AL, 0, temp, mask, pixel); - if (adjust) { - if (round) - ADD(AL, 0, u, 
u, imm(1<<(adjust-1))); - MOV(AL, 0, u, reg_imm(u, LSR, adjust)); - } - MLA(AL, 0, dh, temp, u, dh); - AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); - MLA(AL, 0, dl, temp, u, dl); - - // RT -> U*(1-V) - CONTEXT_LOAD(offset, generated_vars.rt); - LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); - SUB(AL, 0, u, k, u); - AND(AL, 0, temp, mask, pixel); - MLA(AL, 0, dh, temp, u, dh); - AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); - MLA(AL, 0, dl, temp, u, dl); - - AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8)); - AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8)); - ORR(AL, 0, texel.reg, dh, dl); -} - -void GGLAssembler::build_texture_environment( - component_t& fragment, - const fragment_parts_t& parts, - int component, - Scratch& regs) -{ - const uint32_t component_mask = 1<<component; - const bool multiTexture = mTextureMachine.activeUnits > 1; - for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { - texture_unit_t& tmu = mTextureMachine.tmu[i]; - - if (tmu.mask & component_mask) { - // replace or modulate with this texture - if ((tmu.replaced & component_mask) == 0) { - // not replaced by a later tmu... 
- - Scratch scratches(registerFile()); - pixel_t texel(parts.texel[i]); - if (multiTexture && - tmu.swrap == GGL_NEEDS_WRAP_11 && - tmu.twrap == GGL_NEEDS_WRAP_11) - { - texel.reg = scratches.obtain(); - texel.flags |= CORRUPTIBLE; - comment("fetch texel (multitexture 1:1)"); - load(parts.coords[i].ptr, texel, WRITE_BACK); - } - - component_t incoming(fragment); - modify(fragment, regs); - - switch (tmu.env) { - case GGL_REPLACE: - extract(fragment, texel, component); - break; - case GGL_MODULATE: - modulate(fragment, incoming, texel, component); - break; - case GGL_DECAL: - decal(fragment, incoming, texel, component); - break; - case GGL_BLEND: - blend(fragment, incoming, texel, component, i); - break; - case GGL_ADD: - add(fragment, incoming, texel, component); - break; - } - } - } - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::wrapping( - int d, - int coord, int size, - int tx_wrap, int tx_linear) -{ - // notes: - // if tx_linear is set, we need 4 extra bits of precision on the result - // SMULL/UMULL is 3 cycles - Scratch scratches(registerFile()); - int c = coord; - if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) { - // UMULL takes 4 cycles (interlocked), and we can get away with - // 2 cycles using SMULWB, but we're loosing 16 bits of precision - // out of 32 (this is not a problem because the iterator keeps - // its full precision) - // UMULL(AL, 0, size, d, c, size); - // note: we can't use SMULTB because it's signed. 
- MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear)); - SMULWB(AL, d, d, size); - } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) { - if (tx_linear) { - // 1 cycle - MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear)); - } else { - // 4 cycles (common case) - MOV(AL, 0, d, reg_imm(coord, ASR, 16)); - BIC(AL, 0, d, d, reg_imm(d, ASR, 31)); - CMP(AL, d, size); - SUB(GE, 0, d, size, imm(1)); - } - } -} - -// --------------------------------------------------------------------------- - -void GGLAssembler::modulate( - component_t& dest, - const component_t& incoming, - const pixel_t& incomingTexel, int component) -{ - Scratch locals(registerFile()); - integer_t texel(locals.obtain(), 32, CORRUPTIBLE); - extract(texel, incomingTexel, component); - - const int Nt = texel.size(); - // Nt should always be less than 10 bits because it comes - // from the TMU. - - int Ni = incoming.size(); - // Ni could be big because it comes from previous MODULATEs - - if (Nt == 1) { - // texel acts as a bit-mask - // dest = incoming & ((texel << incoming.h)-texel) - RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h)); - AND(AL, 0, dest.reg, dest.reg, incoming.reg); - dest.l = incoming.l; - dest.h = incoming.h; - dest.flags |= (incoming.flags & CLEAR_LO); - } else if (Ni == 1) { - MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h)); - AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31)); - dest.l = 0; - dest.h = Nt; - } else { - int inReg = incoming.reg; - int shift = incoming.l; - if ((Nt + Ni) > 32) { - // we will overflow, reduce the precision of Ni to 8 bits - // (Note Nt cannot be more than 10 bits which happens with - // 565 textures and GGL_LINEAR) - shift += Ni-8; - Ni = 8; - } - - // modulate by the component with the lowest precision - if (Nt >= Ni) { - if (shift) { - // XXX: we should be able to avoid this shift - // when shift==16 && Nt<16 && Ni<16, in which - // we could use SMULBT below. 
- MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift)); - inReg = dest.reg; - shift = 0; - } - // operation: (Cf*Ct)/((1<<Ni)-1) - // approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni - // this operation doesn't change texel's size - ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1)); - if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg); - else MUL(AL, 0, dest.reg, texel.reg, dest.reg); - dest.l = Ni; - dest.h = Nt + Ni; - } else { - if (shift && (shift != 16)) { - // if shift==16, we can use 16-bits mul instructions later - MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift)); - inReg = dest.reg; - shift = 0; - } - // operation: (Cf*Ct)/((1<<Nt)-1) - // approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt - // this operation doesn't change incoming's size - Scratch scratches(registerFile()); - int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg; - if (t == inReg) - t = scratches.obtain(); - ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1)); - if (Nt<16 && Ni<16) { - if (shift==16) SMULBT(AL, dest.reg, t, inReg); - else SMULBB(AL, dest.reg, t, inReg); - } else MUL(AL, 0, dest.reg, t, inReg); - dest.l = Nt; - dest.h = Nt + Ni; - } - - // low bits are not valid - dest.flags |= CLEAR_LO; - - // no need to keep more than 8 bits/component - if (dest.size() > 8) - dest.l = dest.h-8; - } -} - -void GGLAssembler::decal( - component_t& dest, - const component_t& incoming, - const pixel_t& incomingTexel, int component) -{ - // RGBA: - // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At - // Av = Af - Scratch locals(registerFile()); - integer_t texel(locals.obtain(), 32, CORRUPTIBLE); - integer_t factor(locals.obtain(), 32, CORRUPTIBLE); - extract(texel, incomingTexel, component); - extract(factor, incomingTexel, GGLFormat::ALPHA); - - // no need to keep more than 8-bits for decal - int Ni = incoming.size(); - int shift = incoming.l; - if (Ni > 8) { - shift += Ni-8; - Ni = 8; - } - integer_t incomingNorm(incoming.reg, Ni, incoming.flags); - if (shift) { - MOV(AL, 0, 
dest.reg, reg_imm(incomingNorm.reg, LSR, shift)); - incomingNorm.reg = dest.reg; - incomingNorm.flags |= CORRUPTIBLE; - } - ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1)); - build_blendOneMinusFF(dest, factor, incomingNorm, texel); -} - -void GGLAssembler::blend( - component_t& dest, - const component_t& incoming, - const pixel_t& incomingTexel, int component, int tmu) -{ - // RGBA: - // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct - // Av = At*Af - - if (component == GGLFormat::ALPHA) { - modulate(dest, incoming, incomingTexel, component); - return; - } - - Scratch locals(registerFile()); - integer_t color(locals.obtain(), 8, CORRUPTIBLE); - integer_t factor(locals.obtain(), 32, CORRUPTIBLE); - LDRB(AL, color.reg, mBuilderContext.Rctx, - immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component]))); - extract(factor, incomingTexel, component); - - // no need to keep more than 8-bits for blend - int Ni = incoming.size(); - int shift = incoming.l; - if (Ni > 8) { - shift += Ni-8; - Ni = 8; - } - integer_t incomingNorm(incoming.reg, Ni, incoming.flags); - if (shift) { - MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift)); - incomingNorm.reg = dest.reg; - incomingNorm.flags |= CORRUPTIBLE; - } - ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1)); - build_blendOneMinusFF(dest, factor, incomingNorm, color); -} - -void GGLAssembler::add( - component_t& dest, - const component_t& incoming, - const pixel_t& incomingTexel, int component) -{ - // RGBA: - // Cv = Cf + Ct; - Scratch locals(registerFile()); - - component_t incomingTemp(incoming); - - // use "dest" as a temporary for extracting the texel, unless "dest" - // overlaps "incoming". 
- integer_t texel(dest.reg, 32, CORRUPTIBLE); - if (dest.reg == incomingTemp.reg) - texel.reg = locals.obtain(); - extract(texel, incomingTexel, component); - - if (texel.s < incomingTemp.size()) { - expand(texel, texel, incomingTemp.size()); - } else if (texel.s > incomingTemp.size()) { - if (incomingTemp.flags & CORRUPTIBLE) { - expand(incomingTemp, incomingTemp, texel.s); - } else { - incomingTemp.reg = locals.obtain(); - expand(incomingTemp, incoming, texel.s); - } - } - - if (incomingTemp.l) { - ADD(AL, 0, dest.reg, texel.reg, - reg_imm(incomingTemp.reg, LSR, incomingTemp.l)); - } else { - ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg); - } - dest.l = 0; - dest.h = texel.size(); - component_sat(dest); -} - -// ---------------------------------------------------------------------------- - -}; // namespace android - |
