diff options
Diffstat (limited to 'libpixelflinger')
19 files changed, 3970 insertions, 182 deletions
diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk index 1947c2d0f..488003f2a 100644 --- a/libpixelflinger/Android.mk +++ b/libpixelflinger/Android.mk @@ -44,6 +44,8 @@ PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif ifeq ($(TARGET_ARCH),mips) +PIXELFLINGER_SRC_FILES += codeflinger/MIPSAssembler.cpp +PIXELFLINGER_SRC_FILES += codeflinger/mips_disassem.c PIXELFLINGER_SRC_FILES += arch-mips/t32cb16blend.S PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif diff --git a/libpixelflinger/codeflinger/ARMAssembler.cpp b/libpixelflinger/codeflinger/ARMAssembler.cpp index 0dc50376d..c4f42f5bd 100644 --- a/libpixelflinger/codeflinger/ARMAssembler.cpp +++ b/libpixelflinger/codeflinger/ARMAssembler.cpp @@ -76,6 +76,11 @@ void ARMAssembler::reset() mComments.clear(); } +int ARMAssembler::getCodegenArch() +{ + return CODEGEN_ARCH_ARM; +} + // ---------------------------------------------------------------------------- void ARMAssembler::disassemble(const char* name) @@ -444,5 +449,146 @@ void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width) *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn; } +#if 0 +#pragma mark - +#pragma mark Addressing modes... +#endif + +int ARMAssembler::buildImmediate( + uint32_t immediate, uint32_t& rot, uint32_t& imm) +{ + rot = 0; + imm = immediate; + if (imm > 0x7F) { // skip the easy cases + while (!(imm&3) || (imm&0xFC000000)) { + uint32_t newval; + newval = imm >> 2; + newval |= (imm&3) << 30; + imm = newval; + rot += 2; + if (rot == 32) { + rot = 0; + break; + } + } + } + rot = (16 - (rot>>1)) & 0xF; + + if (imm>=0x100) + return -EINVAL; + + if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate) + return -1; + + return 0; +} + +// shifters... 
+ +bool ARMAssembler::isValidImmediate(uint32_t immediate) +{ + uint32_t rot, imm; + return buildImmediate(immediate, rot, imm) == 0; +} + +uint32_t ARMAssembler::imm(uint32_t immediate) +{ + uint32_t rot, imm; + int err = buildImmediate(immediate, rot, imm); + + LOG_ALWAYS_FATAL_IF(err==-EINVAL, + "immediate %08x cannot be encoded", + immediate); + + LOG_ALWAYS_FATAL_IF(err, + "immediate (%08x) encoding bogus!", + immediate); + + return (1<<25) | (rot<<8) | imm; +} + +uint32_t ARMAssembler::reg_imm(int Rm, int type, uint32_t shift) +{ + return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF); +} + +uint32_t ARMAssembler::reg_rrx(int Rm) +{ + return (ROR<<5) | (Rm&0xF); +} + +uint32_t ARMAssembler::reg_reg(int Rm, int type, int Rs) +{ + return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF); +} + +// addressing modes... +// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0) +uint32_t ARMAssembler::immed12_pre(int32_t immed12, int W) +{ + LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, + "LDR(B)/STR(B)/PLD immediate too big (%08x)", + immed12); + return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) | + ((W&1)<<21) | (abs(immed12)&0x7FF); +} + +uint32_t ARMAssembler::immed12_post(int32_t immed12) +{ + LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, + "LDR(B)/STR(B)/PLD immediate too big (%08x)", + immed12); + + return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF); +} + +uint32_t ARMAssembler::reg_scale_pre(int Rm, int type, + uint32_t shift, int W) +{ + return (1<<25) | (1<<24) | + (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | + reg_imm(abs(Rm), type, shift); +} + +uint32_t ARMAssembler::reg_scale_post(int Rm, int type, uint32_t shift) +{ + return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift); +} + +// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0) +uint32_t ARMAssembler::immed8_pre(int32_t immed8, int W) +{ + uint32_t offset = abs(immed8); + + LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, + 
"LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", + immed8); + + return (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | + ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); +} + +uint32_t ARMAssembler::immed8_post(int32_t immed8) +{ + uint32_t offset = abs(immed8); + + LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, + "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", + immed8); + + return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | + (((offset&0xF0)<<4) | (offset&0xF)); +} + +uint32_t ARMAssembler::reg_pre(int Rm, int W) +{ + return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF); +} + +uint32_t ARMAssembler::reg_post(int Rm) +{ + return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF); +} + }; // namespace android diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h index e7f038ae4..06c66dd8b 100644 --- a/libpixelflinger/codeflinger/ARMAssembler.h +++ b/libpixelflinger/codeflinger/ARMAssembler.h @@ -52,11 +52,42 @@ public: virtual void reset(); virtual int generate(const char* name); + virtual int getCodegenArch(); virtual void prolog(); virtual void epilog(uint32_t touched); virtual void comment(const char* string); + + // ----------------------------------------------------------------------- + // shifters and addressing modes + // ----------------------------------------------------------------------- + + // shifters... + virtual bool isValidImmediate(uint32_t immed); + virtual int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm); + + virtual uint32_t imm(uint32_t immediate); + virtual uint32_t reg_imm(int Rm, int type, uint32_t shift); + virtual uint32_t reg_rrx(int Rm); + virtual uint32_t reg_reg(int Rm, int type, int Rs); + + // addressing modes... 
+ // LDR(B)/STR(B)/PLD + // (immediate and Rm can be negative, which indicates U=0) + virtual uint32_t immed12_pre(int32_t immed12, int W=0); + virtual uint32_t immed12_post(int32_t immed12); + virtual uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0); + virtual uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0); + + // LDRH/LDRSB/LDRSH/STRH + // (immediate and Rm can be negative, which indicates U=0) + virtual uint32_t immed8_pre(int32_t immed8, int W=0); + virtual uint32_t immed8_post(int32_t immed8); + virtual uint32_t reg_pre(int Rm, int W=0); + virtual uint32_t reg_post(int Rm); + + virtual void dataProcessing(int opcode, int cc, int s, int Rd, int Rn, uint32_t Op2); @@ -83,21 +114,23 @@ public: virtual uint32_t* pcForLabel(const char* label); virtual void LDR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void LDRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void STR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void STRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void LDRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual void LDRSB(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual void LDRSH(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual void STRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); + + virtual void LDM(int cc, int dir, int Rn, int W, uint32_t reg_list); virtual void STM(int cc, int dir, diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp 
b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp index 7fa0de0a8..82180eef0 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp +++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp @@ -32,77 +32,15 @@ ARMAssemblerInterface::~ARMAssemblerInterface() { } -int ARMAssemblerInterface::buildImmediate( - uint32_t immediate, uint32_t& rot, uint32_t& imm) -{ - rot = 0; - imm = immediate; - if (imm > 0x7F) { // skip the easy cases - while (!(imm&3) || (imm&0xFC000000)) { - uint32_t newval; - newval = imm >> 2; - newval |= (imm&3) << 30; - imm = newval; - rot += 2; - if (rot == 32) { - rot = 0; - break; - } - } - } - rot = (16 - (rot>>1)) & 0xF; - - if (imm>=0x100) - return -EINVAL; - - if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate) - return -1; - - return 0; -} - -// shifters... - -bool ARMAssemblerInterface::isValidImmediate(uint32_t immediate) -{ - uint32_t rot, imm; - return buildImmediate(immediate, rot, imm) == 0; -} - -uint32_t ARMAssemblerInterface::imm(uint32_t immediate) -{ - uint32_t rot, imm; - int err = buildImmediate(immediate, rot, imm); - - LOG_ALWAYS_FATAL_IF(err==-EINVAL, - "immediate %08x cannot be encoded", - immediate); - - LOG_ALWAYS_FATAL_IF(err, - "immediate (%08x) encoding bogus!", - immediate); +// -------------------------------------------------------------------- - return (1<<25) | (rot<<8) | imm; -} - -uint32_t ARMAssemblerInterface::reg_imm(int Rm, int type, uint32_t shift) -{ - return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF); -} - -uint32_t ARMAssemblerInterface::reg_rrx(int Rm) -{ - return (ROR<<5) | (Rm&0xF); -} +// The following two functions are static and used for initializers +// in the original ARM code. The above versions (without __), are now +// virtual, and can be overridden in the MIPS code. But since these are +// needed at initialization time, they must be static. Not thrilled with +// this implementation, but it works... 
-uint32_t ARMAssemblerInterface::reg_reg(int Rm, int type, int Rs) -{ - return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF); -} - -// addressing modes... -// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0) -uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W) +uint32_t ARMAssemblerInterface::__immed12_pre(int32_t immed12, int W) { LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, "LDR(B)/STR(B)/PLD immediate too big (%08x)", @@ -111,30 +49,7 @@ uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W) ((W&1)<<21) | (abs(immed12)&0x7FF); } -uint32_t ARMAssemblerInterface::immed12_post(int32_t immed12) -{ - LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, - "LDR(B)/STR(B)/PLD immediate too big (%08x)", - immed12); - - return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF); -} - -uint32_t ARMAssemblerInterface::reg_scale_pre(int Rm, int type, - uint32_t shift, int W) -{ - return (1<<25) | (1<<24) | - (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | - reg_imm(abs(Rm), type, shift); -} - -uint32_t ARMAssemblerInterface::reg_scale_post(int Rm, int type, uint32_t shift) -{ - return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift); -} - -// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0) -uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int W) +uint32_t ARMAssemblerInterface::__immed8_pre(int32_t immed8, int W) { uint32_t offset = abs(immed8); @@ -146,28 +61,6 @@ uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int W) ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); } -uint32_t ARMAssemblerInterface::immed8_post(int32_t immed8) -{ - uint32_t offset = abs(immed8); - - LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, - "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", - immed8); - - return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) | - (((offset&0xF0)<<4) | (offset&0xF)); -} - -uint32_t ARMAssemblerInterface::reg_pre(int Rm, int W) -{ - return (1<<24) | 
(((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF); -} - -uint32_t ARMAssemblerInterface::reg_post(int Rm) -{ - return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF); -} - }; // namespace android diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h index 796342a06..99919804e 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h +++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h @@ -62,33 +62,40 @@ public: LSAVED = LR4|LR5|LR6|LR7|LR8|LR9|LR10|LR11 | LLR }; + enum { + CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS + }; + // ----------------------------------------------------------------------- // shifters and addressing modes // ----------------------------------------------------------------------- - // shifters... - static bool isValidImmediate(uint32_t immed); - static int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm); + // these static versions are used for initializers on LDxx/STxx below + static uint32_t __immed12_pre(int32_t immed12, int W=0); + static uint32_t __immed8_pre(int32_t immed12, int W=0); + + virtual bool isValidImmediate(uint32_t immed) = 0; + virtual int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm) = 0; - static uint32_t imm(uint32_t immediate); - static uint32_t reg_imm(int Rm, int type, uint32_t shift); - static uint32_t reg_rrx(int Rm); - static uint32_t reg_reg(int Rm, int type, int Rs); + virtual uint32_t imm(uint32_t immediate) = 0; + virtual uint32_t reg_imm(int Rm, int type, uint32_t shift) = 0; + virtual uint32_t reg_rrx(int Rm) = 0; + virtual uint32_t reg_reg(int Rm, int type, int Rs) = 0; // addressing modes... 
// LDR(B)/STR(B)/PLD // (immediate and Rm can be negative, which indicates U=0) - static uint32_t immed12_pre(int32_t immed12, int W=0); - static uint32_t immed12_post(int32_t immed12); - static uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0); - static uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0); + virtual uint32_t immed12_pre(int32_t immed12, int W=0) = 0; + virtual uint32_t immed12_post(int32_t immed12) = 0; + virtual uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0) = 0; + virtual uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0) = 0; // LDRH/LDRSB/LDRSH/STRH // (immediate and Rm can be negative, which indicates U=0) - static uint32_t immed8_pre(int32_t immed8, int W=0); - static uint32_t immed8_post(int32_t immed8); - static uint32_t reg_pre(int Rm, int W=0); - static uint32_t reg_post(int Rm); + virtual uint32_t immed8_pre(int32_t immed8, int W=0) = 0; + virtual uint32_t immed8_post(int32_t immed8) = 0; + virtual uint32_t reg_pre(int Rm, int W=0) = 0; + virtual uint32_t reg_post(int Rm) = 0; // ----------------------------------------------------------------------- // basic instructions & code generation @@ -98,6 +105,7 @@ public: virtual void reset() = 0; virtual int generate(const char* name) = 0; virtual void disassemble(const char* name) = 0; + virtual int getCodegenArch() = 0; // construct prolog and epilog virtual void prolog() = 0; @@ -143,22 +151,22 @@ public: // data transfer... 
virtual void LDR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; + int Rn, uint32_t offset = __immed12_pre(0)) = 0; virtual void LDRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; + int Rn, uint32_t offset = __immed12_pre(0)) = 0; virtual void STR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; + int Rn, uint32_t offset = __immed12_pre(0)) = 0; virtual void STRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)) = 0; + int Rn, uint32_t offset = __immed12_pre(0)) = 0; virtual void LDRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; + int Rn, uint32_t offset = __immed8_pre(0)) = 0; virtual void LDRSB(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; + int Rn, uint32_t offset = __immed8_pre(0)) = 0; virtual void LDRSH(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; + int Rn, uint32_t offset = __immed8_pre(0)) = 0; virtual void STRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)) = 0; + int Rn, uint32_t offset = __immed8_pre(0)) = 0; // block data transfer... 
virtual void LDM(int cc, int dir, diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp index c57d7daa9..7feed622b 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp +++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp @@ -55,6 +55,10 @@ int ARMAssemblerProxy::generate(const char* name) { void ARMAssemblerProxy::disassemble(const char* name) { return mTarget->disassemble(name); } +int ARMAssemblerProxy::getCodegenArch() +{ + return mTarget->getCodegenArch(); +} void ARMAssemblerProxy::prolog() { mTarget->prolog(); } @@ -66,6 +70,93 @@ void ARMAssemblerProxy::comment(const char* string) { } + +// addressing modes + +bool ARMAssemblerProxy::isValidImmediate(uint32_t immed) +{ + return mTarget->isValidImmediate(immed); +} + +int ARMAssemblerProxy::buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm) +{ + return mTarget->buildImmediate(i, rot, imm); +} + + + +uint32_t ARMAssemblerProxy::imm(uint32_t immediate) +{ + return mTarget->imm(immediate); +} + +uint32_t ARMAssemblerProxy::reg_imm(int Rm, int type, uint32_t shift) +{ + return mTarget->reg_imm(Rm, type, shift); +} + +uint32_t ARMAssemblerProxy::reg_rrx(int Rm) +{ + return mTarget->reg_rrx(Rm); +} + +uint32_t ARMAssemblerProxy::reg_reg(int Rm, int type, int Rs) +{ + return mTarget->reg_reg(Rm, type, Rs); +} + + +// addressing modes... 
+// LDR(B)/STR(B)/PLD +// (immediate and Rm can be negative, which indicates U=0) +uint32_t ARMAssemblerProxy::immed12_pre(int32_t immed12, int W) +{ + return mTarget->immed12_pre(immed12, W); +} + +uint32_t ARMAssemblerProxy::immed12_post(int32_t immed12) +{ + return mTarget->immed12_post(immed12); +} + +uint32_t ARMAssemblerProxy::reg_scale_pre(int Rm, int type, uint32_t shift, int W) +{ + return mTarget->reg_scale_pre(Rm, type, shift, W); +} + +uint32_t ARMAssemblerProxy::reg_scale_post(int Rm, int type, uint32_t shift) +{ + return mTarget->reg_scale_post(Rm, type, shift); +} + + +// LDRH/LDRSB/LDRSH/STRH +// (immediate and Rm can be negative, which indicates U=0) +uint32_t ARMAssemblerProxy::immed8_pre(int32_t immed8, int W) +{ + return mTarget->immed8_pre(immed8, W); +} + +uint32_t ARMAssemblerProxy::immed8_post(int32_t immed8) +{ + return mTarget->immed8_post(immed8); +} + +uint32_t ARMAssemblerProxy::reg_pre(int Rm, int W) +{ + return mTarget->reg_pre(Rm, W); +} + +uint32_t ARMAssemblerProxy::reg_post(int Rm) +{ + return mTarget->reg_post(Rm); +} + + +//------------------------------------------------------------------------ + + + void ARMAssemblerProxy::dataProcessing( int opcode, int cc, int s, int Rd, int Rn, uint32_t Op2) { diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h index 8c7f2707f..5e3f763bc 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h +++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h @@ -42,11 +42,40 @@ public: virtual void reset(); virtual int generate(const char* name); virtual void disassemble(const char* name); + virtual int getCodegenArch(); virtual void prolog(); virtual void epilog(uint32_t touched); virtual void comment(const char* string); + // ----------------------------------------------------------------------- + // shifters and addressing modes + // ----------------------------------------------------------------------- + + virtual bool 
isValidImmediate(uint32_t immed); + virtual int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm); + + virtual uint32_t imm(uint32_t immediate); + virtual uint32_t reg_imm(int Rm, int type, uint32_t shift); + virtual uint32_t reg_rrx(int Rm); + virtual uint32_t reg_reg(int Rm, int type, int Rs); + + // addressing modes... + // LDR(B)/STR(B)/PLD + // (immediate and Rm can be negative, which indicates U=0) + virtual uint32_t immed12_pre(int32_t immed12, int W=0); + virtual uint32_t immed12_post(int32_t immed12); + virtual uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0); + virtual uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0); + + // LDRH/LDRSB/LDRSH/STRH + // (immediate and Rm can be negative, which indicates U=0) + virtual uint32_t immed8_pre(int32_t immed8, int W=0); + virtual uint32_t immed8_post(int32_t immed8); + virtual uint32_t reg_pre(int Rm, int W=0); + virtual uint32_t reg_post(int Rm); + + virtual void dataProcessing(int opcode, int cc, int s, int Rd, int Rn, uint32_t Op2); @@ -73,21 +102,21 @@ public: uint32_t* pcForLabel(const char* label); virtual void LDR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void LDRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void STR (int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void STRB(int cc, int Rd, - int Rn, uint32_t offset = immed12_pre(0)); + int Rn, uint32_t offset = __immed12_pre(0)); virtual void LDRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual void LDRSB(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual void LDRSH(int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual 
void STRH (int cc, int Rd, - int Rn, uint32_t offset = immed8_pre(0)); + int Rn, uint32_t offset = __immed8_pre(0)); virtual void LDM(int cc, int dir, int Rn, int W, uint32_t reg_list); virtual void STM(int cc, int dir, diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp index a713febaf..4f2ede3f2 100644 --- a/libpixelflinger/codeflinger/CodeCache.cpp +++ b/libpixelflinger/codeflinger/CodeCache.cpp @@ -36,6 +36,12 @@ namespace android { #include <errno.h> #endif +#if defined(__mips__) +#include <asm/cachectl.h> +#include <errno.h> +#endif + +// ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- Assembly::Assembly(size_t size) @@ -155,12 +161,12 @@ int CodeCache::cache( const AssemblyKeyBase& keyBase, mCacheInUse += assemblySize; mWhen++; // synchronize caches... -#if defined(__arm__) +#if defined(__arm__) || defined(__mips__) const long base = long(assembly->base()); const long curr = base + long(assembly->size()); err = cacheflush(base, curr, 0); - ALOGE_IF(err, "__ARM_NR_cacheflush error %s\n", - strerror(errno)); + ALOGE_IF(err, "cacheflush error %s\n", + strerror(errno)); #endif } diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp index f1d81b27c..1ddf93d57 100644 --- a/libpixelflinger/codeflinger/GGLAssembler.cpp +++ b/libpixelflinger/codeflinger/GGLAssembler.cpp @@ -31,7 +31,8 @@ namespace android { // ---------------------------------------------------------------------------- GGLAssembler::GGLAssembler(ARMAssemblerInterface* target) - : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7) + : ARMAssemblerProxy(target), + RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7) { } @@ -230,7 +231,9 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) // texel generation build_textures(parts, regs); - } + if 
(registerFile().status()) + return registerFile().status(); + } if ((blending & (FACTOR_DST|BLEND_DST)) || (mMasking && !mAllMasked) || @@ -890,6 +893,15 @@ void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits) return; } + if (getCodegenArch() == CODEGEN_ARCH_MIPS) { + // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr + // the below ' while (mask)' code is buggy on mips + // since mips returns true on isValidImmediate() + // then we get multiple AND instr (positive logic) + AND( AL, 0, d, s, imm(mask) ); + return; + } + int negative_logic = !isValidImmediate(mask); if (negative_logic) { mask = ~mask & size; @@ -1002,6 +1014,15 @@ void GGLAssembler::base_offset( // cheezy register allocator... // ---------------------------------------------------------------------------- +// Modified to support MIPS processors, in a very simple way. We retain the +// (Arm) limit of 16 total registers, but shift the mapping of those registers +// from 0-15, to 2-17. Register 0 on Mips cannot be used as GP registers, and +// register 1 has a traditional use as a temp). 
+ +RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch) +{ +} + void RegisterAllocator::reset() { mRegs.reset(); @@ -1029,16 +1050,22 @@ RegisterAllocator::RegisterFile& RegisterAllocator::registerFile() // ---------------------------------------------------------------------------- -RegisterAllocator::RegisterFile::RegisterFile() - : mRegs(0), mTouched(0), mStatus(0) +RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch) + : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0) { + if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) { + mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17 + } reserve(ARMAssemblerInterface::SP); reserve(ARMAssemblerInterface::PC); } -RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs) - : mRegs(rhs.mRegs), mTouched(rhs.mTouched) +RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch) + : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0) { + if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) { + mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17 + } } RegisterAllocator::RegisterFile::~RegisterFile() @@ -1057,8 +1084,12 @@ void RegisterAllocator::RegisterFile::reset() reserve(ARMAssemblerInterface::PC); } +// RegisterFile::reserve() take a register parameter in the +// range 0-15 (Arm compatible), but on a Mips processor, will +// return the actual allocated register in the range 2-17. int RegisterAllocator::RegisterFile::reserve(int reg) { + reg += mRegisterOffset; LOG_ALWAYS_FATAL_IF(isUsed(reg), "reserving register %d, but already in use", reg); @@ -1067,6 +1098,7 @@ int RegisterAllocator::RegisterFile::reserve(int reg) return reg; } +// This interface uses regMask in range 2-17 on MIPS, no translation. 
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) { mRegs |= regMask; @@ -1075,7 +1107,7 @@ void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) int RegisterAllocator::RegisterFile::isUsed(int reg) const { - LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg); + LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg); return mRegs & (1<<reg); } @@ -1086,10 +1118,10 @@ int RegisterAllocator::RegisterFile::obtain() 6, 7, 8, 9, 10, 11 }; const int nbreg = sizeof(priorityList); - int i, r; + int i, r, reg; for (i=0 ; i<nbreg ; i++) { r = priorityList[i]; - if (!isUsed(r)) { + if (!isUsed(r + mRegisterOffset)) { break; } } @@ -1102,18 +1134,20 @@ int RegisterAllocator::RegisterFile::obtain() // the code will never be run anyway. return ARMAssemblerInterface::SP; } - reserve(r); - return r; + reg = reserve(r); // Param in Arm range 0-15, returns range 2-17 on Mips. + return reg; } bool RegisterAllocator::RegisterFile::hasFreeRegs() const { - return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true; + uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix. + return ((regs & 0xFFFF) == 0xFFFF) ? false : true; } int RegisterAllocator::RegisterFile::countFreeRegs() const { - int f = ~mRegs & 0xFFFF; + uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix. + int f = ~regs & 0xFFFF; // now count number of 1 f = (f & 0x5555) + ((f>>1) & 0x5555); f = (f & 0x3333) + ((f>>2) & 0x3333); @@ -1124,18 +1158,24 @@ int RegisterAllocator::RegisterFile::countFreeRegs() const void RegisterAllocator::RegisterFile::recycle(int reg) { - LOG_FATAL_IF(!isUsed(reg), - "recycling unallocated register %d", - reg); + // commented out, since common failure of running out of regs + // triggers this assertion. Since the code is not executed + // in that case, it does not matter. No reason to FATAL err. 
+ // LOG_FATAL_IF(!isUsed(reg), + // "recycling unallocated register %d", + // reg); mRegs &= ~(1<<reg); } void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask) { - LOG_FATAL_IF((mRegs & regMask)!=regMask, - "recycling unallocated registers " - "(recycle=%08x, allocated=%08x, unallocated=%08x)", - regMask, mRegs, mRegs&regMask); + // commented out, since common failure of running out of regs + // triggers this assertion. Since the code is not executed + // in that case, it does not matter. No reason to FATAL err. + // LOG_FATAL_IF((mRegs & regMask)!=regMask, + // "recycling unallocated registers " + // "(recycle=%08x, allocated=%08x, unallocated=%08x)", + // regMask, mRegs, mRegs&regMask); mRegs &= ~regMask; } diff --git a/libpixelflinger/codeflinger/GGLAssembler.h b/libpixelflinger/codeflinger/GGLAssembler.h index d1d29f0b0..dd5f48ee9 100644 --- a/libpixelflinger/codeflinger/GGLAssembler.h +++ b/libpixelflinger/codeflinger/GGLAssembler.h @@ -43,6 +43,7 @@ class RegisterAllocator public: class RegisterFile; + RegisterAllocator(int arch); RegisterFile& registerFile(); int reserveReg(int reg); int obtainReg(); @@ -52,8 +53,8 @@ public: class RegisterFile { public: - RegisterFile(); - RegisterFile(const RegisterFile& rhs); + RegisterFile(int arch); + RegisterFile(const RegisterFile& rhs, int arch); ~RegisterFile(); void reset(); @@ -86,6 +87,9 @@ public: uint32_t mRegs; uint32_t mTouched; uint32_t mStatus; + int mArch; + uint32_t mRegisterOffset; // lets reg alloc use 2..17 for mips + // while arm uses 0..15 }; class Scratch diff --git a/libpixelflinger/codeflinger/MIPSAssembler.cpp b/libpixelflinger/codeflinger/MIPSAssembler.cpp new file mode 100644 index 000000000..7888a0e4e --- /dev/null +++ b/libpixelflinger/codeflinger/MIPSAssembler.cpp @@ -0,0 +1,1957 @@ +/* libs/pixelflinger/codeflinger/MIPSAssembler.cpp +** +** Copyright 2012, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not 
use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + + +/* MIPS assembler and ARM->MIPS assembly translator +** +** The approach is to leave the GGLAssembler and associated files largely +** un-changed, still utilizing all Arm instruction generation. Via the +** ArmToMipsAssembler (subclassed from ArmAssemblerInterface) each Arm +** instruction is translated to one or more Mips instructions as necessary. This +** is clearly less efficient than a direct implementation within the +** GGLAssembler, but is far cleaner, more maintainable, and has yielded very +** significant performance gains on Mips compared to the generic pixel pipeline. +** +** +** GGLAssembler changes +** +** - The register allocator has been modified to re-map Arm registers 0-15 to mips +** registers 2-17. Mips register 0 cannot be used as general-purpose register, +** and register 1 has traditional uses as a short-term temporary. +** +** - Added some early bailouts for OUT_OF_REGISTERS in texturing.cpp and +** GGLAssembler.cpp, since this is not fatal, and can be retried at lower +** optimization level. +** +** +** ARMAssembler and ARMAssemblerInterface changes +** +** Refactored ARM address-mode static functions (imm(), reg_imm(), imm12_pre(), etc.) +** to virtual, so they can be overridden in MIPSAssembler. The implementation of these +** functions on ARM is moved from ARMAssemblerInterface.cpp to ARMAssembler.cpp, and +** is unchanged from the original. 
(This required duplicating 2 of these as static +** functions in ARMAssemblerInterface.cpp so they could be used as static initializers). +*/ + + +#define LOG_TAG "MIPSAssembler" + +#include <stdio.h> +#include <stdlib.h> +#include <cutils/log.h> +#include <cutils/properties.h> + +#if defined(WITH_LIB_HARDWARE) +#include <hardware_legacy/qemu_tracing.h> +#endif + +#include <private/pixelflinger/ggl_context.h> + +#include "codeflinger/MIPSAssembler.h" +#include "codeflinger/CodeCache.h" +#include "codeflinger/mips_disassem.h" + +// Choose MIPS arch variant following gcc flags +#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2 +#define mips32r2 1 +#else +#define mips32r2 0 +#endif + + +#define NOT_IMPLEMENTED() LOG_ALWAYS_FATAL("Arm instruction %s not yet implemented\n", __func__) + + + +// ---------------------------------------------------------------------------- + +namespace android { + +// ---------------------------------------------------------------------------- +#if 0 +#pragma mark - +#pragma mark ArmToMipsAssembler... 
+#endif + +ArmToMipsAssembler::ArmToMipsAssembler(const sp<Assembly>& assembly, + char *abuf, int linesz, int instr_count) + : ARMAssemblerInterface(), + mArmDisassemblyBuffer(abuf), + mArmLineLength(linesz), + mArmInstrCount(instr_count), + mInum(0), + mAssembly(assembly) +{ + mMips = new MIPSAssembler(assembly, this); + mArmPC = (uint32_t **) malloc(ARM_MAX_INSTUCTIONS * sizeof(uint32_t *)); + init_conditional_labels(); +} + +ArmToMipsAssembler::~ArmToMipsAssembler() +{ + delete mMips; + free((void *) mArmPC); +} + +uint32_t* ArmToMipsAssembler::pc() const +{ + return mMips->pc(); +} + +uint32_t* ArmToMipsAssembler::base() const +{ + return mMips->base(); +} + +void ArmToMipsAssembler::reset() +{ + cond.labelnum = 0; + mInum = 0; + mMips->reset(); +} + +int ArmToMipsAssembler::getCodegenArch() +{ + return CODEGEN_ARCH_MIPS; +} + +void ArmToMipsAssembler::comment(const char* string) +{ + mMips->comment(string); +} + +void ArmToMipsAssembler::label(const char* theLabel) +{ + mMips->label(theLabel); +} + +void ArmToMipsAssembler::disassemble(const char* name) +{ + mMips->disassemble(name); +} + +void ArmToMipsAssembler::init_conditional_labels() +{ + int i; + for (i=0;i<99; ++i) { + sprintf(cond.label[i], "cond_%d", i); + } +} + + + +#if 0 +#pragma mark - +#pragma mark Prolog/Epilog & Generate... 
+#endif + +void ArmToMipsAssembler::prolog() +{ + mArmPC[mInum++] = pc(); // save starting PC for this instr + + mMips->ADDIU(R_sp, R_sp, -(5 * 4)); + mMips->SW(R_s0, R_sp, 0); + mMips->SW(R_s1, R_sp, 4); + mMips->SW(R_s2, R_sp, 8); + mMips->SW(R_s3, R_sp, 12); + mMips->SW(R_s4, R_sp, 16); + mMips->MOVE(R_v0, R_a0); // move context * passed in a0 to v0 (arm r0) +} + +void ArmToMipsAssembler::epilog(uint32_t touched) +{ + mArmPC[mInum++] = pc(); // save starting PC for this instr + + mMips->LW(R_s0, R_sp, 0); + mMips->LW(R_s1, R_sp, 4); + mMips->LW(R_s2, R_sp, 8); + mMips->LW(R_s3, R_sp, 12); + mMips->LW(R_s4, R_sp, 16); + mMips->ADDIU(R_sp, R_sp, (5 * 4)); + mMips->JR(R_ra); + +} + +int ArmToMipsAssembler::generate(const char* name) +{ + return mMips->generate(name); +} + +uint32_t* ArmToMipsAssembler::pcForLabel(const char* label) +{ + return mMips->pcForLabel(label); +} + + + +//---------------------------------------------------------- + +#if 0 +#pragma mark - +#pragma mark Addressing modes & shifters... +#endif + + +// do not need this for MIPS, but it is in the Interface (virtual) +int ArmToMipsAssembler::buildImmediate( + uint32_t immediate, uint32_t& rot, uint32_t& imm) +{ + // for MIPS, any 32-bit immediate is OK + rot = 0; + imm = immediate; + return 0; +} + +// shifters... 
+ +bool ArmToMipsAssembler::isValidImmediate(uint32_t immediate) +{ + // for MIPS, any 32-bit immediate is OK + return true; +} + +uint32_t ArmToMipsAssembler::imm(uint32_t immediate) +{ + // ALOGW("immediate value %08x at pc %08x\n", immediate, (int)pc()); + amode.value = immediate; + return AMODE_IMM; +} + +uint32_t ArmToMipsAssembler::reg_imm(int Rm, int type, uint32_t shift) +{ + amode.reg = Rm; + amode.stype = type; + amode.value = shift; + return AMODE_REG_IMM; +} + +uint32_t ArmToMipsAssembler::reg_rrx(int Rm) +{ + // reg_rrx mode is not used in the GLLAssember code at this time + return AMODE_UNSUPPORTED; +} + +uint32_t ArmToMipsAssembler::reg_reg(int Rm, int type, int Rs) +{ + // reg_reg mode is not used in the GLLAssember code at this time + return AMODE_UNSUPPORTED; +} + + +// addressing modes... +// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0) +uint32_t ArmToMipsAssembler::immed12_pre(int32_t immed12, int W) +{ + LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, + "LDR(B)/STR(B)/PLD immediate too big (%08x)", + immed12); + amode.value = immed12; + amode.writeback = W; + return AMODE_IMM_12_PRE; +} + +uint32_t ArmToMipsAssembler::immed12_post(int32_t immed12) +{ + LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800, + "LDR(B)/STR(B)/PLD immediate too big (%08x)", + immed12); + + amode.value = immed12; + return AMODE_IMM_12_POST; +} + +uint32_t ArmToMipsAssembler::reg_scale_pre(int Rm, int type, + uint32_t shift, int W) +{ + LOG_ALWAYS_FATAL_IF(W | type | shift, "reg_scale_pre adv modes not yet implemented"); + + amode.reg = Rm; + // amode.stype = type; // more advanced modes not used in GGLAssembler yet + // amode.value = shift; + // amode.writeback = W; + return AMODE_REG_SCALE_PRE; +} + +uint32_t ArmToMipsAssembler::reg_scale_post(int Rm, int type, uint32_t shift) +{ + LOG_ALWAYS_FATAL("adr mode reg_scale_post not yet implemented\n"); + return AMODE_UNSUPPORTED; +} + +// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate 
U=0) +uint32_t ArmToMipsAssembler::immed8_pre(int32_t immed8, int W) +{ + // uint32_t offset = abs(immed8); + + LOG_ALWAYS_FATAL("adr mode immed8_pre not yet implemented\n"); + + LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, + "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", + immed8); + return AMODE_IMM_8_PRE; +} + +uint32_t ArmToMipsAssembler::immed8_post(int32_t immed8) +{ + // uint32_t offset = abs(immed8); + + LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100, + "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)", + immed8); + amode.value = immed8; + return AMODE_IMM_8_POST; +} + +uint32_t ArmToMipsAssembler::reg_pre(int Rm, int W) +{ + LOG_ALWAYS_FATAL_IF(W, "reg_pre writeback not yet implemented"); + amode.reg = Rm; + return AMODE_REG_PRE; +} + +uint32_t ArmToMipsAssembler::reg_post(int Rm) +{ + LOG_ALWAYS_FATAL("adr mode reg_post not yet implemented\n"); + return AMODE_UNSUPPORTED; +} + + + +// ---------------------------------------------------------------------------- + +#if 0 +#pragma mark - +#pragma mark Data Processing... +#endif + + +static const char * const dpOpNames[] = { + "AND", "EOR", "SUB", "RSB", "ADD", "ADC", "SBC", "RSC", + "TST", "TEQ", "CMP", "CMN", "ORR", "MOV", "BIC", "MVN" +}; + +// check if the operand registers from a previous CMP or S-bit instruction +// would be overwritten by this instruction. If so, move the value to a +// safe register. +// Note that we cannot tell at _this_ instruction time if a future (conditional) +// instruction will _also_ use this value (a defect of the simple 1-pass, one- +// instruction-at-a-time translation). Therefore we must be conservative and +// save the value before it is overwritten. This costs an extra MOVE instr. 
+ +void ArmToMipsAssembler::protectConditionalOperands(int Rd) +{ + if (Rd == cond.r1) { + mMips->MOVE(R_cmp, cond.r1); + cond.r1 = R_cmp; + } + if (cond.type == CMP_COND && Rd == cond.r2) { + mMips->MOVE(R_cmp2, cond.r2); + cond.r2 = R_cmp2; + } +} + + +// interprets the addressing mode, and generates the common code +// used by the majority of data-processing ops. Many MIPS instructions +// have a register-based form and a different immediate form. See +// opAND below for an example. (this could be inlined) +// +// this works with the imm(), reg_imm() methods above, which are directly +// called by the GLLAssembler. +// note: _signed parameter defaults to false (un-signed) +// note: tmpReg parameter defaults to 1, MIPS register AT +int ArmToMipsAssembler::dataProcAdrModes(int op, int& source, bool _signed, int tmpReg) +{ + if (op < AMODE_REG) { + source = op; + return SRC_REG; + } else if (op == AMODE_IMM) { + if ((!_signed && amode.value > 0xffff) + || (_signed && ((int)amode.value < -32768 || (int)amode.value > 32767) )) { + mMips->LUI(tmpReg, (amode.value >> 16)); + if (amode.value & 0x0000ffff) { + mMips->ORI(tmpReg, tmpReg, (amode.value & 0x0000ffff)); + } + source = tmpReg; + return SRC_REG; + } else { + source = amode.value; + return SRC_IMM; + } + } else if (op == AMODE_REG_IMM) { + switch (amode.stype) { + case LSL: mMips->SLL(tmpReg, amode.reg, amode.value); break; + case LSR: mMips->SRL(tmpReg, amode.reg, amode.value); break; + case ASR: mMips->SRA(tmpReg, amode.reg, amode.value); break; + case ROR: if (mips32r2) { + mMips->ROTR(tmpReg, amode.reg, amode.value); + } else { + mMips->RORIsyn(tmpReg, amode.reg, amode.value); + } + break; + } + source = tmpReg; + return SRC_REG; + } else { // adr mode RRX is not used in GGL Assembler at this time + // we are screwed, this should be exception, assert-fail or something + LOG_ALWAYS_FATAL("adr mode reg_rrx not yet implemented\n"); + return SRC_ERROR; + } +} + + +void ArmToMipsAssembler::dataProcessing(int 
opcode, int cc, + int s, int Rd, int Rn, uint32_t Op2) +{ + int src; // src is modified by dataProcAdrModes() - passed as int& + + + if (cc != AL) { + protectConditionalOperands(Rd); + // the branch tests register(s) set by prev CMP or instr with 'S' bit set + // inverse the condition to jump past this conditional instruction + ArmToMipsAssembler::B(cc^1, cond.label[++cond.labelnum]); + } else { + mArmPC[mInum++] = pc(); // save starting PC for this instr + } + + switch (opcode) { + case opAND: + if (dataProcAdrModes(Op2, src) == SRC_REG) { + mMips->AND(Rd, Rn, src); + } else { // adr mode was SRC_IMM + mMips->ANDI(Rd, Rn, src); + } + break; + + case opADD: + // set "signed" to true for adr modes + if (dataProcAdrModes(Op2, src, true) == SRC_REG) { + mMips->ADDU(Rd, Rn, src); + } else { // adr mode was SRC_IMM + mMips->ADDIU(Rd, Rn, src); + } + break; + + case opSUB: + // set "signed" to true for adr modes + if (dataProcAdrModes(Op2, src, true) == SRC_REG) { + mMips->SUBU(Rd, Rn, src); + } else { // adr mode was SRC_IMM + mMips->SUBIU(Rd, Rn, src); + } + break; + + case opEOR: + if (dataProcAdrModes(Op2, src) == SRC_REG) { + mMips->XOR(Rd, Rn, src); + } else { // adr mode was SRC_IMM + mMips->XORI(Rd, Rn, src); + } + break; + + case opORR: + if (dataProcAdrModes(Op2, src) == SRC_REG) { + mMips->OR(Rd, Rn, src); + } else { // adr mode was SRC_IMM + mMips->ORI(Rd, Rn, src); + } + break; + + case opBIC: + if (dataProcAdrModes(Op2, src) == SRC_IMM) { + // if we are 16-bit imnmediate, load to AT reg + mMips->ORI(R_at, 0, src); + src = R_at; + } + mMips->NOT(R_at, src); + mMips->AND(Rd, Rn, R_at); + break; + + case opRSB: + if (dataProcAdrModes(Op2, src) == SRC_IMM) { + // if we are 16-bit imnmediate, load to AT reg + mMips->ORI(R_at, 0, src); + src = R_at; + } + mMips->SUBU(Rd, src, Rn); // subu with the parameters reversed + break; + + case opMOV: + if (Op2 < AMODE_REG) { // op2 is reg # in this case + mMips->MOVE(Rd, Op2); + } else if (Op2 == AMODE_IMM) { + if 
(amode.value > 0xffff) { + mMips->LUI(Rd, (amode.value >> 16)); + if (amode.value & 0x0000ffff) { + mMips->ORI(Rd, Rd, (amode.value & 0x0000ffff)); + } + } else { + mMips->ORI(Rd, 0, amode.value); + } + } else if (Op2 == AMODE_REG_IMM) { + switch (amode.stype) { + case LSL: mMips->SLL(Rd, amode.reg, amode.value); break; + case LSR: mMips->SRL(Rd, amode.reg, amode.value); break; + case ASR: mMips->SRA(Rd, amode.reg, amode.value); break; + case ROR: if (mips32r2) { + mMips->ROTR(Rd, amode.reg, amode.value); + } else { + mMips->RORIsyn(Rd, amode.reg, amode.value); + } + break; + } + } + else { + // adr mode RRX is not used in GGL Assembler at this time + mMips->UNIMPL(); + } + break; + + case opMVN: // this is a 1's complement: NOT + if (Op2 < AMODE_REG) { // op2 is reg # in this case + mMips->NOR(Rd, Op2, 0); // NOT is NOR with 0 + break; + } else if (Op2 == AMODE_IMM) { + if (amode.value > 0xffff) { + mMips->LUI(Rd, (amode.value >> 16)); + if (amode.value & 0x0000ffff) { + mMips->ORI(Rd, Rd, (amode.value & 0x0000ffff)); + } + } else { + mMips->ORI(Rd, 0, amode.value); + } + } else if (Op2 == AMODE_REG_IMM) { + switch (amode.stype) { + case LSL: mMips->SLL(Rd, amode.reg, amode.value); break; + case LSR: mMips->SRL(Rd, amode.reg, amode.value); break; + case ASR: mMips->SRA(Rd, amode.reg, amode.value); break; + case ROR: if (mips32r2) { + mMips->ROTR(Rd, amode.reg, amode.value); + } else { + mMips->RORIsyn(Rd, amode.reg, amode.value); + } + break; + } + } + else { + // adr mode RRX is not used in GGL Assembler at this time + mMips->UNIMPL(); + } + mMips->NOR(Rd, Rd, 0); // NOT is NOR with 0 + break; + + case opCMP: + // Either operand of a CMP instr could get overwritten by a subsequent + // conditional instruction, which is ok, _UNLESS_ there is a _second_ + // conditional instruction. Under MIPS, this requires doing the comparison + // again (SLT), and the original operands must be available. 
(and this + // pattern of multiple conditional instructions from same CMP _is_ used + // in GGL-Assembler) + // + // For now, if a conditional instr overwrites the operands, we will + // move them to dedicated temp regs. This is ugly, and inefficient, + // and should be optimized. + // + // WARNING: making an _Assumption_ that CMP operand regs will NOT be + // trashed by intervening NON-conditional instructions. In the general + // case this is legal, but it is NOT currently done in GGL-Assembler. + + cond.type = CMP_COND; + cond.r1 = Rn; + if (dataProcAdrModes(Op2, src, false, R_cmp2) == SRC_REG) { + cond.r2 = src; + } else { // adr mode was SRC_IMM + mMips->ORI(R_cmp2, R_zero, src); + cond.r2 = R_cmp2; + } + + break; + + + case opTST: + case opTEQ: + case opCMN: + case opADC: + case opSBC: + case opRSC: + mMips->UNIMPL(); // currently unused in GGL Assembler code + break; + } + + if (cc != AL) { + mMips->label(cond.label[cond.labelnum]); + } + if (s && opcode != opCMP) { + cond.type = SBIT_COND; + cond.r1 = Rd; + } +} + + + +#if 0 +#pragma mark - +#pragma mark Multiply... +#endif + +// multiply, accumulate +void ArmToMipsAssembler::MLA(int cc, int s, + int Rd, int Rm, int Rs, int Rn) { + + mArmPC[mInum++] = pc(); // save starting PC for this instr + + mMips->MUL(R_at, Rm, Rs); + mMips->ADDU(Rd, R_at, Rn); + if (s) { + cond.type = SBIT_COND; + cond.r1 = Rd; + } +} + +void ArmToMipsAssembler::MUL(int cc, int s, + int Rd, int Rm, int Rs) { + mArmPC[mInum++] = pc(); + mMips->MUL(Rd, Rm, Rs); + if (s) { + cond.type = SBIT_COND; + cond.r1 = Rd; + } +} + +void ArmToMipsAssembler::UMULL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) { + mArmPC[mInum++] = pc(); + mMips->MULT(Rm, Rs); + mMips->MFHI(RdHi); + mMips->MFLO(RdLo); + if (s) { + cond.type = SBIT_COND; + cond.r1 = RdHi; // BUG... 
+ LOG_ALWAYS_FATAL("Condition on UMULL must be on 64-bit result\n"); + } +} + +void ArmToMipsAssembler::UMUAL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) { + LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, + "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); + // *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) | + // (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); + if (s) { + cond.type = SBIT_COND; + cond.r1 = RdHi; // BUG... + LOG_ALWAYS_FATAL("Condition on UMULL must be on 64-bit result\n"); + } +} + +void ArmToMipsAssembler::SMULL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) { + LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, + "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); + // *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) | + // (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); + if (s) { + cond.type = SBIT_COND; + cond.r1 = RdHi; // BUG... + LOG_ALWAYS_FATAL("Condition on SMULL must be on 64-bit result\n"); + } +} +void ArmToMipsAssembler::SMUAL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) { + LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi, + "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs); + // *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) | + // (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); + if (s) { + cond.type = SBIT_COND; + cond.r1 = RdHi; // BUG... + LOG_ALWAYS_FATAL("Condition on SMUAL must be on 64-bit result\n"); + } +} + + + +#if 0 +#pragma mark - +#pragma mark Branches... +#endif + +// branches... 
+ +void ArmToMipsAssembler::B(int cc, const char* label) +{ + mArmPC[mInum++] = pc(); + if (cond.type == SBIT_COND) { cond.r2 = R_zero; } + + switch(cc) { + case EQ: mMips->BEQ(cond.r1, cond.r2, label); break; + case NE: mMips->BNE(cond.r1, cond.r2, label); break; + case HS: mMips->BGEU(cond.r1, cond.r2, label); break; + case LO: mMips->BLTU(cond.r1, cond.r2, label); break; + case MI: mMips->BLT(cond.r1, cond.r2, label); break; + case PL: mMips->BGE(cond.r1, cond.r2, label); break; + + case HI: mMips->BGTU(cond.r1, cond.r2, label); break; + case LS: mMips->BLEU(cond.r1, cond.r2, label); break; + case GE: mMips->BGE(cond.r1, cond.r2, label); break; + case LT: mMips->BLT(cond.r1, cond.r2, label); break; + case GT: mMips->BGT(cond.r1, cond.r2, label); break; + case LE: mMips->BLE(cond.r1, cond.r2, label); break; + case AL: mMips->B(label); break; + case NV: /* B Never - no instruction */ break; + + case VS: + case VC: + default: + LOG_ALWAYS_FATAL("Unsupported cc: %02x\n", cc); + break; + } +} + +void ArmToMipsAssembler::BL(int cc, const char* label) +{ + LOG_ALWAYS_FATAL("branch-and-link not supported yet\n"); + mArmPC[mInum++] = pc(); +} + +// no use for Branches with integer PC, but they're in the Interface class .... +void ArmToMipsAssembler::B(int cc, uint32_t* to_pc) +{ + LOG_ALWAYS_FATAL("branch to absolute PC not supported, use Label\n"); + mArmPC[mInum++] = pc(); +} + +void ArmToMipsAssembler::BL(int cc, uint32_t* to_pc) +{ + LOG_ALWAYS_FATAL("branch to absolute PC not supported, use Label\n"); + mArmPC[mInum++] = pc(); +} + +void ArmToMipsAssembler::BX(int cc, int Rn) +{ + LOG_ALWAYS_FATAL("branch to absolute PC not supported, use Label\n"); + mArmPC[mInum++] = pc(); +} + + + +#if 0 +#pragma mark - +#pragma mark Data Transfer... +#endif + +// data transfer... 
+void ArmToMipsAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + // work-around for ARM default address mode of immed12_pre(0) + if (offset > AMODE_UNSUPPORTED) offset = 0; + switch (offset) { + case 0: + amode.value = 0; + amode.writeback = 0; + // fall thru to next case .... + case AMODE_IMM_12_PRE: + if (Rn == ARMAssemblerInterface::SP) { + Rn = R_sp; // convert LDR via Arm SP to LW via Mips SP + } + mMips->LW(Rd, Rn, amode.value); + if (amode.writeback) { // OPTIONAL writeback on pre-index mode + mMips->ADDIU(Rn, Rn, amode.value); + } + break; + case AMODE_IMM_12_POST: + if (Rn == ARMAssemblerInterface::SP) { + Rn = R_sp; // convert STR thru Arm SP to STR thru Mips SP + } + mMips->LW(Rd, Rn, 0); + mMips->ADDIU(Rn, Rn, amode.value); + break; + case AMODE_REG_SCALE_PRE: + // we only support simple base + index, no advanced modes for this one yet + mMips->ADDU(R_at, Rn, amode.reg); + mMips->LW(Rd, R_at, 0); + break; + } +} + +void ArmToMipsAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + // work-around for ARM default address mode of immed12_pre(0) + if (offset > AMODE_UNSUPPORTED) offset = 0; + switch (offset) { + case 0: + amode.value = 0; + amode.writeback = 0; + // fall thru to next case .... 
+ case AMODE_IMM_12_PRE: + mMips->LBU(Rd, Rn, amode.value); + if (amode.writeback) { // OPTIONAL writeback on pre-index mode + mMips->ADDIU(Rn, Rn, amode.value); + } + break; + case AMODE_IMM_12_POST: + mMips->LBU(Rd, Rn, 0); + mMips->ADDIU(Rn, Rn, amode.value); + break; + case AMODE_REG_SCALE_PRE: + // we only support simple base + index, no advanced modes for this one yet + mMips->ADDU(R_at, Rn, amode.reg); + mMips->LBU(Rd, R_at, 0); + break; + } + +} + +void ArmToMipsAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + // work-around for ARM default address mode of immed12_pre(0) + if (offset > AMODE_UNSUPPORTED) offset = 0; + switch (offset) { + case 0: + amode.value = 0; + amode.writeback = 0; + // fall thru to next case .... + case AMODE_IMM_12_PRE: + if (Rn == ARMAssemblerInterface::SP) { + Rn = R_sp; // convert STR thru Arm SP to SW thru Mips SP + } + if (amode.writeback) { // OPTIONAL writeback on pre-index mode + // If we will writeback, then update the index reg, then store. + // This correctly handles stack-push case. + mMips->ADDIU(Rn, Rn, amode.value); + mMips->SW(Rd, Rn, 0); + } else { + // No writeback so store offset by value + mMips->SW(Rd, Rn, amode.value); + } + break; + case AMODE_IMM_12_POST: + mMips->SW(Rd, Rn, 0); + mMips->ADDIU(Rn, Rn, amode.value); // post index always writes back + break; + case AMODE_REG_SCALE_PRE: + // we only support simple base + index, no advanced modes for this one yet + mMips->ADDU(R_at, Rn, amode.reg); + mMips->SW(Rd, R_at, 0); + break; + } +} + +void ArmToMipsAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + // work-around for ARM default address mode of immed12_pre(0) + if (offset > AMODE_UNSUPPORTED) offset = 0; + switch (offset) { + case 0: + amode.value = 0; + amode.writeback = 0; + // fall thru to next case .... 
+ case AMODE_IMM_12_PRE: + mMips->SB(Rd, Rn, amode.value); + if (amode.writeback) { // OPTIONAL writeback on pre-index mode + mMips->ADDIU(Rn, Rn, amode.value); + } + break; + case AMODE_IMM_12_POST: + mMips->SB(Rd, Rn, 0); + mMips->ADDIU(Rn, Rn, amode.value); + break; + case AMODE_REG_SCALE_PRE: + // we only support simple base + index, no advanced modes for this one yet + mMips->ADDU(R_at, Rn, amode.reg); + mMips->SB(Rd, R_at, 0); + break; + } +} + +void ArmToMipsAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + // work-around for ARM default address mode of immed8_pre(0) + if (offset > AMODE_UNSUPPORTED) offset = 0; + switch (offset) { + case 0: + amode.value = 0; + // fall thru to next case .... + case AMODE_IMM_8_PRE: // no support yet for writeback + mMips->LHU(Rd, Rn, amode.value); + break; + case AMODE_IMM_8_POST: + mMips->LHU(Rd, Rn, 0); + mMips->ADDIU(Rn, Rn, amode.value); + break; + case AMODE_REG_PRE: + // we only support simple base +/- index + if (amode.reg >= 0) { + mMips->ADDU(R_at, Rn, amode.reg); + } else { + mMips->SUBU(R_at, Rn, abs(amode.reg)); + } + mMips->LHU(Rd, R_at, 0); + break; + } +} + +void ArmToMipsAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) +{ + mArmPC[mInum++] = pc(); + // work-around for ARM default address mode of immed8_pre(0) + if (offset > AMODE_UNSUPPORTED) offset = 0; + switch (offset) { + case 0: + amode.value = 0; + // fall thru to next case .... 
+ case AMODE_IMM_8_PRE: // no support yet for writeback + mMips->SH(Rd, Rn, amode.value); + break; + case AMODE_IMM_8_POST: + mMips->SH(Rd, Rn, 0); + mMips->ADDIU(Rn, Rn, amode.value); + break; + case AMODE_REG_PRE: + // we only support simple base +/- index + if (amode.reg >= 0) { + mMips->ADDU(R_at, Rn, amode.reg); + } else { + mMips->SUBU(R_at, Rn, abs(amode.reg)); + } + mMips->SH(Rd, R_at, 0); + break; + } +} + + + +#if 0 +#pragma mark - +#pragma mark Block Data Transfer... +#endif + +// block data transfer... +void ArmToMipsAssembler::LDM(int cc, int dir, + int Rn, int W, uint32_t reg_list) +{ // ED FD EA FA IB IA DB DA + // const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 }; + // const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 }; + // *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | + // (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::STM(int cc, int dir, + int Rn, int W, uint32_t reg_list) +{ // FA EA FD ED IB IA DB DA + // const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 }; + // const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 }; + // *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) | + // (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + + + +#if 0 +#pragma mark - +#pragma mark Special... +#endif + +// special... 
+void ArmToMipsAssembler::SWP(int cc, int Rn, int Rd, int Rm) { + // *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::SWPB(int cc, int Rn, int Rd, int Rm) { + // *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::SWI(int cc, uint32_t comment) { + // *mPC++ = (cc<<28) | (0xF<<24) | comment; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + + +#if 0 +#pragma mark - +#pragma mark DSP instructions... +#endif + +// DSP instructions... +void ArmToMipsAssembler::PLD(int Rn, uint32_t offset) { + LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))), + "PLD only P=1, W=0"); + // *mPC++ = 0xF550F000 | (Rn<<16) | offset; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::CLZ(int cc, int Rd, int Rm) +{ + mArmPC[mInum++] = pc(); + mMips->CLZ(Rd, Rm); +} + +void ArmToMipsAssembler::QADD(int cc, int Rd, int Rm, int Rn) +{ + // *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::QDADD(int cc, int Rd, int Rm, int Rn) +{ + // *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::QSUB(int cc, int Rd, int Rm, int Rn) +{ + // *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::QDSUB(int cc, int Rd, int Rm, int Rn) +{ + // *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +// 16 x 16 signed multiply (like SMLAxx without the accumulate) +void ArmToMipsAssembler::SMUL(int cc, int xy, + int Rd, int Rm, int Rs) +{ + 
mArmPC[mInum++] = pc(); + + // the 16 bits may be in the top or bottom half of 32-bit source reg, + // as defined by the codes BB, BT, TB, TT (compressed param xy) + // where x corresponds to Rm and y to Rs + + // select half-reg for Rm + if (xy & xyTB) { + // use top 16-bits + mMips->SRA(R_at, Rm, 16); + } else { + // use bottom 16, but sign-extend to 32 + if (mips32r2) { + mMips->SEH(R_at, Rm); + } else { + mMips->SLL(R_at, Rm, 16); + mMips->SRA(R_at, R_at, 16); + } + } + // select half-reg for Rs + if (xy & xyBT) { + // use top 16-bits + mMips->SRA(R_at2, Rs, 16); + } else { + // use bottom 16, but sign-extend to 32 + if (mips32r2) { + mMips->SEH(R_at2, Rs); + } else { + mMips->SLL(R_at2, Rs, 16); + mMips->SRA(R_at2, R_at2, 16); + } + } + mMips->MUL(Rd, R_at, R_at2); +} + +// signed 32b x 16b multiple, save top 32-bits of 48-bit result +void ArmToMipsAssembler::SMULW(int cc, int y, + int Rd, int Rm, int Rs) +{ + mArmPC[mInum++] = pc(); + + // the selector yT or yB refers to reg Rs + if (y & yT) { + // zero the bottom 16-bits, with 2 shifts, it can affect result + mMips->SRL(R_at, Rs, 16); + mMips->SLL(R_at, R_at, 16); + + } else { + // move low 16-bit half, to high half + mMips->SLL(R_at, Rs, 16); + } + mMips->MULT(Rm, R_at); + mMips->MFHI(Rd); +} + +// 16 x 16 signed multiply, accumulate: Rd = Rm{16} * Rs{16} + Rn +void ArmToMipsAssembler::SMLA(int cc, int xy, + int Rd, int Rm, int Rs, int Rn) +{ + mArmPC[mInum++] = pc(); + + // the 16 bits may be in the top or bottom half of 32-bit source reg, + // as defined by the codes BB, BT, TB, TT (compressed param xy) + // where x corresponds to Rm and y to Rs + + // select half-reg for Rm + if (xy & xyTB) { + // use top 16-bits + mMips->SRA(R_at, Rm, 16); + } else { + // use bottom 16, but sign-extend to 32 + if (mips32r2) { + mMips->SEH(R_at, Rm); + } else { + mMips->SLL(R_at, Rm, 16); + mMips->SRA(R_at, R_at, 16); + } + } + // select half-reg for Rs + if (xy & xyBT) { + // use top 16-bits + mMips->SRA(R_at2, Rs, 16); 
+ } else { + // use bottom 16, but sign-extend to 32 + if (mips32r2) { + mMips->SEH(R_at2, Rs); + } else { + mMips->SLL(R_at2, Rs, 16); + mMips->SRA(R_at2, R_at2, 16); + } + } + + mMips->MUL(R_at, R_at, R_at2); + mMips->ADDU(Rd, R_at, Rn); +} + +void ArmToMipsAssembler::SMLAL(int cc, int xy, + int RdHi, int RdLo, int Rs, int Rm) +{ + // *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +void ArmToMipsAssembler::SMLAW(int cc, int y, + int Rd, int Rm, int Rs, int Rn) +{ + // *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm; + mArmPC[mInum++] = pc(); + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + +// used by ARMv6 version of GGLAssembler::filter32 +void ArmToMipsAssembler::UXTB16(int cc, int Rd, int Rm, int rotate) +{ + mArmPC[mInum++] = pc(); + + //Rd[31:16] := ZeroExtend((Rm ROR (8 * sh))[23:16]), + //Rd[15:0] := ZeroExtend((Rm ROR (8 * sh))[7:0]). sh 0-3. + + mMips->ROTR(Rm, Rm, rotate * 8); + mMips->AND(Rd, Rm, 0x00FF00FF); +} + +void ArmToMipsAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width) +{ + /* Placeholder for UBFX */ + mArmPC[mInum++] = pc(); + + mMips->NOP2(); + NOT_IMPLEMENTED(); +} + + + + + +#if 0 +#pragma mark - +#pragma mark MIPS Assembler... +#endif + + +//************************************************************************** +//************************************************************************** +//************************************************************************** + + +/* mips assembler +** this is a subset of mips32r2, targeted specifically at ARM instruction +** replacement in the pixelflinger/codeflinger code. +** +** To that end, there is no need for floating point, or priviledged +** instructions. This all runs in user space, no float. 
+** +** The syntax makes no attempt to be as complete as the assember, with +** synthetic instructions, and automatic recognition of immedate operands +** (use the immediate form of the instruction), etc. +** +** We start with mips32r1, and may add r2 and dsp extensions if cpu +** supports. Decision will be made at compile time, based on gcc +** options. (makes sense since android will be built for a a specific +** device) +*/ + +MIPSAssembler::MIPSAssembler(const sp<Assembly>& assembly, ArmToMipsAssembler *parent) + : mParent(parent), + mAssembly(assembly) +{ + mBase = mPC = (uint32_t *)assembly->base(); + mDuration = ggl_system_time(); +} + +MIPSAssembler::~MIPSAssembler() +{ +} + + +uint32_t* MIPSAssembler::pc() const +{ + return mPC; +} + +uint32_t* MIPSAssembler::base() const +{ + return mBase; +} + +void MIPSAssembler::reset() +{ + mBase = mPC = (uint32_t *)mAssembly->base(); + mBranchTargets.clear(); + mLabels.clear(); + mLabelsInverseMapping.clear(); + mComments.clear(); +} + + +// convert tabs to spaces, and remove any newline +// works with strings of limited size (makes a temp copy) +#define TABSTOP 8 +void MIPSAssembler::string_detab(char *s) +{ + char *os = s; + char temp[100]; + char *t = temp; + int len = 99; + int i = TABSTOP; + + while (*s && len-- > 0) { + if (*s == '\n') { s++; continue; } + if (*s == '\t') { + s++; + for ( ; i>0; i--) {*t++ = ' '; len--; } + } else { + *t++ = *s++; + } + if (i <= 0) i = TABSTOP; + i--; + } + *t = '\0'; + strcpy(os, temp); +} + +void MIPSAssembler::string_pad(char *s, int padded_len) +{ + int len = strlen(s); + s += len; + for (int i = padded_len - len; i > 0; --i) { + *s++ = ' '; + } + *s = '\0'; +} + +// ---------------------------------------------------------------------------- + +void MIPSAssembler::disassemble(const char* name) +{ + char di_buf[140]; + + if (name) { + ALOGW("%s:\n", name); + } + + bool arm_disasm_fmt = (mParent->mArmDisassemblyBuffer == NULL) ? 
false : true; + + typedef char dstr[40]; + dstr *lines = (dstr *)mParent->mArmDisassemblyBuffer; + + if (mParent->mArmDisassemblyBuffer != NULL) { + for (int i=0; i<mParent->mArmInstrCount; ++i) { + string_detab(lines[i]); + } + } + + // iArm is an index to Arm instructions 1...n for this assembly sequence + // mArmPC[iArm] holds the value of the Mips-PC for the first MIPS + // instruction corresponding to that Arm instruction number + + int iArm = 0; + size_t count = pc()-base(); + uint32_t* mipsPC = base(); + while (count--) { + ssize_t label = mLabelsInverseMapping.indexOfKey(mipsPC); + if (label >= 0) { + ALOGW("%s:\n", mLabelsInverseMapping.valueAt(label)); + } + ssize_t comment = mComments.indexOfKey(mipsPC); + if (comment >= 0) { + ALOGW("; %s\n", mComments.valueAt(comment)); + } + // ALOGW("%08x: %08x ", int(i), int(i[0])); + ::mips_disassem(mipsPC, di_buf, arm_disasm_fmt); + string_detab(di_buf); + string_pad(di_buf, 30); + ALOGW("%08x: %08x %s", uint32_t(mipsPC), uint32_t(*mipsPC), di_buf); + mipsPC++; + } +} + +void MIPSAssembler::comment(const char* string) +{ + mComments.add(pc(), string); +} + +void MIPSAssembler::label(const char* theLabel) +{ + mLabels.add(theLabel, pc()); + mLabelsInverseMapping.add(pc(), theLabel); +} + + +void MIPSAssembler::prolog() +{ + // empty - done in ArmToMipsAssembler +} + +void MIPSAssembler::epilog(uint32_t touched) +{ + // empty - done in ArmToMipsAssembler +} + +int MIPSAssembler::generate(const char* name) +{ + // fixup all the branches + size_t count = mBranchTargets.size(); + while (count--) { + const branch_target_t& bt = mBranchTargets[count]; + uint32_t* target_pc = mLabels.valueFor(bt.label); + LOG_ALWAYS_FATAL_IF(!target_pc, + "error resolving branch targets, target_pc is null"); + int32_t offset = int32_t(target_pc - (bt.pc+1)); + *bt.pc |= offset & 0x00FFFF; + } + + mAssembly->resize( int(pc()-base())*4 ); + + // the instruction & data caches are flushed by CodeCache + const int64_t duration = 
ggl_system_time() - mDuration; + const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n"; + ALOGI(format, name, int(pc()-base()), base(), pc(), duration); + +#if defined(WITH_LIB_HARDWARE) + if (__builtin_expect(mQemuTracing, 0)) { + int err = qemu_add_mapping(int(base()), name); + mQemuTracing = (err >= 0); + } +#endif + + char value[PROPERTY_VALUE_MAX]; + value[0] = '\0'; + + property_get("debug.pf.disasm", value, "0"); + + if (atoi(value) != 0) { + disassemble(name); + } + + return NO_ERROR; +} + +uint32_t* MIPSAssembler::pcForLabel(const char* label) +{ + return mLabels.valueFor(label); +} + + + +#if 0 +#pragma mark - +#pragma mark Arithmetic... +#endif + +void MIPSAssembler::ADDU(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (addu_fn<<FUNC_SHF) + | (Rs<<RS_SHF) | (Rt<<RT_SHF) | (Rd<<RD_SHF); +} + +// MD00086 pdf says this is: ADDIU rt, rs, imm -- they do not use Rd +void MIPSAssembler::ADDIU(int Rt, int Rs, int16_t imm) +{ + *mPC++ = (addiu_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16); +} + + +void MIPSAssembler::SUBU(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (subu_fn<<FUNC_SHF) | + (Rs<<RS_SHF) | (Rt<<RT_SHF) | (Rd<<RD_SHF) ; +} + + +void MIPSAssembler::SUBIU(int Rt, int Rs, int16_t imm) // really addiu(d, s, -j) +{ + *mPC++ = (addiu_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | ((-imm) & MSK_16); +} + + +void MIPSAssembler::NEGU(int Rd, int Rs) // really subu(d, zero, s) +{ + MIPSAssembler::SUBU(Rd, 0, Rs); +} + +void MIPSAssembler::MUL(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec2_op<<OP_SHF) | (mul_fn<<FUNC_SHF) | + (Rs<<RS_SHF) | (Rt<<RT_SHF) | (Rd<<RD_SHF) ; +} + +void MIPSAssembler::MULT(int Rs, int Rt) // dest is hi,lo +{ + *mPC++ = (spec_op<<OP_SHF) | (mult_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF); +} + +void MIPSAssembler::MULTU(int Rs, int Rt) // dest is hi,lo +{ + *mPC++ = (spec_op<<OP_SHF) | (multu_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF); +} + +void MIPSAssembler::MADD(int Rs, int 
Rt) // hi,lo = hi,lo + Rs * Rt +{ + *mPC++ = (spec2_op<<OP_SHF) | (madd_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF); +} + +void MIPSAssembler::MADDU(int Rs, int Rt) // hi,lo = hi,lo + Rs * Rt +{ + *mPC++ = (spec2_op<<OP_SHF) | (maddu_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF); +} + + +void MIPSAssembler::MSUB(int Rs, int Rt) // hi,lo = hi,lo - Rs * Rt +{ + *mPC++ = (spec2_op<<OP_SHF) | (msub_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF); +} + +void MIPSAssembler::MSUBU(int Rs, int Rt) // hi,lo = hi,lo - Rs * Rt +{ + *mPC++ = (spec2_op<<OP_SHF) | (msubu_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF); +} + + +void MIPSAssembler::SEB(int Rd, int Rt) // sign-extend byte (mips32r2) +{ + *mPC++ = (spec3_op<<OP_SHF) | (bshfl_fn<<FUNC_SHF) | (seb_fn << SA_SHF) | + (Rt<<RT_SHF) | (Rd<<RD_SHF); +} + +void MIPSAssembler::SEH(int Rd, int Rt) // sign-extend half-word (mips32r2) +{ + *mPC++ = (spec3_op<<OP_SHF) | (bshfl_fn<<FUNC_SHF) | (seh_fn << SA_SHF) | + (Rt<<RT_SHF) | (Rd<<RD_SHF); +} + + + +#if 0 +#pragma mark - +#pragma mark Comparisons... +#endif + +void MIPSAssembler::SLT(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (slt_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::SLTI(int Rt, int Rs, int16_t imm) +{ + *mPC++ = (slti_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16); +} + + +void MIPSAssembler::SLTU(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (sltu_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::SLTIU(int Rt, int Rs, int16_t imm) +{ + *mPC++ = (sltiu_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16); +} + + + +#if 0 +#pragma mark - +#pragma mark Logical... 
+#endif + +void MIPSAssembler::AND(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (and_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::ANDI(int Rt, int Rs, uint16_t imm) // todo: support larger immediate +{ + *mPC++ = (andi_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16); +} + + +void MIPSAssembler::OR(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (or_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::ORI(int Rt, int Rs, uint16_t imm) +{ + *mPC++ = (ori_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16); +} + +void MIPSAssembler::NOR(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (nor_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::NOT(int Rd, int Rs) +{ + MIPSAssembler::NOR(Rd, Rs, 0); // NOT(d,s) = NOR(d,s,zero) +} + +void MIPSAssembler::XOR(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (xor_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::XORI(int Rt, int Rs, uint16_t imm) // todo: support larger immediate +{ + *mPC++ = (xori_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16); +} + +void MIPSAssembler::SLL(int Rd, int Rt, int shft) +{ + *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF); +} + +void MIPSAssembler::SLLV(int Rd, int Rt, int Rs) +{ + *mPC++ = (spec_op<<OP_SHF) | (sllv_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::SRL(int Rd, int Rt, int shft) +{ + *mPC++ = (spec_op<<OP_SHF) | (srl_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF); +} + +void MIPSAssembler::SRLV(int Rd, int Rt, int Rs) +{ + *mPC++ = (spec_op<<OP_SHF) | (srlv_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::SRA(int Rd, int Rt, int shft) +{ + *mPC++ = (spec_op<<OP_SHF) | (sra_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rt<<RT_SHF) | 
(shft<<RE_SHF); +} + +void MIPSAssembler::SRAV(int Rd, int Rt, int Rs) +{ + *mPC++ = (spec_op<<OP_SHF) | (srav_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::ROTR(int Rd, int Rt, int shft) // mips32r2 +{ + // note weird encoding (SRL + 1) + *mPC++ = (spec_op<<OP_SHF) | (srl_fn<<FUNC_SHF) | + (1<<RS_SHF) | (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF); +} + +void MIPSAssembler::ROTRV(int Rd, int Rt, int Rs) // mips32r2 +{ + // note weird encoding (SRLV + 1) + *mPC++ = (spec_op<<OP_SHF) | (srlv_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF) | (1<<RE_SHF); +} + +// uses at2 register (mapped to some appropriate mips reg) +void MIPSAssembler::RORsyn(int Rd, int Rt, int Rs) +{ + // synthetic: d = t rotated by s + MIPSAssembler::NEGU(R_at2, Rs); + MIPSAssembler::SLLV(R_at2, Rt, R_at2); + MIPSAssembler::SRLV(Rd, Rt, Rs); + MIPSAssembler::OR(Rd, Rd, R_at2); +} + +// immediate version - uses at2 register (mapped to some appropriate mips reg) +void MIPSAssembler::RORIsyn(int Rd, int Rt, int rot) +{ + // synthetic: d = t rotated by immed rot + // d = s >> rot | s << (32-rot) + MIPSAssembler::SLL(R_at2, Rt, 32-rot); + MIPSAssembler::SRL(Rd, Rt, rot); + MIPSAssembler::OR(Rd, Rd, R_at2); +} + +void MIPSAssembler::CLO(int Rd, int Rs) +{ + // Rt field must have same gpr # as Rd + *mPC++ = (spec2_op<<OP_SHF) | (clo_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rd<<RT_SHF); +} + +void MIPSAssembler::CLZ(int Rd, int Rs) +{ + // Rt field must have same gpr # as Rd + *mPC++ = (spec2_op<<OP_SHF) | (clz_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rd<<RT_SHF); +} + +void MIPSAssembler::WSBH(int Rd, int Rt) // mips32r2 +{ + *mPC++ = (spec3_op<<OP_SHF) | (bshfl_fn<<FUNC_SHF) | (wsbh_fn << SA_SHF) | + (Rt<<RT_SHF) | (Rd<<RD_SHF); +} + + + +#if 0 +#pragma mark - +#pragma mark Load/store... 
+#endif + +void MIPSAssembler::LW(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (lw_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +void MIPSAssembler::SW(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (sw_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +// lb is sign-extended +void MIPSAssembler::LB(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (lb_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +void MIPSAssembler::LBU(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (lbu_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +void MIPSAssembler::SB(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (sb_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +// lh is sign-extended +void MIPSAssembler::LH(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (lh_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +void MIPSAssembler::LHU(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (lhu_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +void MIPSAssembler::SH(int Rt, int Rbase, int16_t offset) +{ + *mPC++ = (sh_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + +void MIPSAssembler::LUI(int Rt, int16_t offset) +{ + *mPC++ = (lui_op<<OP_SHF) | (Rt<<RT_SHF) | (offset & MSK_16); +} + + + +#if 0 +#pragma mark - +#pragma mark Register move... 
+#endif + +void MIPSAssembler::MOVE(int Rd, int Rs) +{ + // encoded as "or rd, rs, zero" + *mPC++ = (spec_op<<OP_SHF) | (or_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (0<<RT_SHF); +} + +void MIPSAssembler::MOVN(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (movn_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::MOVZ(int Rd, int Rs, int Rt) +{ + *mPC++ = (spec_op<<OP_SHF) | (movz_fn<<FUNC_SHF) | + (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF); +} + +void MIPSAssembler::MFHI(int Rd) +{ + *mPC++ = (spec_op<<OP_SHF) | (mfhi_fn<<FUNC_SHF) | (Rd<<RD_SHF); +} + +void MIPSAssembler::MFLO(int Rd) +{ + *mPC++ = (spec_op<<OP_SHF) | (mflo_fn<<FUNC_SHF) | (Rd<<RD_SHF); +} + +void MIPSAssembler::MTHI(int Rs) +{ + *mPC++ = (spec_op<<OP_SHF) | (mthi_fn<<FUNC_SHF) | (Rs<<RS_SHF); +} + +void MIPSAssembler::MTLO(int Rs) +{ + *mPC++ = (spec_op<<OP_SHF) | (mtlo_fn<<FUNC_SHF) | (Rs<<RS_SHF); +} + + + +#if 0 +#pragma mark - +#pragma mark Branch... +#endif + +// temporarily forcing a NOP into branch-delay slot, just to be safe +// todo: remove NOP, optimze use of delay slots +void MIPSAssembler::B(const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + + // encoded as BEQ zero, zero, offset + *mPC++ = (beq_op<<OP_SHF) | (0<<RT_SHF) + | (0<<RS_SHF) | 0; // offset filled in later + + MIPSAssembler::NOP(); +} + +void MIPSAssembler::BEQ(int Rs, int Rt, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + *mPC++ = (beq_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | 0; + MIPSAssembler::NOP(); +} + +void MIPSAssembler::BNE(int Rs, int Rt, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + *mPC++ = (bne_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | 0; + MIPSAssembler::NOP(); +} + +void MIPSAssembler::BLEZ(int Rs, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + *mPC++ = (blez_op<<OP_SHF) | (0<<RT_SHF) | (Rs<<RS_SHF) | 0; + MIPSAssembler::NOP(); +} + +void 
MIPSAssembler::BLTZ(int Rs, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + *mPC++ = (regimm_op<<OP_SHF) | (bltz_fn<<RT_SHF) | (Rs<<RS_SHF) | 0; + MIPSAssembler::NOP(); +} + +void MIPSAssembler::BGTZ(int Rs, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + *mPC++ = (bgtz_op<<OP_SHF) | (0<<RT_SHF) | (Rs<<RS_SHF) | 0; + MIPSAssembler::NOP(); +} + + +void MIPSAssembler::BGEZ(int Rs, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + *mPC++ = (regimm_op<<OP_SHF) | (bgez_fn<<RT_SHF) | (Rs<<RS_SHF) | 0; + MIPSAssembler::NOP(); +} + +void MIPSAssembler::JR(int Rs) +{ + *mPC++ = (spec_op<<OP_SHF) | (Rs<<RS_SHF) | (jr_fn << FUNC_SHF); + MIPSAssembler::NOP(); +} + + +#if 0 +#pragma mark - +#pragma mark Synthesized Branch... +#endif + +// synthetic variants of branches (using slt & friends) +void MIPSAssembler::BEQZ(int Rs, const char* label) +{ + BEQ(Rs, R_zero, label); +} + +void MIPSAssembler::BNEZ(int Rs, const char* label) +{ + BNE(R_at, R_zero, label); +} + +void MIPSAssembler::BGE(int Rs, int Rt, const char* label) +{ + SLT(R_at, Rs, Rt); + BEQ(R_at, R_zero, label); +} + +void MIPSAssembler::BGEU(int Rs, int Rt, const char* label) +{ + SLTU(R_at, Rs, Rt); + BEQ(R_at, R_zero, label); +} + +void MIPSAssembler::BGT(int Rs, int Rt, const char* label) +{ + SLT(R_at, Rt, Rs); // rev + BNE(R_at, R_zero, label); +} + +void MIPSAssembler::BGTU(int Rs, int Rt, const char* label) +{ + SLTU(R_at, Rt, Rs); // rev + BNE(R_at, R_zero, label); +} + +void MIPSAssembler::BLE(int Rs, int Rt, const char* label) +{ + SLT(R_at, Rt, Rs); // rev + BEQ(R_at, R_zero, label); +} + +void MIPSAssembler::BLEU(int Rs, int Rt, const char* label) +{ + SLTU(R_at, Rt, Rs); // rev + BEQ(R_at, R_zero, label); +} + +void MIPSAssembler::BLT(int Rs, int Rt, const char* label) +{ + SLT(R_at, Rs, Rt); + BNE(R_at, R_zero, label); +} + +void MIPSAssembler::BLTU(int Rs, int Rt, const char* label) +{ + SLTU(R_at, Rs, Rt); + BNE(R_at, 
R_zero, label); +} + + + + +#if 0 +#pragma mark - +#pragma mark Misc... +#endif + +void MIPSAssembler::NOP(void) +{ + // encoded as "sll zero, zero, 0", which is all zero + *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF); +} + +// using this as special opcode for not-yet-implemented ARM instruction +void MIPSAssembler::NOP2(void) +{ + // encoded as "sll zero, zero, 2", still a nop, but a unique code + *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF) | (2 << RE_SHF); +} + +// using this as special opcode for purposefully NOT implemented ARM instruction +void MIPSAssembler::UNIMPL(void) +{ + // encoded as "sll zero, zero, 3", still a nop, but a unique code + *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF) | (3 << RE_SHF); +} + + +}; // namespace android: + + diff --git a/libpixelflinger/codeflinger/MIPSAssembler.h b/libpixelflinger/codeflinger/MIPSAssembler.h new file mode 100644 index 000000000..d8e8165e9 --- /dev/null +++ b/libpixelflinger/codeflinger/MIPSAssembler.h @@ -0,0 +1,555 @@ +/* libs/pixelflinger/codeflinger/MIPSAssembler.h +** +** Copyright 2012, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
// This class mimics the ARMAssembler interface.
// The intent is to translate each ARM instruction into one or more MIPS
// instructions; the implementation calls the MIPSAssembler class to
// generate the MIPS code.
class ArmToMipsAssembler : public ARMAssemblerInterface
{
public:
                ArmToMipsAssembler(const sp<Assembly>& assembly,
                        char *abuf = 0, int linesz = 0, int instr_count = 0);
    virtual     ~ArmToMipsAssembler();

    uint32_t*   base() const;
    uint32_t*   pc() const;
    void        disassemble(const char* name);

    virtual void    reset();

    virtual int     generate(const char* name);
    virtual int     getCodegenArch();

    virtual void    prolog();
    virtual void    epilog(uint32_t touched);
    virtual void    comment(const char* string);


    // -----------------------------------------------------------------------
    // shifters and addressing modes
    // -----------------------------------------------------------------------

    // shifters...
    virtual bool        isValidImmediate(uint32_t immed);
    virtual int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);

    virtual uint32_t    imm(uint32_t immediate);
    virtual uint32_t    reg_imm(int Rm, int type, uint32_t shift);
    virtual uint32_t    reg_rrx(int Rm);
    virtual uint32_t    reg_reg(int Rm, int type, int Rs);

    // addressing modes...
    // LDR(B)/STR(B)/PLD
    // (immediate and Rm can be negative, which indicates U=0)
    virtual uint32_t    immed12_pre(int32_t immed12, int W=0);
    virtual uint32_t    immed12_post(int32_t immed12);
    virtual uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
    virtual uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0);

    // LDRH/LDRSB/LDRSH/STRH
    // (immediate and Rm can be negative, which indicates U=0)
    virtual uint32_t    immed8_pre(int32_t immed8, int W=0);
    virtual uint32_t    immed8_post(int32_t immed8);
    virtual uint32_t    reg_pre(int Rm, int W=0);
    virtual uint32_t    reg_post(int Rm);




    // ARM data-processing and multiply instructions
    virtual void    dataProcessing(int opcode, int cc, int s,
                                int Rd, int Rn,
                                uint32_t Op2);
    virtual void MLA(int cc, int s,
                int Rd, int Rm, int Rs, int Rn);
    virtual void MUL(int cc, int s,
                int Rd, int Rm, int Rs);
    virtual void UMULL(int cc, int s,
                int RdLo, int RdHi, int Rm, int Rs);
    virtual void UMUAL(int cc, int s,
                int RdLo, int RdHi, int Rm, int Rs);
    virtual void SMULL(int cc, int s,
                int RdLo, int RdHi, int Rm, int Rs);
    virtual void SMUAL(int cc, int s,
                int RdLo, int RdHi, int Rm, int Rs);

    // branches and labels
    virtual void B(int cc, uint32_t* pc);
    virtual void BL(int cc, uint32_t* pc);
    virtual void BX(int cc, int Rn);
    virtual void label(const char* theLabel);
    virtual void B(int cc, const char* label);
    virtual void BL(int cc, const char* label);

    virtual uint32_t* pcForLabel(const char* label);

    // single-register loads and stores
    virtual void LDR (int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void LDRB(int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void STR (int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void STRB(int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void LDRH (int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void LDRSB(int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void LDRSH(int cc, int Rd,
                int Rn, uint32_t offset = 0);
    virtual void STRH (int cc, int Rd,
                int Rn, uint32_t offset = 0);

    // multi-register loads and stores
    virtual void LDM(int cc, int dir,
                int Rn, int W, uint32_t reg_list);
    virtual void STM(int cc, int dir,
                int Rn, int W, uint32_t reg_list);

    virtual void SWP(int cc, int Rn, int Rd, int Rm);
    virtual void SWPB(int cc, int Rn, int Rd, int Rm);
    virtual void SWI(int cc, uint32_t comment);

    virtual void PLD(int Rn, uint32_t offset);
    virtual void CLZ(int cc, int Rd, int Rm);
    virtual void QADD(int cc, int Rd, int Rm, int Rn);
    virtual void QDADD(int cc, int Rd, int Rm, int Rn);
    virtual void QSUB(int cc, int Rd, int Rm, int Rn);
    virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
    // 16-bit multiplies; xy / y select which half of each source is used
    virtual void SMUL(int cc, int xy,
                int Rd, int Rm, int Rs);
    virtual void SMULW(int cc, int y,
                int Rd, int Rm, int Rs);
    virtual void SMLA(int cc, int xy,
                int Rd, int Rm, int Rs, int Rn);
    virtual void SMLAL(int cc, int xy,
                int RdHi, int RdLo, int Rs, int Rm);
    virtual void SMLAW(int cc, int y,
                int Rd, int Rm, int Rs, int Rn);

    // byte/half word extract...
    virtual void UXTB16(int cc, int Rd, int Rm, int rotate);

    // bit manipulation...
    virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);

    // State shared with the MIPSAssembler class for debug disassembly.
    // MIPSAssembler::disassemble() treats mArmDisassemblyBuffer as an array
    // of mArmInstrCount text lines.
    char *      mArmDisassemblyBuffer;
    int         mArmLineLength;
    int         mArmInstrCount;

    int         mInum;      // current arm instruction number (0..n)
    uint32_t**  mArmPC;     // array: PC for 1st mips instr of
                            //      each translated ARM instr


private:
    // declared private so the object cannot be copied
    ArmToMipsAssembler(const ArmToMipsAssembler& rhs);
    ArmToMipsAssembler& operator = (const ArmToMipsAssembler& rhs);

    void init_conditional_labels(void);

    void protectConditionalOperands(int Rd);

    // reg_tmp defaults to MIPS AT, reg 1
    int dataProcAdrModes(int op, int& source, bool sign = false, int reg_tmp = 1);

    sp<Assembly>        mAssembly;
    MIPSAssembler*      mMips;      // emitter for the translated MIPS code


    enum misc_constants_t {
        ARM_MAX_INSTUCTIONS = 512  // based on ASSEMBLY_SCRATCH_SIZE
    };

    enum {
        SRC_REG = 0,
        SRC_IMM,
        SRC_ERROR = -1
    };

    enum addr_modes {
        // start above the range of legal mips reg #'s (0-31)
        AMODE_REG = 0x20,
        AMODE_IMM, AMODE_REG_IMM,               // for data processing
        AMODE_IMM_12_PRE, AMODE_IMM_12_POST,    // for load/store
        AMODE_REG_SCALE_PRE, AMODE_IMM_8_PRE,
        AMODE_IMM_8_POST, AMODE_REG_PRE,
        AMODE_UNSUPPORTED
    };

    struct addr_mode_t {    // address modes for current ARM instruction
        int         reg;
        int         stype;
        uint32_t    value;
        bool        writeback;    // writeback the adr reg after modification
    } amode;

    enum cond_types {
        CMP_COND = 1,
        SBIT_COND
    };

    struct cond_mode_t {    // conditional-execution info for current ARM instruction
        cond_types  type;
        int         r1;
        int         r2;
        int         labelnum;
        // NOTE(review): presumably a table of 100 generated label names of
        // up to 9 characters each - confirm against init_conditional_labels()
        char        label[100][10];
    } cond;

};
// All the more complicated work is done in ArmToMipsAssembler above.
//
// MIPSAssembler provides one method per MIPS instruction; each appends its
// 32-bit encoding(s) at the current pc. Branches are recorded by label and
// have their offsets patched in generate().

class MIPSAssembler
{
public:
                MIPSAssembler(const sp<Assembly>& assembly, ArmToMipsAssembler *parent);
    virtual     ~MIPSAssembler();

    uint32_t*   base() const;
    uint32_t*   pc() const;
    void        reset();

    void        disassemble(const char* name);

    void        prolog();
    void        epilog(uint32_t touched);
    int         generate(const char* name);
    void        comment(const char* string);
    void        label(const char* string);

    // valid only after generate() has been called
    uint32_t*   pcForLabel(const char* label);


    // ------------------------------------------------------------------------
    // MIPSAssemblerInterface...
    // ------------------------------------------------------------------------

#if 0
#pragma mark -
#pragma mark Arithmetic...
#endif

    void ADDU(int Rd, int Rs, int Rt);
    void ADDIU(int Rt, int Rs, int16_t imm);
    void SUBU(int Rd, int Rs, int Rt);
    void SUBIU(int Rt, int Rs, int16_t imm);    // emitted as addiu with -imm
    void NEGU(int Rd, int Rs);                  // emitted as subu Rd, zero, Rs
    void MUL(int Rd, int Rs, int Rt);
    void MULT(int Rs, int Rt);      // dest is hi,lo
    void MULTU(int Rs, int Rt);     // dest is hi,lo
    void MADD(int Rs, int Rt);      // hi,lo = hi,lo + Rs * Rt
    void MADDU(int Rs, int Rt);     // hi,lo = hi,lo + Rs * Rt
    void MSUB(int Rs, int Rt);      // hi,lo = hi,lo - Rs * Rt
    void MSUBU(int Rs, int Rt);     // hi,lo = hi,lo - Rs * Rt
    void SEB(int Rd, int Rt);       // sign-extend byte (mips32r2)
    void SEH(int Rd, int Rt);       // sign-extend half-word (mips32r2)


#if 0
#pragma mark -
#pragma mark Comparisons...
#endif

    void SLT(int Rd, int Rs, int Rt);
    void SLTI(int Rt, int Rs, int16_t imm);
    void SLTU(int Rd, int Rs, int Rt);
    void SLTIU(int Rt, int Rs, int16_t imm);


#if 0
#pragma mark -
#pragma mark Logical...
#endif

    void AND(int Rd, int Rs, int Rt);
    void ANDI(int Rd, int Rs, uint16_t imm);
    void OR(int Rd, int Rs, int Rt);
    void ORI(int Rt, int Rs, uint16_t imm);
    void NOR(int Rd, int Rs, int Rt);
    void NOT(int Rd, int Rs);                   // emitted as nor Rd, Rs, zero
    void XOR(int Rd, int Rs, int Rt);
    void XORI(int Rt, int Rs, uint16_t imm);

    void SLL(int Rd, int Rt, int shft);
    void SLLV(int Rd, int Rt, int Rs);
    void SRL(int Rd, int Rt, int shft);
    void SRLV(int Rd, int Rt, int Rs);
    void SRA(int Rd, int Rt, int shft);
    void SRAV(int Rd, int Rt, int Rs);
    void ROTR(int Rd, int Rt, int shft);    // mips32r2
    void ROTRV(int Rd, int Rt, int Rs);     // mips32r2
    // The two synthetic rotates use the R_at2 scratch register.
    void RORsyn(int Rd, int Rs, int Rt);    // synthetic: d = s rotated by t
    void RORIsyn(int Rd, int Rt, int rot);  // synthetic: d = t rotated by immed

    void CLO(int Rd, int Rs);
    void CLZ(int Rd, int Rs);
    void WSBH(int Rd, int Rt);              // mips32r2


#if 0
#pragma mark -
#pragma mark Load/store...
#endif

    void LW(int Rt, int Rbase, int16_t offset);
    void SW(int Rt, int Rbase, int16_t offset);
    void LB(int Rt, int Rbase, int16_t offset);     // sign-extended
    void LBU(int Rt, int Rbase, int16_t offset);
    void SB(int Rt, int Rbase, int16_t offset);
    void LH(int Rt, int Rbase, int16_t offset);     // sign-extended
    void LHU(int Rt, int Rbase, int16_t offset);
    void SH(int Rt, int Rbase, int16_t offset);
    void LUI(int Rt, int16_t offset);

#if 0
#pragma mark -
#pragma mark Register moves...
#endif

    void MOVE(int Rd, int Rs);              // emitted as or Rd, Rs, zero
    void MOVN(int Rd, int Rs, int Rt);
    void MOVZ(int Rd, int Rs, int Rt);
    void MFHI(int Rd);
    void MFLO(int Rd);
    void MTHI(int Rs);
    void MTLO(int Rs);

#if 0
#pragma mark -
#pragma mark Branch...
#endif

    // Each branch (and JR) is followed by a NOP in its delay slot.
    void B(const char* label);
    void BEQ(int Rs, int Rt, const char* label);
    void BNE(int Rs, int Rt, const char* label);
    void BGEZ(int Rs, const char* label);
    void BGTZ(int Rs, const char* label);
    void BLEZ(int Rs, const char* label);
    void BLTZ(int Rs, const char* label);
    void JR(int Rs);


#if 0
#pragma mark -
#pragma mark Synthesized Branch...
#endif

    // synthetic variants of above (using slt & friends)
    // The two-register compare forms write their slt/sltu result to R_at.
    void BEQZ(int Rs, const char* label);
    void BNEZ(int Rs, const char* label);
    void BGE(int Rs, int Rt, const char* label);
    void BGEU(int Rs, int Rt, const char* label);
    void BGT(int Rs, int Rt, const char* label);
    void BGTU(int Rs, int Rt, const char* label);
    void BLE(int Rs, int Rt, const char* label);
    void BLEU(int Rs, int Rt, const char* label);
    void BLT(int Rs, int Rt, const char* label);
    void BLTU(int Rs, int Rt, const char* label);

#if 0
#pragma mark -
#pragma mark Misc...
#endif

    void NOP(void);
    void NOP2(void);        // nop variant marking a not-yet-implemented ARM instruction
    void UNIMPL(void);      // nop variant marking a purposefully unimplemented ARM instruction





private:
    // helpers used by disassemble() to tidy the disassembly text
    void string_detab(char *s);
    void string_pad(char *s, int padded_len);

    ArmToMipsAssembler *mParent;    // owner; supplies the ARM disassembly buffer
    sp<Assembly>    mAssembly;      // destination code buffer
    uint32_t*       mBase;          // start of the code buffer
    uint32_t*       mPC;            // next instruction slot
    uint32_t*       mPrologPC;
    int64_t         mDuration;      // ggl_system_time() taken at construction
#if defined(WITH_LIB_HARDWARE)
    bool            mQemuTracing;
#endif

    // a branch instruction whose offset field is filled in by generate()
    struct branch_target_t {
        inline branch_target_t() : label(0), pc(0) { }
        inline branch_target_t(const char* l, uint32_t* p)
            : label(l), pc(p) { }
        const char* label;
        uint32_t*   pc;
    };

    Vector<branch_target_t>                 mBranchTargets;
    KeyedVector< const char*, uint32_t* >   mLabels;
    KeyedVector< uint32_t*, const char* >   mLabelsInverseMapping;
    KeyedVector< uint32_t*, const char* >   mComments;




    // opcode field of all instructions
    enum opcode_field {
        spec_op, regimm_op, j_op, jal_op,           // 00
        beq_op, bne_op, blez_op, bgtz_op,
        addi_op, addiu_op, slti_op, sltiu_op,       // 08
        andi_op, ori_op, xori_op, lui_op,
        cop0_op, cop1_op, cop2_op, cop1x_op,        // 10
        beql_op, bnel_op, blezl_op, bgtzl_op,
        daddi_op, daddiu_op, ldl_op, ldr_op,        // 18
        spec2_op, jalx_op, mdmx_op, spec3_op,
        lb_op, lh_op, lwl_op, lw_op,                // 20
        lbu_op, lhu_op, lwr_op, lwu_op,
        sb_op, sh_op, swl_op, sw_op,                // 28
        sdl_op, sdr_op, swr_op, cache_op,
        ll_op, lwc1_op, lwc2_op, pref_op,           // 30
        lld_op, ldc1_op, ldc2_op, ld_op,
        sc_op, swc1_op, swc2_op, rsrv_3b_op,        // 38
        scd_op, sdc1_op, sdc2_op, sd_op
    };


    // func field for special opcode
    enum func_spec_op {
        sll_fn, movc_fn, srl_fn, sra_fn,            // 00
        sllv_fn, pmon_fn, srlv_fn, srav_fn,
        jr_fn, jalr_fn, movz_fn, movn_fn,           // 08
        syscall_fn, break_fn, spim_fn, sync_fn,
        mfhi_fn, mthi_fn, mflo_fn, mtlo_fn,         // 10
        dsllv_fn, rsrv_spec_2, dsrlv_fn, dsrav_fn,
        mult_fn, multu_fn, div_fn, divu_fn,         // 18
        dmult_fn, dmultu_fn, ddiv_fn, ddivu_fn,
        add_fn, addu_fn, sub_fn, subu_fn,           // 20
        and_fn, or_fn, xor_fn, nor_fn,
        rsrv_spec_3, rsrv_spec_4, slt_fn, sltu_fn,  // 28
        dadd_fn, daddu_fn, dsub_fn, dsubu_fn,
        tge_fn, tgeu_fn, tlt_fn, tltu_fn,           // 30
        teq_fn, rsrv_spec_5, tne_fn, rsrv_spec_6,
        dsll_fn, rsrv_spec_7, dsrl_fn, dsra_fn,     // 38
        dsll32_fn, rsrv_spec_8, dsrl32_fn, dsra32_fn
    };

    // func field for spec2 opcode
    enum func_spec2_op {
        madd_fn, maddu_fn, mul_fn, rsrv_spec2_3,
        msub_fn, msubu_fn,
        clz_fn = 0x20, clo_fn,
        dclz_fn = 0x24, dclo_fn,
        sdbbp_fn = 0x3f
    };

    // func field for spec3 opcode
    enum func_spec3_op {
        ext_fn, dextm_fn, dextu_fn, dext_fn,
        ins_fn, dinsm_fn, dinsu_fn, dins_fn,
        bshfl_fn = 0x20,
        dbshfl_fn = 0x24,
        rdhwr_fn = 0x3b
    };

    // sa field for spec3 opcodes, with BSHFL function
    enum func_spec3_bshfl {
        wsbh_fn = 0x02,
        seb_fn = 0x10,
        seh_fn = 0x18
    };

    // rt field of regimm opcodes.
    enum regimm_fn {
        bltz_fn, bgez_fn, bltzl_fn, bgezl_fn,
        rsrv_ri_fn4, rsrv_ri_fn5, rsrv_ri_fn6, rsrv_ri_fn7,
        tgei_fn, tgeiu_fn, tlti_fn, tltiu_fn,
        teqi_fn, rsrv_ri_fn_0d, tnei_fn, rsrv_ri_fn0f,
        bltzal_fn, bgezal_fn, bltzall_fn, bgezall_fn,
        bposge32_fn= 0x1c,
        synci_fn = 0x1f
    };


    // func field for mad opcodes (MIPS IV).
    enum mad_func {
        madd_fp_op      = 0x08, msub_fp_op      = 0x0a,
        nmadd_fp_op     = 0x0c, nmsub_fp_op     = 0x0e
    };


    // bit positions of the instruction fields, plus the immediate mask
    enum mips_inst_shifts {
        OP_SHF       = 26,
        JTARGET_SHF  = 0,
        RS_SHF       = 21,
        RT_SHF       = 16,
        RD_SHF       = 11,
        RE_SHF       = 6,
        SA_SHF       = RE_SHF,  // synonym
        IMM_SHF      = 0,
        FUNC_SHF     = 0,

        // mask values
        MSK_16       = 0xffff,


        CACHEOP_SHF  = 18,
        CACHESEL_SHF = 16,
    };
};

// MIPS general-purpose register numbers (0-31), plus the aliases the
// ARM-to-MIPS translation uses for its scratch registers.
enum mips_regnames {
    R_zero = 0,
            R_at,   R_v0,   R_v1,   R_a0,   R_a1,   R_a2,   R_a3,
    R_t0,   R_t1,   R_t2,   R_t3,   R_t4,   R_t5,   R_t6,   R_t7,
    R_s0,   R_s1,   R_s2,   R_s3,   R_s4,   R_s5,   R_s6,   R_s7,
    R_t8,   R_t9,   R_k0,   R_k1,   R_gp,   R_sp,   R_s8,   R_ra,
    R_lr = R_s8,

    // arm regs 0-15 are mips regs 2-17 (meaning s0 & s1 are used)
    R_at2  = R_s2,    // R_at2 = 18 = s2
    R_cmp  = R_s3,    // R_cmp = 19 = s3
    R_cmp2 = R_s4     // R_cmp2 = 20 = s4
};
1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ralph Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: @(#)kadb.c 8.1 (Berkeley) 6/10/93 + */ + +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdbool.h> +#include <sys/cdefs.h> + +#include <sys/types.h> +#include "mips_opcode.h" + + +// #include <sys/systm.h> +// #include <sys/param.h> + +// #include <machine/reg.h> +// #include <machine/cpu.h> +/*#include <machine/param.h>*/ +// #include <machine/db_machdep.h> + +// #include <ddb/db_interface.h> +// #include <ddb/db_output.h> +// #include <ddb/db_extern.h> +// #include <ddb/db_sym.h> + + +static char *sprintf_buffer; +static int sprintf_buf_len; + + +typedef uint32_t db_addr_t; +static void db_printf(const char* fmt, ...); + +static const char * const op_name[64] = { +/* 0 */ "spec", "bcond","j ", "jal", "beq", "bne", "blez", "bgtz", +/* 8 */ "addi", "addiu","slti", "sltiu","andi", "ori", "xori", "lui", +/*16 */ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "blezl","bgtzl", +/*24 */ "daddi","daddiu","ldl", "ldr", "op34", "op35", "op36", "op37", +/*32 */ "lb ", "lh ", "lwl", "lw ", "lbu", "lhu", "lwr", "lwu", +/*40 */ "sb ", "sh ", "swl", "sw ", "sdl", "sdr", "swr", "cache", +/*48 */ "ll ", "lwc1", "lwc2", "lwc3", "lld", "ldc1", "ldc2", "ld ", +/*56 */ "sc ", "swc1", "swc2", "swc3", "scd", "sdc1", "sdc2", "sd " +}; + +static const char * const spec_name[64] = { +/* 0 */ "sll", "spec01","srl", "sra", "sllv", "spec05","srlv","srav", +/* 8 */ "jr", "jalr", "movz","movn","syscall","break","spec16","sync", +/*16 */ "mfhi", "mthi", "mflo", "mtlo", "dsllv","spec25","dsrlv","dsrav", +/*24 */ "mult", "multu","div", "divu", "dmult","dmultu","ddiv","ddivu", +/*32 */ "add", "addu", "sub", "subu", "and", "or ", "xor", "nor", +/*40 */ "spec50","spec51","slt","sltu", "dadd","daddu","dsub","dsubu", +/*48 */ "tge","tgeu","tlt","tltu","teq","spec65","tne","spec67", +/*56 */ "dsll","spec71","dsrl","dsra","dsll32","spec75","dsrl32","dsra32" +}; + +static const char * const spec2_name[64] = { /* QED RM4650, R5000, etc. 
*/ +/* 0x00 */ "madd", "maddu", "mul", "spec3", "msub", "msubu", "rsrv6", "rsrv7", +/* 0x08 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", +/* 0x10 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", +/* 0x18 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", +/* 0x20 */ "clz", "clo", "rsrv", "rsrv", "dclz", "dclo", "rsrv", "rsrv", +/* 0x28 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", +/* 0x30 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", +/* 0x38 */ "rsrv", "rsrv", "rsrv", "resv", "rsrv", "rsrv", "rsrv", "sdbbp" +}; + +static const char * const bcond_name[32] = { +/* 0 */ "bltz", "bgez", "bltzl", "bgezl", "?", "?", "?", "?", +/* 8 */ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "?", "tnei", "?", +/*16 */ "bltzal", "bgezal", "bltzall", "bgezall", "?", "?", "?", "?", +/*24 */ "?", "?", "?", "?", "?", "?", "?", "?", +}; + +static const char * const cop1_name[64] = { +/* 0 */ "fadd", "fsub", "fmpy", "fdiv", "fsqrt","fabs", "fmov", "fneg", +/* 8 */ "fop08","fop09","fop0a","fop0b","fop0c","fop0d","fop0e","fop0f", +/*16 */ "fop10","fop11","fop12","fop13","fop14","fop15","fop16","fop17", +/*24 */ "fop18","fop19","fop1a","fop1b","fop1c","fop1d","fop1e","fop1f", +/*32 */ "fcvts","fcvtd","fcvte","fop23","fcvtw","fop25","fop26","fop27", +/*40 */ "fop28","fop29","fop2a","fop2b","fop2c","fop2d","fop2e","fop2f", +/*48 */ "fcmp.f","fcmp.un","fcmp.eq","fcmp.ueq","fcmp.olt","fcmp.ult", + "fcmp.ole","fcmp.ule", +/*56 */ "fcmp.sf","fcmp.ngle","fcmp.seq","fcmp.ngl","fcmp.lt","fcmp.nge", + "fcmp.le","fcmp.ngt" +}; + +static const char * const fmt_name[16] = { + "s", "d", "e", "fmt3", + "w", "fmt5", "fmt6", "fmt7", + "fmt8", "fmt9", "fmta", "fmtb", + "fmtc", "fmtd", "fmte", "fmtf" +}; + +#if defined(__mips_n32) || defined(__mips_n64) +static char * const reg_name[32] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "s0", "s1", "s2", "s3", 
"s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra" +}; +#else + +static char * alt_arm_reg_name[32] = { // hacked names for comparison with ARM code + "zero", "at", "r0", "r1", "r2", "r3", "r4", "r5", + "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", + "r14", "r15", "at2", "cmp", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra" +}; + +static char * mips_reg_name[32] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra" +}; + +static char ** reg_name = &mips_reg_name[0]; + +#endif /* __mips_n32 || __mips_n64 */ + +static const char * const c0_opname[64] = { + "c0op00","tlbr", "tlbwi", "c0op03","c0op04","c0op05","tlbwr", "c0op07", + "tlbp", "c0op11","c0op12","c0op13","c0op14","c0op15","c0op16","c0op17", + "rfe", "c0op21","c0op22","c0op23","c0op24","c0op25","c0op26","c0op27", + "eret", "c0op31","c0op32","c0op33","c0op34","c0op35","c0op36","c0op37", + "c0op40","c0op41","c0op42","c0op43","c0op44","c0op45","c0op46","c0op47", + "c0op50","c0op51","c0op52","c0op53","c0op54","c0op55","c0op56","c0op57", + "c0op60","c0op61","c0op62","c0op63","c0op64","c0op65","c0op66","c0op67", + "c0op70","c0op71","c0op72","c0op73","c0op74","c0op75","c0op77","c0op77", +}; + +static const char * const c0_reg[32] = { + "index", "random", "tlblo0", "tlblo1", + "context", "pagemask", "wired", "cp0r7", + "badvaddr", "count", "tlbhi", "compare", + "status", "cause", "epc", "prid", + "config", "lladdr", "watchlo", "watchhi", + "xcontext", "cp0r21", "cp0r22", "debug", + "depc", "perfcnt", "ecc", "cacheerr", + "taglo", "taghi", "errepc", "desave" +}; + +static void print_addr(db_addr_t); +db_addr_t mips_disassem(db_addr_t loc, char *di_buffer, int alt_dis_format); + + +/* + * Disassemble instruction 'insn' nominally at 'loc'. + * 'loc' may in fact contain a breakpoint instruction. 
+ */ +static db_addr_t +db_disasm_insn(int insn, db_addr_t loc, bool altfmt) +{ + bool bdslot = false; + InstFmt i; + + i.word = insn; + + switch (i.JType.op) { + case OP_SPECIAL: + if (i.word == 0) { + db_printf("nop"); + break; + } + if (i.word == 0x0080) { + db_printf("NIY"); + break; + } + if (i.word == 0x00c0) { + db_printf("NOT IMPL"); + break; + } + /* Special cases -------------------------------------------------- + * "addu" is a "move" only in 32-bit mode. What's the correct + * answer - never decode addu/daddu as "move"? + */ + if ( (i.RType.func == OP_ADDU && i.RType.rt == 0) || + (i.RType.func == OP_OR && i.RType.rt == 0) ) { + db_printf("move\t%s,%s", + reg_name[i.RType.rd], + reg_name[i.RType.rs]); + break; + } + // mips32r2, rotr & rotrv + if (i.RType.func == OP_SRL && (i.RType.rs & 1) == 1) { + db_printf("rotr\t%s,%s,%d", reg_name[i.RType.rd], + reg_name[i.RType.rt], i.RType.shamt); + break; + } + if (i.RType.func == OP_SRLV && (i.RType.shamt & 1) == 1) { + db_printf("rotrv\t%s,%s,%s", reg_name[i.RType.rd], + reg_name[i.RType.rt], reg_name[i.RType.rs]); + break; + } + + + db_printf("%s", spec_name[i.RType.func]); + switch (i.RType.func) { + case OP_SLL: + case OP_SRL: + case OP_SRA: + case OP_DSLL: + + case OP_DSRL: + case OP_DSRA: + case OP_DSLL32: + case OP_DSRL32: + case OP_DSRA32: + db_printf("\t%s,%s,%d", + reg_name[i.RType.rd], + reg_name[i.RType.rt], + i.RType.shamt); + break; + + case OP_SLLV: + case OP_SRLV: + case OP_SRAV: + case OP_DSLLV: + case OP_DSRLV: + case OP_DSRAV: + db_printf("\t%s,%s,%s", + reg_name[i.RType.rd], + reg_name[i.RType.rt], + reg_name[i.RType.rs]); + break; + + case OP_MFHI: + case OP_MFLO: + db_printf("\t%s", reg_name[i.RType.rd]); + break; + + case OP_JR: + case OP_JALR: + db_printf("\t%s", reg_name[i.RType.rs]); + bdslot = true; + break; + case OP_MTLO: + case OP_MTHI: + db_printf("\t%s", reg_name[i.RType.rs]); + break; + + case OP_MULT: + case OP_MULTU: + case OP_DMULT: + case OP_DMULTU: + case OP_DIV: + case 
OP_DIVU: + case OP_DDIV: + case OP_DDIVU: + db_printf("\t%s,%s", + reg_name[i.RType.rs], + reg_name[i.RType.rt]); + break; + + + case OP_SYSCALL: + case OP_SYNC: + break; + + case OP_BREAK: + db_printf("\t%d", (i.RType.rs << 5) | i.RType.rt); + break; + + default: + db_printf("\t%s,%s,%s", + reg_name[i.RType.rd], + reg_name[i.RType.rs], + reg_name[i.RType.rt]); + } + break; + + case OP_SPECIAL2: + if (i.RType.func == OP_MUL) + db_printf("%s\t%s,%s,%s", + spec2_name[i.RType.func & 0x3f], + reg_name[i.RType.rd], + reg_name[i.RType.rs], + reg_name[i.RType.rt]); + else + db_printf("%s\t%s,%s", + spec2_name[i.RType.func & 0x3f], + reg_name[i.RType.rs], + reg_name[i.RType.rt]); + + break; + + case OP_SPECIAL3: + if (i.RType.func == OP_EXT) + db_printf("ext\t%s,%s,%d,%d", + reg_name[i.RType.rt], + reg_name[i.RType.rs], + i.RType.rd+1, + i.RType.shamt); + else if (i.RType.func == OP_INS) + db_printf("ins\t%s,%s,%d,%d", + reg_name[i.RType.rt], + reg_name[i.RType.rs], + i.RType.rd+1, + i.RType.shamt); + else if (i.RType.func == OP_BSHFL && i.RType.shamt == OP_WSBH) + db_printf("wsbh\t%s,%s", + reg_name[i.RType.rd], + reg_name[i.RType.rt]); + else if (i.RType.func == OP_BSHFL && i.RType.shamt == OP_SEB) + db_printf("seb\t%s,%s", + reg_name[i.RType.rd], + reg_name[i.RType.rt]); + else if (i.RType.func == OP_BSHFL && i.RType.shamt == OP_SEH) + db_printf("seh\t%s,%s", + reg_name[i.RType.rd], + reg_name[i.RType.rt]); + else + db_printf("Unknown"); + break; + + case OP_BCOND: + db_printf("%s\t%s,", bcond_name[i.IType.rt], + reg_name[i.IType.rs]); + goto pr_displ; + + case OP_BLEZ: + case OP_BLEZL: + case OP_BGTZ: + case OP_BGTZL: + db_printf("%s\t%s,", op_name[i.IType.op], + reg_name[i.IType.rs]); + goto pr_displ; + + case OP_BEQ: + case OP_BEQL: + if (i.IType.rs == 0 && i.IType.rt == 0) { + db_printf("b \t"); + goto pr_displ; + } + /* FALLTHROUGH */ + case OP_BNE: + case OP_BNEL: + db_printf("%s\t%s,%s,", op_name[i.IType.op], + reg_name[i.IType.rs], + reg_name[i.IType.rt]); + 
pr_displ: + print_addr(loc + 4 + ((short)i.IType.imm << 2)); + bdslot = true; + break; + + case OP_COP0: + switch (i.RType.rs) { + case OP_BCx: + case OP_BCy: + + db_printf("bc0%c\t", + "ft"[i.RType.rt & COPz_BC_TF_MASK]); + goto pr_displ; + + case OP_MT: + db_printf("mtc0\t%s,%s", + reg_name[i.RType.rt], + c0_reg[i.RType.rd]); + break; + + case OP_DMT: + db_printf("dmtc0\t%s,%s", + reg_name[i.RType.rt], + c0_reg[i.RType.rd]); + break; + + case OP_MF: + db_printf("mfc0\t%s,%s", + reg_name[i.RType.rt], + c0_reg[i.RType.rd]); + break; + + case OP_DMF: + db_printf("dmfc0\t%s,%s", + reg_name[i.RType.rt], + c0_reg[i.RType.rd]); + break; + + default: + db_printf("%s", c0_opname[i.FRType.func]); + } + break; + + case OP_COP1: + switch (i.RType.rs) { + case OP_BCx: + case OP_BCy: + db_printf("bc1%c\t", + "ft"[i.RType.rt & COPz_BC_TF_MASK]); + goto pr_displ; + + case OP_MT: + db_printf("mtc1\t%s,f%d", + reg_name[i.RType.rt], + i.RType.rd); + break; + + case OP_MF: + db_printf("mfc1\t%s,f%d", + reg_name[i.RType.rt], + i.RType.rd); + break; + + case OP_CT: + db_printf("ctc1\t%s,f%d", + reg_name[i.RType.rt], + i.RType.rd); + break; + + case OP_CF: + db_printf("cfc1\t%s,f%d", + reg_name[i.RType.rt], + i.RType.rd); + break; + + default: + db_printf("%s.%s\tf%d,f%d,f%d", + cop1_name[i.FRType.func], + fmt_name[i.FRType.fmt], + i.FRType.fd, i.FRType.fs, i.FRType.ft); + } + break; + + case OP_J: + case OP_JAL: + db_printf("%s\t", op_name[i.JType.op]); + print_addr((loc & 0xF0000000) | (i.JType.target << 2)); + bdslot = true; + break; + + case OP_LWC1: + case OP_SWC1: + db_printf("%s\tf%d,", op_name[i.IType.op], + i.IType.rt); + goto loadstore; + + case OP_LB: + case OP_LH: + case OP_LW: + case OP_LD: + case OP_LBU: + case OP_LHU: + case OP_LWU: + case OP_SB: + case OP_SH: + case OP_SW: + case OP_SD: + db_printf("%s\t%s,", op_name[i.IType.op], + reg_name[i.IType.rt]); + loadstore: + db_printf("%d(%s)", (short)i.IType.imm, + reg_name[i.IType.rs]); + break; + + case OP_ORI: + case 
OP_XORI: + if (i.IType.rs == 0) { + db_printf("li\t%s,0x%x", + reg_name[i.IType.rt], + i.IType.imm); + break; + } + /* FALLTHROUGH */ + case OP_ANDI: + db_printf("%s\t%s,%s,0x%x", op_name[i.IType.op], + reg_name[i.IType.rt], + reg_name[i.IType.rs], + i.IType.imm); + break; + + case OP_LUI: + db_printf("%s\t%s,0x%x", op_name[i.IType.op], + reg_name[i.IType.rt], + i.IType.imm); + break; + + case OP_CACHE: + db_printf("%s\t0x%x,0x%x(%s)", + op_name[i.IType.op], + i.IType.rt, + i.IType.imm, + reg_name[i.IType.rs]); + break; + + case OP_ADDI: + case OP_DADDI: + case OP_ADDIU: + case OP_DADDIU: + if (i.IType.rs == 0) { + db_printf("li\t%s,%d", + reg_name[i.IType.rt], + (short)i.IType.imm); + break; + } + /* FALLTHROUGH */ + default: + db_printf("%s\t%s,%s,%d", op_name[i.IType.op], + reg_name[i.IType.rt], + reg_name[i.IType.rs], + (short)i.IType.imm); + } + // db_printf("\n"); + // if (bdslot) { + // db_printf(" bd: "); + // mips_disassem(loc+4); + // return (loc + 8); + // } + return (loc + 4); +} + +static void +print_addr(db_addr_t loc) +{ + db_printf("0x%08x", loc); +} + + + +static void db_printf(const char* fmt, ...) +{ + int cnt; + va_list argp; + va_start(argp, fmt); + if (sprintf_buffer) { + cnt = vsnprintf(sprintf_buffer, sprintf_buf_len, fmt, argp); + sprintf_buffer += cnt; + sprintf_buf_len -= cnt; + } else { + vprintf(fmt, argp); + } +} + + +/* + * Disassemble instruction at 'loc'. + * Return address of start of next instruction. + * Since this function is used by 'examine' and by 'step' + * "next instruction" does NOT mean the next instruction to + * be executed but the 'linear' next instruction. 
+ */ +db_addr_t +mips_disassem(db_addr_t loc, char *di_buffer, int alt_dis_format) +{ + u_int32_t instr; + + if (alt_dis_format) { // use ARM register names for disassembly + reg_name = &alt_arm_reg_name[0]; + } + + sprintf_buffer = di_buffer; // quick 'n' dirty printf() vs sprintf() + sprintf_buf_len = 39; // should be passed in + + instr = *(u_int32_t *)loc; + return (db_disasm_insn(instr, loc, false)); +} + diff --git a/libpixelflinger/codeflinger/mips_disassem.h b/libpixelflinger/codeflinger/mips_disassem.h new file mode 100644 index 000000000..2d5b7f551 --- /dev/null +++ b/libpixelflinger/codeflinger/mips_disassem.h @@ -0,0 +1,66 @@ +/* $NetBSD: db_disasm.c,v 1.19 2007/02/28 04:21:53 thorpej Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Ralph Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
#ifndef ANDROID_MIPS_DISASSEM_H
#define ANDROID_MIPS_DISASSEM_H

#include <stdint.h>     /* uint32_t - not guaranteed by <sys/types.h> alone */
#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif


// could add an interface like this, but I have not
// typedef struct {
//     u_int (*di_readword)(u_int);
//     void (*di_printaddr)(u_int);
//     void (*di_printf)(const char *, ...);
// } disasm_interface_t;

/* Prototypes for callable functions */

// u_int disasm(const disasm_interface_t *, u_int, int);

/*
 * Disassemble the single MIPS instruction stored at 'location'.
 *
 * location  - address of the 32-bit instruction word to decode.
 * di_buffer - receives the NUL-terminated text; the implementation writes at
 *             most 39 bytes, so the buffer must be at least that large.
 *             Passing NULL prints to stdout instead.
 * alt_fmt   - non-zero prints ARM-style register names (r0..r15) in place of
 *             the MIPS names, for side-by-side comparison with ARM code.
 *
 * NOTE(review): the definition in mips_disassem.c is declared as
 *   db_addr_t mips_disassem(db_addr_t loc, char *di_buffer, int alt_dis_format)
 * i.e. it takes the address as a 32-bit integer and returns the next address.
 * Callers go through this prototype, so the mismatch should be reconciled.
 */
void mips_disassem(uint32_t *location, char *di_buffer, int alt_fmt);

#ifdef __cplusplus
}
#endif

#endif /* !ANDROID_MIPS_DISASSEM_H */
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mips_opcode.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * Define the instruction formats and opcode values for the + * MIPS instruction set. + */ + +#include <endian.h> + +/* + * Define the instruction formats. 
/*
 * Define the instruction formats.
 *
 * InstFmt overlays one 32-bit instruction word ('word') with bit-field views
 * for the I-, J-, R- and floating-point R-type encodings. Each view's field
 * widths add up to 32 bits. The field order is reversed between the
 * little-endian and big-endian variants.
 * NOTE(review): this presumably relies on the compiler allocating bit-fields
 * starting from the least-significant bit on little-endian targets and from
 * the most-significant bit on big-endian targets - confirm for the toolchain
 * in use.
 */
typedef union {
    unsigned word;              /* raw instruction word */

#if BYTE_ORDER == LITTLE_ENDIAN
    /* immediate format: op, rs, rt, 16-bit immediate */
    struct {
        unsigned imm: 16;
        unsigned rt: 5;
        unsigned rs: 5;
        unsigned op: 6;
    } IType;

    /* jump format: op, 26-bit target */
    struct {
        unsigned target: 26;
        unsigned op: 6;
    } JType;

    /* register format: op, rs, rt, rd, shift amount, function */
    struct {
        unsigned func: 6;
        unsigned shamt: 5;
        unsigned rd: 5;
        unsigned rt: 5;
        unsigned rs: 5;
        unsigned op: 6;
    } RType;

    /* floating-point register format */
    struct {
        unsigned func: 6;
        unsigned fd: 5;
        unsigned fs: 5;
        unsigned ft: 5;
        unsigned fmt: 4;
        unsigned : 1;           /* always '1' */
        unsigned op: 6;         /* always '0x11' */
    } FRType;
#endif
#if BYTE_ORDER == BIG_ENDIAN
    /* immediate format: op, rs, rt, 16-bit immediate */
    struct {
        unsigned op: 6;
        unsigned rs: 5;
        unsigned rt: 5;
        unsigned imm: 16;
    } IType;

    /* jump format: op, 26-bit target */
    struct {
        unsigned op: 6;
        unsigned target: 26;
    } JType;

    /* register format: op, rs, rt, rd, shift amount, function */
    struct {
        unsigned op: 6;
        unsigned rs: 5;
        unsigned rt: 5;
        unsigned rd: 5;
        unsigned shamt: 5;
        unsigned func: 6;
    } RType;

    /* floating-point register format */
    struct {
        unsigned op: 6;         /* always '0x11' */
        unsigned : 1;           /* always '1' */
        unsigned fmt: 4;
        unsigned ft: 5;
        unsigned fs: 5;
        unsigned fd: 5;
        unsigned func: 6;
    } FRType;
#endif
} InstFmt;
/*
 * Values for the 'op' field.
 *
 * All values are octal literals (leading 0), so each group of eight
 * corresponds to one row of the op_name[] table in the disassembler.
 */
#define OP_SPECIAL      000
#define OP_BCOND        001
#define OP_J            002
#define OP_JAL          003
#define OP_BEQ          004
#define OP_BNE          005
#define OP_BLEZ         006
#define OP_BGTZ         007

#define OP_ADDI         010
#define OP_ADDIU        011
#define OP_SLTI         012
#define OP_SLTIU        013
#define OP_ANDI         014
#define OP_ORI          015
#define OP_XORI         016
#define OP_LUI          017

#define OP_COP0         020
#define OP_COP1         021
#define OP_COP2         022
#define OP_COP3         023
#define OP_BEQL         024     /* MIPS-II, for r4000 port */
#define OP_BNEL         025     /* MIPS-II, for r4000 port */
#define OP_BLEZL        026     /* MIPS-II, for r4000 port */
#define OP_BGTZL        027     /* MIPS-II, for r4000 port */

#define OP_DADDI        030     /* MIPS-II, for r4000 port */
#define OP_DADDIU       031     /* MIPS-II, for r4000 port */
#define OP_LDL          032     /* MIPS-II, for r4000 port */
#define OP_LDR          033     /* MIPS-II, for r4000 port */

#define OP_SPECIAL2     034     /* QED opcodes */
#define OP_SPECIAL3     037     /* mips32r2 opcodes */

/* OP_LHU and OP_LWR were listed twice in this group; duplicates removed. */
#define OP_LB           040
#define OP_LH           041
#define OP_LWL          042
#define OP_LW           043
#define OP_LBU          044
#define OP_LHU          045
#define OP_LWR          046
#define OP_LWU          047     /* MIPS-II, for r4000 port */

#define OP_SB           050
#define OP_SH           051
#define OP_SWL          052
#define OP_SW           053
#define OP_SDL          054     /* MIPS-II, for r4000 port */
#define OP_SDR          055     /* MIPS-II, for r4000 port */
#define OP_SWR          056
#define OP_CACHE        057     /* MIPS-II, for r4000 port */

#define OP_LL           060
#define OP_LWC0         OP_LL   /* backwards source compatibility */
#define OP_LWC1         061
#define OP_LWC2         062
#define OP_LWC3         063
#define OP_LLD          064     /* MIPS-II, for r4000 port */
#define OP_LDC1         065
#define OP_LD           067     /* MIPS-II, for r4000 port */

#define OP_SC           070
#define OP_SWC0         OP_SC   /* backwards source compatibility */
#define OP_SWC1         071
#define OP_SWC2         072
#define OP_SWC3         073
#define OP_SCD          074     /* MIPS-II, for r4000 port */
#define OP_SDC1         075
#define OP_SD           077     /* MIPS-II, for r4000 port */
/*
 * Values for the 'func' field when 'op' == OP_SPECIAL.
 * (All values in this file are octal literals.)
 */
#define OP_SLL          000
#define OP_SRL          002
#define OP_SRA          003
#define OP_SLLV         004
#define OP_SRLV         006
#define OP_SRAV         007

#define OP_JR           010
#define OP_JALR         011
#define OP_SYSCALL      014
#define OP_BREAK        015
#define OP_SYNC         017     /* MIPS-II, for r4000 port */

#define OP_MFHI         020
#define OP_MTHI         021
#define OP_MFLO         022
#define OP_MTLO         023
#define OP_DSLLV        024     /* MIPS-II, for r4000 port */
#define OP_DSRLV        026     /* MIPS-II, for r4000 port */
#define OP_DSRAV        027     /* MIPS-II, for r4000 port */

#define OP_MULT         030
#define OP_MULTU        031
#define OP_DIV          032
#define OP_DIVU         033
#define OP_DMULT        034     /* MIPS-II, for r4000 port */
#define OP_DMULTU       035     /* MIPS-II, for r4000 port */
#define OP_DDIV         036     /* MIPS-II, for r4000 port */
#define OP_DDIVU        037     /* MIPS-II, for r4000 port */

#define OP_ADD          040
#define OP_ADDU         041
#define OP_SUB          042
#define OP_SUBU         043
#define OP_AND          044
#define OP_OR           045
#define OP_XOR          046
#define OP_NOR          047

#define OP_SLT          052
#define OP_SLTU         053
#define OP_DADD         054     /* MIPS-II, for r4000 port */
#define OP_DADDU        055     /* MIPS-II, for r4000 port */
#define OP_DSUB         056     /* MIPS-II, for r4000 port */
#define OP_DSUBU        057     /* MIPS-II, for r4000 port */

#define OP_TGE          060     /* MIPS-II, for r4000 port */
#define OP_TGEU         061     /* MIPS-II, for r4000 port */
#define OP_TLT          062     /* MIPS-II, for r4000 port */
#define OP_TLTU         063     /* MIPS-II, for r4000 port */
#define OP_TEQ          064     /* MIPS-II, for r4000 port */
#define OP_TNE          066     /* MIPS-II, for r4000 port */

#define OP_DSLL         070     /* MIPS-II, for r4000 port */
#define OP_DSRL         072     /* MIPS-II, for r4000 port */
#define OP_DSRA         073     /* MIPS-II, for r4000 port */
#define OP_DSLL32       074     /* MIPS-II, for r4000 port */
#define OP_DSRL32       076     /* MIPS-II, for r4000 port */
#define OP_DSRA32       077     /* MIPS-II, for r4000 port */

/*
 * Values for the 'func' field when 'op' == OP_SPECIAL2.
 */
#define OP_MAD          000     /* QED */
#define OP_MADU         001     /* QED */
#define OP_MUL          002     /* QED */

/*
 * Values for the 'func' field when 'op' == OP_SPECIAL3.
 */
#define OP_EXT          000
#define OP_INS          004
#define OP_BSHFL        040

/*
 * Values for the 'shamt' field when OP_SPECIAL3 && func OP_BSHFL.
 */
#define OP_WSBH         002
#define OP_SEB          020
#define OP_SEH          030

/*
 * Values for the 'rt' field when 'op' == OP_BCOND.
 * (The disassembler selects the mnemonic with bcond_name[i.IType.rt].)
 */
#define OP_BLTZ         000
#define OP_BGEZ         001
#define OP_BLTZL        002     /* MIPS-II, for r4000 port */
#define OP_BGEZL        003     /* MIPS-II, for r4000 port */

#define OP_TGEI         010     /* MIPS-II, for r4000 port */
#define OP_TGEIU        011     /* MIPS-II, for r4000 port */
#define OP_TLTI         012     /* MIPS-II, for r4000 port */
#define OP_TLTIU        013     /* MIPS-II, for r4000 port */
#define OP_TEQI         014     /* MIPS-II, for r4000 port */
#define OP_TNEI         016     /* MIPS-II, for r4000 port */

#define OP_BLTZAL       020     /* MIPS-II, for r4000 port */
#define OP_BGEZAL       021
#define OP_BLTZALL      022
#define OP_BGEZALL      023

/*
 * Values for the 'rs' field when 'op' == OP_COPz.
 */
#define OP_MF           000
#define OP_DMF          001     /* MIPS-II, for r4000 port */
#define OP_MT           004
#define OP_DMT          005     /* MIPS-II, for r4000 port */
#define OP_BCx          010
#define OP_BCy          014
#define OP_CF           002
#define OP_CT           006
+ */ +#define COPz_BC_TF_MASK 0x01 +#define COPz_BC_TRUE 0x01 +#define COPz_BC_FALSE 0x00 +#define COPz_BCL_TF_MASK 0x02 /* MIPS-II, for r4000 port */ +#define COPz_BCL_TRUE 0x02 /* MIPS-II, for r4000 port */ +#define COPz_BCL_FALSE 0x00 /* MIPS-II, for r4000 port */ diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp index 8464fbd84..4d5a50f1d 100644 --- a/libpixelflinger/codeflinger/texturing.cpp +++ b/libpixelflinger/codeflinger/texturing.cpp @@ -464,6 +464,9 @@ void GGLAssembler::build_textures( fragment_parts_t& parts, CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); } + if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) + return; + comment("compute repeat/clamp"); int u = scratches.obtain(); int v = scratches.obtain(); @@ -472,6 +475,9 @@ void GGLAssembler::build_textures( fragment_parts_t& parts, int U = 0; int V = 0; + if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) + return; + CONTEXT_LOAD(width, generated_vars.texture[i].width); CONTEXT_LOAD(height, generated_vars.texture[i].height); @@ -510,6 +516,9 @@ void GGLAssembler::build_textures( fragment_parts_t& parts, U = scratches.obtain(); V = scratches.obtain(); + if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) + return; + // sample the texel center SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1))); SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1))); @@ -593,6 +602,10 @@ void GGLAssembler::build_textures( fragment_parts_t& parts, comment("iterate s,t"); int dsdx = scratches.obtain(); int dtdx = scratches.obtain(); + + if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) + return; + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); ADD(AL, 0, s.reg, s.reg, dsdx); @@ -611,6 +624,10 @@ void GGLAssembler::build_textures( fragment_parts_t& parts, texel.setTo(regs.obtain(), &tmu.format); txPtr.setTo(texel.reg, tmu.bits); int stride = scratches.obtain(); + + if 
(registerFile().status() & RegisterFile::OUT_OF_REGISTERS) + return; + CONTEXT_LOAD(stride, generated_vars.texture[i].stride); CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); SMLABB(AL, u, v, stride, u); // u+v*stride @@ -1078,6 +1095,7 @@ void GGLAssembler::build_texture_environment( Scratch scratches(registerFile()); pixel_t texel(parts.texel[i]); + if (multiTexture && tmu.swrap == GGL_NEEDS_WRAP_11 && tmu.twrap == GGL_NEEDS_WRAP_11) diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp index d1f3d9677..a5d28b276 100644 --- a/libpixelflinger/scanline.cpp +++ b/libpixelflinger/scanline.cpp @@ -32,6 +32,9 @@ #include "codeflinger/CodeCache.h" #include "codeflinger/GGLAssembler.h" #include "codeflinger/ARMAssembler.h" +#if defined(__mips__) +#include "codeflinger/MIPSAssembler.h" +#endif //#include "codeflinger/ARMAssemblerOptimizer.h" // ---------------------------------------------------------------------------- @@ -49,7 +52,7 @@ # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED #endif -#if defined(__arm__) +#if defined(__arm__) || defined(__mips__) # define ANDROID_ARM_CODEGEN 1 #else # define ANDROID_ARM_CODEGEN 0 @@ -63,7 +66,11 @@ */ #define DEBUG_NEEDS 0 +#ifdef __mips__ +#define ASSEMBLY_SCRATCH_SIZE 4096 +#else #define ASSEMBLY_SCRATCH_SIZE 2048 +#endif // ---------------------------------------------------------------------------- namespace android { @@ -266,7 +273,12 @@ static const needs_filter_t fill16noblend = { // ---------------------------------------------------------------------------- #if ANDROID_ARM_CODEGEN + +#if defined(__mips__) +static CodeCache gCodeCache(32 * 1024); +#else static CodeCache gCodeCache(12 * 1024); +#endif class ScanlineAssembly : public Assembly { AssemblyKey<needs_t> mKey; @@ -375,9 +387,14 @@ static void pick_scanline(context_t* c) sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, ASSEMBLY_SCRATCH_SIZE); // initialize our assembler +#if defined(__arm__) GGLAssembler assembler( new 
ARMAssembler(a) ); //GGLAssembler assembler( // new ARMAssemblerOptimizer(new ARMAssembler(a)) ); +#endif +#if defined(__mips__) + GGLAssembler assembler( new ArmToMipsAssembler(a) ); +#endif // generate the scanline code for the given needs int err = assembler.scanline(c->state.needs, c); if (ggl_likely(!err)) { diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp index 94e24810d..3d5a040ef 100644 --- a/libpixelflinger/tests/codegen/codegen.cpp +++ b/libpixelflinger/tests/codegen/codegen.cpp @@ -9,14 +9,19 @@ #include "codeflinger/CodeCache.h" #include "codeflinger/GGLAssembler.h" #include "codeflinger/ARMAssembler.h" +#include "codeflinger/MIPSAssembler.h" -#if defined(__arm__) +#if defined(__arm__) || defined(__mips__) # define ANDROID_ARM_CODEGEN 1 #else # define ANDROID_ARM_CODEGEN 0 #endif +#if defined (__mips__) +#define ASSEMBLY_SCRATCH_SIZE 4096 +#else #define ASSEMBLY_SCRATCH_SIZE 2048 +#endif using namespace android; @@ -39,14 +44,22 @@ static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) needs.t[0] = t0; needs.t[1] = t1; sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE)); + +#if defined(__arm__) GGLAssembler assembler( new ARMAssembler(a) ); +#endif + +#if defined(__mips__) + GGLAssembler assembler( new ArmToMipsAssembler(a) ); +#endif + int err = assembler.scanline(needs, (context_t*)c); if (err != 0) { printf("error %08x (%s)\n", err, strerror(-err)); } gglUninit(c); #else - printf("This test runs only on ARM\n"); + printf("This test runs only on ARM or MIPS\n"); #endif } |