diff options
Diffstat (limited to 'gcc-4.4.3/gcc/config/rs6000/cell.md')
-rw-r--r-- | gcc-4.4.3/gcc/config/rs6000/cell.md | 400 |
1 files changed, 0 insertions, 400 deletions
diff --git a/gcc-4.4.3/gcc/config/rs6000/cell.md b/gcc-4.4.3/gcc/config/rs6000/cell.md deleted file mode 100644 index 3fffd2740..000000000 --- a/gcc-4.4.3/gcc/config/rs6000/cell.md +++ /dev/null @@ -1,400 +0,0 @@ -;; Scheduling description for cell processor. -;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 -;; Free Software Foundation, Inc. -;; Contributed by Sony Computer Entertainment, Inc., - - -;; This file is free software; you can redistribute it and/or modify it under -;; the terms of the GNU General Public License as published by the Free -;; Software Foundation; either version 3 of the License, or (at your option) -;; any later version. - -;; This file is distributed in the hope that it will be useful, but WITHOUT -;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -;; for more details. - -;; You should have received a copy of the GNU General Public License -;; along with GCC; see the file COPYING3. If not see -;; <http://www.gnu.org/licenses/>. - -;; Sources: BE BOOK4 (/sfs/enc/doc/PPU_BookIV_DD3.0_latest.pdf) - -;; BE Architecture *DD3.0 and DD3.1* -;; This file simulate PPU processor unit backend of pipeline, maualP24. -;; manual P27, stall and flush points -;; IU, XU, VSU, dispatcher decodes and dispatch 2 insns per cycle in program -;; order, the grouped address are aligned by 8 -;; This file only simulate one thread situation -;; XU executes all fixed point insns(3 units, a simple alu, a complex unit, -;; and load/store unit) -;; VSU executes all scalar floating points insn(a float unit), -;; VMX insns(VMX unit, 4 sub units, simple, permute, complex, floating point) - -;; Dual issue combination - -;; FXU LSU BR VMX VMX -;; (sx,cx,vsu_fp,fp_arith) (perm,vsu_ls,fp_ls) -;;FXU X -;;LSU X X X -;;BR X -;;VMX(sx,cx,vsu_fp,fp_arth) X -;;VMX(perm,vsu_ls, fp_ls) X -;; X are illegal combination. - -;; Dual issue exceptions: -;;(1) nop-pipelined FXU instr in slot 0 -;;(2) non-pipelined FPU inst in slot 0 -;; CSI instr(contex-synchronizing insn) -;; Microcode insn - -;; BRU unit: bru(none register stall), bru_cr(cr register stall) -;; VSU unit: vus(vmx simple), vup(vmx permute), vuc(vmx complex), -;; vuf(vmx float), fpu(floats). fpu_div is hypothetical, it is for -;; nonpipelined simulation -;; micr insns will stall at least 7 cycles to get the first instr from ROM, -;; micro instructions are not dual issued. - -;; slot0 is older than slot1 -;; non-pipelined insn need to be in slot1 to avoid 1cycle stall - -;; There different stall point -;; IB2, only stall one thread if stall here, so try to stall here as much as -;; we can -;; condition(1) insert nop, OR and ORI instruction form -;; condition(2) flush happens, in case of: RAW, WAW, D-ERAT miss, or -;; CR0-access while stdcx, or stwcx -;; IS2 stall ;; Page91 for details -;; VQ8 stall -;; IS2 stall can be activated by VQ8 stall and trying to issue a vsu instr to -;; the vsu issue queue - -;;(define_automaton "cellxu") - -;;(define_cpu_unit "fxu_cell,lsu_cell,bru_cell,vsu1_cell,vsu2_cell" "cellxu") - -;; ndfa -(define_automaton "cellxu,cellvsu,cellbru,cell_mis") - -(define_cpu_unit "fxu_cell,lsu_cell" "cellxu") -(define_cpu_unit "bru_cell" "cellbru") -(define_cpu_unit "vsu1_cell,vsu2_cell" "cellvsu") - -(define_cpu_unit "slot0,slot1" "cell_mis") - -(absence_set "slot0" "slot1") - -(define_reservation "nonpipeline" "fxu_cell+lsu_cell+vsu1_cell+vsu2_cell") -(define_reservation "slot01" "slot0|slot1") - - -;; Load/store -;; lmw, lswi, lswx are only generated for optimize for space, MC, -;; these instr are not simulated -(define_insn_reservation "cell-load" 2 - (and (eq_attr "type" "load") - (eq_attr "cpu" "cell")) - "slot01,lsu_cell") - -;; ldux, ldu, lbzux, lbzu, hardware breaks it down to two instrs, -;; if with 32bytes alignment, CMC -(define_insn_reservation "cell-load-ux" 2 - (and (eq_attr "type" "load_ux,load_u") - (eq_attr "cpu" "cell")) - "slot01,fxu_cell+lsu_cell") - -;; lha, lhax, lhau, lhaux, lwa, lwax, lwaux, MC, latency unknown -;; 11/7, 11/8, 11/12 -(define_insn_reservation "cell-load-ext" 2 - (and (eq_attr "type" "load_ext,load_ext_u,load_ext_ux") - (eq_attr "cpu" "cell")) - "slot01,fxu_cell+lsu_cell") - -;;lfs,lfsx,lfd,lfdx, 1 cycle -(define_insn_reservation "cell-fpload" 1 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "cell")) - "vsu2_cell+lsu_cell+slot01") - -;; lfsu,lfsux,lfdu,lfdux 1cycle(fpr) 2 cycle(gpr) -(define_insn_reservation "cell-fpload-update" 1 - (and (eq_attr "type" "fpload,fpload_u,fpload_ux") - (eq_attr "cpu" "cell")) - "fxu_cell+vsu2_cell+lsu_cell+slot01") - -(define_insn_reservation "cell-vecload" 2 - (and (eq_attr "type" "vecload") - (eq_attr "cpu" "cell")) - "slot01,vsu2_cell+lsu_cell") - -;;st? stw(MC) -(define_insn_reservation "cell-store" 1 - (and (eq_attr "type" "store") - (eq_attr "cpu" "cell")) - "lsu_cell+slot01") - -;;stdux, stdu, (hardware breaks into store and add) 2 for update reg -(define_insn_reservation "cell-store-update" 1 - (and (eq_attr "type" "store_ux,store_u") - (eq_attr "cpu" "cell")) - "fxu_cell+lsu_cell+slot01") - -(define_insn_reservation "cell-fpstore" 1 - (and (eq_attr "type" "fpstore") - (eq_attr "cpu" "cell")) - "vsu2_cell+lsu_cell+slot01") - -(define_insn_reservation "cell-fpstore-update" 1 - (and (eq_attr "type" "fpstore_ux,fpstore_u") - (eq_attr "cpu" "cell")) - "vsu2_cell+fxu_cell+lsu_cell+slot01") - -(define_insn_reservation "cell-vecstore" 1 - (and (eq_attr "type" "vecstore") - (eq_attr "cpu" "cell")) - "vsu2_cell+lsu_cell+slot01") - -;; Integer latency is 2 cycles -(define_insn_reservation "cell-integer" 2 - (and (eq_attr "type" "integer,insert_dword,shift,trap,\ - var_shift_rotate,cntlz,exts") - (eq_attr "cpu" "cell")) - "slot01,fxu_cell") - -;; Two integer latency is 4 cycles -(define_insn_reservation "cell-two" 4 - (and (eq_attr "type" "two") - (eq_attr "cpu" "cell")) - "slot01,fxu_cell,fxu_cell*2") - -;; Three integer latency is 6 cycles -(define_insn_reservation "cell-three" 6 - (and (eq_attr "type" "three") - (eq_attr "cpu" "cell")) - "slot01,fxu_cell,fxu_cell*4") - -;; rlwimi, alter cr0 -(define_insn_reservation "cell-insert" 2 - (and (eq_attr "type" "insert_word") - (eq_attr "cpu" "cell")) - "slot01,fxu_cell") - -;; cmpi, cmpli, cmpla, add, addo, sub, subo, alter cr0 -(define_insn_reservation "cell-cmp" 1 - (and (eq_attr "type" "cmp") - (eq_attr "cpu" "cell")) - "fxu_cell+slot01") - -;; add, addo, sub, subo, alter cr0, rldcli, rlwinm -(define_insn_reservation "cell-fast-cmp" 2 - (and (and (eq_attr "type" "fast_compare,delayed_compare,compare,\ - var_delayed_compare") - (eq_attr "cpu" "cell")) - (eq_attr "cell_micro" "not")) - "slot01,fxu_cell") - -(define_insn_reservation "cell-cmp-microcoded" 9 - (and (and (eq_attr "type" "fast_compare,delayed_compare,compare,\ - var_delayed_compare") - (eq_attr "cpu" "cell")) - (eq_attr "cell_micro" "always")) - "slot0+slot1,fxu_cell,fxu_cell*7") - -;; mulld -(define_insn_reservation "cell-lmul" 15 - (and (eq_attr "type" "lmul") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*13") - -;; mulld. is microcoded -(define_insn_reservation "cell-lmul-cmp" 22 - (and (eq_attr "type" "lmul_compare") - (eq_attr "cpu" "cell")) - "slot0+slot1,nonpipeline,nonpipeline*20") - -;; mulli, 6 cycles -(define_insn_reservation "cell-imul23" 6 - (and (eq_attr "type" "imul2,imul3") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*4") - -;; mullw, 9 -(define_insn_reservation "cell-imul" 9 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*7") - -;; divide -(define_insn_reservation "cell-idiv" 32 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*30") - -(define_insn_reservation "cell-ldiv" 64 - (and (eq_attr "type" "ldiv") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*62") - -;;mflr and mfctr are pipelined -(define_insn_reservation "cell-mfjmpr" 1 - (and (eq_attr "type" "mfjmpr") - (eq_attr "cpu" "cell")) - "slot01+bru_cell") - -;;mtlr and mtctr, -;;mtspr fully pipelined -(define_insn_reservation "cell-mtjmpr" 1 - (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "cell")) - "bru_cell+slot01") - -;; Branches -;; b, ba, bl, bla, unconditional branch always predicts correctly n/a latency -;; bcctr, bcctrl, latency 2, actually adjust by be to 4 -(define_insn_reservation "cell-branch" 1 - (and (eq_attr "type" "branch") - (eq_attr "cpu" "cell")) - "bru_cell+slot1") - -(define_insn_reservation "cell-branchreg" 1 - (and (eq_attr "type" "jmpreg") - (eq_attr "cpu" "cell")) - "bru_cell+slot1") - -;; cr hazard -;; page 90, special cases for CR hazard, only one instr can access cr per cycle -;; if insn reads CR following a stwcx, pipeline stall till stwcx finish -(define_insn_reservation "cell-crlogical" 1 - (and (eq_attr "type" "cr_logical,delayed_cr") - (eq_attr "cpu" "cell")) - "bru_cell+slot01") - -;; mfcrf and mfcr is about 34 cycles and nonpipelined -(define_insn_reservation "cell-mfcr" 34 - (and (eq_attr "type" "mfcrf,mfcr") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*32") - -;; mtcrf (1 field) -(define_insn_reservation "cell-mtcrf" 1 - (and (eq_attr "type" "mtcr") - (eq_attr "cpu" "cell")) - "fxu_cell+slot01") - -; Basic FP latency is 10 cycles, thoughput is 1/cycle -(define_insn_reservation "cell-fp" 10 - (and (eq_attr "type" "fp,dmul") - (eq_attr "cpu" "cell")) - "slot01,vsu1_cell,vsu1_cell*8") - -(define_insn_reservation "cell-fpcompare" 1 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "cell")) - "vsu1_cell+slot01") - -;; sdiv thoughput 1/74, not pipelined but only in the FPU -(define_insn_reservation "cell-sdiv" 74 - (and (eq_attr "type" "sdiv,ddiv") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*72") - -;; fsqrt thoughput 1/84, not pipelined but only in the FPU -(define_insn_reservation "cell-sqrt" 84 - (and (eq_attr "type" "ssqrt,dsqrt") - (eq_attr "cpu" "cell")) - "slot1,nonpipeline,nonpipeline*82") - -; VMX -(define_insn_reservation "cell-vecsimple" 4 - (and (eq_attr "type" "vecsimple") - (eq_attr "cpu" "cell")) - "slot01,vsu1_cell,vsu1_cell*2") - -;; mult, div, madd -(define_insn_reservation "cell-veccomplex" 10 - (and (eq_attr "type" "veccomplex") - (eq_attr "cpu" "cell")) - "slot01,vsu1_cell,vsu1_cell*8") - -;; TODO: add support for recording instructions -(define_insn_reservation "cell-veccmp" 4 - (and (eq_attr "type" "veccmp") - (eq_attr "cpu" "cell")) - "slot01,vsu1_cell,vsu1_cell*2") - -(define_insn_reservation "cell-vecfloat" 12 - (and (eq_attr "type" "vecfloat") - (eq_attr "cpu" "cell")) - "slot01,vsu1_cell,vsu1_cell*10") - -(define_insn_reservation "cell-vecperm" 4 - (and (eq_attr "type" "vecperm") - (eq_attr "cpu" "cell")) - "slot01,vsu2_cell,vsu2_cell*2") - -;; New for 4.2, syncs - -(define_insn_reservation "cell-sync" 11 - (and (eq_attr "type" "sync") - (eq_attr "cpu" "cell")) - "slot01,lsu_cell,lsu_cell*9") - -(define_insn_reservation "cell-isync" 11 - (and (eq_attr "type" "isync") - (eq_attr "cpu" "cell")) - "slot01,lsu_cell,lsu_cell*9") - -(define_insn_reservation "cell-load_l" 11 - (and (eq_attr "type" "load_l") - (eq_attr "cpu" "cell")) - "slot01,lsu_cell,lsu_cell*9") - -(define_insn_reservation "cell-store_c" 11 - (and (eq_attr "type" "store_c") - (eq_attr "cpu" "cell")) - "slot01,lsu_cell,lsu_cell*9") - -;; RAW register dependency - -;; addi r3, r3, 1 -;; lw r4,offset(r3) -;; there are 5 cycle deplay for r3 bypassing -;; there are 5 cycle delay for a dependent load after a load -(define_bypass 5 "cell-integer" "cell-load") -(define_bypass 5 "cell-integer" "cell-load-ext") -(define_bypass 5 "cell-load,cell-load-ext" "cell-load,cell-load-ext") - -;; there is a 6 cycle delay after a fp compare until you can use the cr. -(define_bypass 6 "cell-fpcompare" "cell-branch,cell-branchreg,cell-mfcr,cell-crlogical") - -;; VXU float RAW -(define_bypass 11 "cell-vecfloat" "cell-vecfloat") - -;; VXU and FPU -(define_bypass 6 "cell-veccomplex" "cell-vecsimple") -;;(define_bypass 6 "cell-veccompare" "cell-branch,cell-branchreg") -(define_bypass 3 "cell-vecfloat" "cell-veccomplex") -; this is not correct, -;; this is a stall in general and not dependent on result -(define_bypass 13 "cell-vecstore" "cell-fpstore") -; this is not correct, this can never be true, not dependent on result -(define_bypass 7 "cell-fp" "cell-fpload") -;; vsu1 should avoid writing to the same target register as vsu2 insn -;; within 12 cycles. - -;; WAW hazard - -;; the target of VSU estimate should not be reused within 10 dispatch groups -;; the target of VSU float should not be reused within 8 dispatch groups -;; the target of VSU complex should not be reused within 5 dispatch groups -;; FP LOAD should not reuse an FPU Arithmetic target with 6 dispatch gropus - -;; mtctr-bcctr/bcctrl, branch target ctr register shadow update at -;; ex4 stage(10 cycles) -(define_bypass 10 "cell-mtjmpr" "cell-branchreg") - -;;Things are not simulated: -;; update instruction, update address gpr are not simulated -;; vrefp, vrsqrtefp have latency(14), currently simulated as 12 cycle float -;; insns - |