###################################- # # Copyright (C) 2009-2014 Free Software Foundation, Inc. # # Contributed by Michael Eager . # # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 3, or (at your option) any # later version. # # GCC is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public # License for more details. # # Under Section 7 of GPL version 3, you are granted additional # permissions described in the GCC Runtime Library Exception, version # 3.1, as published by the Free Software Foundation. # # You should have received a copy of the GNU General Public License and # a copy of the GCC Runtime Library Exception along with this program; # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see # . 
#
#  muldi3_hard.S
#
#  Multiply operation for 64 bit integers, for devices with hard multiply
#	Input :	Operand1[H] in Reg r5
#		Operand1[L] in Reg r6
#		Operand2[H] in Reg r7
#		Operand2[L] in Reg r8
#	Output: Result[H] in Reg r3
#		Result[L] in Reg r4
#
#  Explanation:
#
#	Both the input numbers are divided into 16 bit numbers as follows
#		op1 = A B C D
#		op2 = E F G H
#	result =    D * H
#		 + (C * H + D * G) << 16
#		 + (B * H + C * G + D * F) << 32
#		 + (A * H + B * G + C * F + D * E) << 48
#
#	Only the low 64 bits of the product are produced.
#
#  Registers written: r3, r4, r7, r8, r9, r10, r11, r12 and the carry bit.
#  r20-r27 are used as temporaries and are saved/restored around the body.
#
#  Stack usage (offsets from r1 after the 40 byte frame is allocated):
#	 0..28	saved r20-r27
#	32..35	high word of the result, built as half-words Pos0 Pos1
#	36..39	low word of the result, built as half-words Pos2 Pos3
#	44..59	the four operand words, spilled into the caller frame
#
#  NOTE: the 16 bit pieces are obtained by storing a word and reloading it
#  as half-words, with the lower address treated as the upper half.  That
#  matches a big-endian memory layout only.
#
#######################################

	.globl	muldi3_hardproc
	.ent	muldi3_hardproc
	.type	muldi3_hardproc,@function
muldi3_hardproc:
	addi	r1,r1,-40

#  Spill the input operands into the caller frame so that they can be
#  re-read as 16 bit pieces
	swi	r5,r1,44
	swi	r6,r1,48
	swi	r7,r1,52
	swi	r8,r1,56

#  Save the callee saved registers used as temporaries
	swi	r20,r1,0
	swi	r21,r1,4
	swi	r22,r1,8
	swi	r23,r1,12
	swi	r24,r1,16
	swi	r25,r1,20
	swi	r26,r1,24
	swi	r27,r1,28

#  Load the 16 bit values A through H (zero extended by lhui)
	lhui	r20,r1,44	# A
	lhui	r21,r1,46	# B
	lhui	r22,r1,48	# C
	lhui	r23,r1,50	# D
	lhui	r24,r1,52	# E
	lhui	r25,r1,54	# F
	lhui	r26,r1,56	# G
	lhui	r27,r1,58	# H

#  Store1 = D * H  ==> Pos2 and Pos3 (Pos3 is final after this store)
	mul	r9,r23,r27
	swi	r9,r1,36

#  Store2 = Hi (Store1) + C * H + D * G  ==> low half is Pos2
#  The sum can exceed 32 bits; the overflow is accumulated in r12 and
#  belongs to Pos0
	lhui	r11,r1,36	# Hi (Store1)
	mul	r9,r22,r27	# C * H
	mul	r10,r23,r26	# D * G
	add	r9,r9,r10
	addc	r12,r0,r0	# r12 = carry out of the first add
	add	r9,r9,r11
	addc	r12,r12,r0	# r12 += carry out of the second add
	shi	r9,r1,36	# Pos2 = low half of Store2
	swi	r9,r1,32	# bounce Store2 through memory ...
	lhui	r11,r1,32	# ... to get Hi (Store2) in r11

#  Store3 = Hi (Store2) + B * H + C * G + D * F  ==> Pos0 and Pos1
#  Overflow past 32 bits would land above bit 63 and is dropped
	mul	r9,r21,r27	# B * H
	mul	r10,r22,r26	# C * G
	mul	r7,r23,r25	# D * F
	add	r9,r9,r11
	add	r9,r9,r10
	add	r9,r9,r7
	swi	r9,r1,32	# Pos1 is final after this store

#  Pos0 = Hi (Store3) + A * H + B * G + C * F + D * E + saved carry
#  Only the low 16 bits of the sum are kept (shi stores a half-word)
	lhui	r11,r1,32	# Hi (Store3)
	mul	r9,r20,r27	# A * H
	mul	r10,r21,r26	# B * G
	mul	r7,r22,r25	# C * F
	mul	r8,r23,r24	# D * E
	add	r9,r9,r11
	add	r9,r9,r10
	add	r9,r9,r7
	add	r9,r9,r8
	add	r9,r9,r12	# carry saved from the Store2 step
	shi	r9,r1,32	# Pos0

#  Move results to r3 and r4
	lwi	r3,r1,32	# Hi Part
	lwi	r4,r1,36	# Lo Part

#  Restore callee saved registers
	lwi	r20,r1,0
	lwi	r21,r1,4
	lwi	r22,r1,8
	lwi	r23,r1,12
	lwi	r24,r1,16
	lwi	r25,r1,20
	lwi	r26,r1,24
	lwi	r27,r1,28

#  Return; the frame is released in the delay slot
	rtsd	r15,8
	addi	r1,r1,40

	.end	muldi3_hardproc
	.size	muldi3_hardproc, . - muldi3_hardproc