;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Microsoft Research Singularity
;;;
;;; Copyright (c) Microsoft Corporation. All rights reserved.
;;;
;;; This file contains ARM-specific assembly code.
;;;

; arith.s
;
; Copyright (C) Advanced RISC Machines Limited, 1994. All rights reserved.
;
; RCS Revision: 1
; Checkin Date: 2007/06/29 02:59:16
; Revising Author
;
; > coresrc.s.arith
;
; Assembler source for FPA support code and emulator
; ==================================================
; Routines to do arithmetic.
;
; These routines work on numbers in the standard internal format.

;===========================================================================

; Label names are selected through assembler string variables: a routine
; label written as "$<name>_str" (e.g. "$NormaliseOp1_str") expands to the
; string set below. When FPEWanted or FPASCWanted is set the plain names are
; used and FPLibWanted is {FALSE}; otherwise the "__fp_" names are used and
; FPLibWanted is {TRUE}.

        GBLS    NormaliseOp1_str
        GBLS    NormaliseOp1Neg_str
        GBLS    NormaliseOp2_str
        GBLS    NormDenormOp1_str
        GBLS    NormDenormOp2_str
        GBLS    ConvertNaNs_str
        GBLS    ConvertNaN1_str
        GBLS    ConvertNaN1Of2_str
        GBLS    ConvertNaN2Of2_str
        GBLL    FPLibWanted

        [ FPEWanted :LOR: FPASCWanted

NormaliseOp1_str        SETS    "NormaliseOp1"
NormaliseOp1Neg_str     SETS    "NormaliseOp1Neg"
NormaliseOp2_str        SETS    "NormaliseOp2"
NormDenormOp1_str       SETS    "NormDenormOp1"
NormDenormOp2_str       SETS    "NormDenormOp2"
ConvertNaNs_str         SETS    "ConvertNaNs"
ConvertNaN1_str         SETS    "ConvertNaN1"
ConvertNaN1Of2_str      SETS    "ConvertNaN1Of2"
ConvertNaN2Of2_str      SETS    "ConvertNaN2Of2"
FPLibWanted             SETL    {FALSE}

        |

NormaliseOp1_str        SETS    "__fp_normalise_op1"
NormaliseOp1Neg_str     SETS    "__fp_normalise_op1neg"
NormaliseOp2_str        SETS    "__fp_normalise_op2"
NormDenormOp1_str       SETS    "__fp_norm_denorm_op1"
NormDenormOp2_str       SETS    "__fp_norm_denorm_op2"
ConvertNaNs_str         SETS    "__fp_convert_NaNs"
ConvertNaN1_str         SETS    "__fp_convert_NaN1"
ConvertNaN1Of2_str      SETS    "__fp_convert_NaN_1Of2"
ConvertNaN2Of2_str      SETS    "__fp_convert_NaN_2Of2"
FPLibWanted             SETL    {TRUE}

; In this branch the normalisation routines are only assembled from this
; file when normalise_s is defined (see the conditional that follows), so
; import them when it is not.

        [ :LNOT: :DEF: normalise_s
        IMPORT  $NormaliseOp1_str
        IMPORT  $NormaliseOp1Neg_str
        IMPORT  $NormaliseOp2_str
        IMPORT  $NormDenormOp1_str
        IMPORT  $NormDenormOp2_str
        IMPORT  $ConvertNaNs_str
        IMPORT  $ConvertNaN1_str
        IMPORT  $ConvertNaN1Of2_str
        IMPORT  $ConvertNaN2Of2_str
        ]

        ]

        [ :DEF: normalise_s :LOR: FPEWanted :LOR: FPASCWanted

; Many of these routines use some standard entry and exit conventions. There
; are two such sets of conventions:
;
; STANDARD MONADIC OPERATION ENTRY AND EXIT
; -----------------------------------------
;
; Entry: OP1sue = Operand sign, uncommon, exponent;
;        OP1mhi = Operand mantissa, high word;
;        OP1mlo = Operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (may be needed to determine the exact
;                 operation and/or for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign and uncommon bit; the remaining bits are
;                 zero if the uncommon bit is 0, and set correctly for the final
;                 result if the uncommon bit is 1;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp (= OP2sue) = if the uncommon bit is 0, the result exponent,
;                 which may be negative; otherwise corrupt;
;        Rarith is corrupt if the uncommon bit is 1; otherwise, if the
;          destination precision is extended, it holds the round bit (in bit
;          31) and the sticky bit (in bits 30:0), and if the destination
;          precision is single or double, it holds part of the sticky bit
;          (the remainder of which is held in bits below the round bit in
;          OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        Rfpsr may be updated;
;        All other registers preserved.
;
; STANDARD DYADIC OPERATION ENTRY AND EXIT
; ----------------------------------------
;
; Entry: OP1sue = First operand sign, uncommon, exponent;
;        OP1mhi = First operand mantissa, high word;
;        OP1mlo = First operand mantissa, low word;
;        OP2sue = Second operand sign, uncommon, exponent;
;        OP2mhi = Second operand mantissa, high word;
;        OP2mlo = Second operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (may be needed to determine the exact
;                 operation and/or for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign and uncommon bit; the remaining bits are
;                 zero if the uncommon bit is 0, and set correctly for the final
;                 result if the uncommon bit is 1;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp (= OP2sue) = if the uncommon bit is 0, the result exponent,
;                 which may be negative; otherwise corrupt;
;        Rarith is corrupt if the uncommon bit is 1; otherwise, if the
;          destination precision is extended, it holds the round bit (in bit
;          31) and the sticky bit (in bits 30:0), and if the destination
;          precision is single or double, it holds part of the sticky bit
;          (the remainder of which is held in bits below the round bit in
;          OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        Rfpsr may be updated;
;        All other registers preserved.
;
; In both sets of conventions, the routine called is free to produce an
; incorrect result mantissa and rounding information, as long as it knows
; that it will in fact be rounded to the correct value.

;===========================================================================

; Routine to normalise the first or only operand. The biased exponent won't
; be taken below 0: instead, the number will be denormalised if normalising
; it would cause this to happen. Note that the result will never be marked
; as uncommon: any caller of this routine must deal with this itself if
; necessary.
; Entry: OP1sue = First operand sign, remaining bits junk;
;        OP1mhi, OP1mlo = First operand mantissa;
;        Rarith = First operand exponent, shifted to be left aligned in the
;                 word;
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP1sue = First operand sign and exponent (uncommon is always 0);
;        OP1mhi, OP1mlo updated;
;        Rarith, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.

$NormDenormOp1_str

; Clear out the junk bits in OP1sue.

        AND     OP1sue,OP1sue,#Sign_bit

; Do we have to normalise by 32 bits or more?

        TEQ     OP1mhi,#0
        BEQ     NormDenormOp1_LongShift

; If not, find out how much we do have to shift by. Each step tests whether
; the top 16/8/4/2/1 bits of the high word are all zero (the MOVS result in
; Rtmp2 is only wanted for its Z flag); if they are, the high word is moved
; up by that many bits and the amount is added to the running total.

        MOV     Rtmp,#0                 ;Accumulate shift amount in Rtmp
        MOVS    Rtmp2,OP1mhi,LSR #16
        MOVEQ   OP1mhi,OP1mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP1mhi,LSR #24
        MOVEQ   OP1mhi,OP1mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP1mhi,LSR #28
        MOVEQ   OP1mhi,OP1mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP1mhi,LSR #30
        MOVEQ   OP1mhi,OP1mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP1mhi,LSR #31
        MOVEQ   OP1mhi,OP1mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Then complete the shift - i.e. convert the single
; word shift into a two word shift - adjust the exponent if the exponent was
; greater than the shift amount (otherwise we leave it zero) and return.
; The flags set by the SUBS are still in force at the SUBLO, since none of
; the intervening instructions sets them.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
        MOVHI   OP1mhi,OP1mhi,LSR Rtmp2
        MOVHI   Rtmp,Rarith,LSR #32-EIExp_len
        RSB     Rarith,Rtmp,#32
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rarith
        MOV     OP1mlo,OP1mlo,LSL Rtmp
        SUBLO   OP1sue,OP1sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC, LR
        ENDIF

NormDenormOp1_LongShift

; The top word is zero, so we need to shift by 32 bits or more. Or do we? -
; if the exponent is less than 32, we simply need to shift by the exponent.

        CMP     Rarith,#32:SHL:(32-EIExp_len)
        BLO     NormDenormOp1_ByExponent

; Now check the bottom word: if it is also zero, we simply need to
; denormalise to exponent 0.

        MOVS    OP1mhi,OP1mlo
        IF Interworking :LOR: Thumbing
        BXEQ    LR
        ELSE
        MOVEQ   PC,LR                   ;OP1sue/mhi/mlo are all already correct!
        ENDIF
        MOV     OP1mlo,#0

; The bottom word is non-zero, so we have a shift amount in the range 32-63.
; The 32-bit part of the shift has just been done by the word move above;
; the remainder is found exactly as in the short-shift case.

        MOV     Rtmp,#32
        MOVS    Rtmp2,OP1mhi,LSR #16
        MOVEQ   OP1mhi,OP1mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP1mhi,LSR #24
        MOVEQ   OP1mhi,OP1mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP1mhi,LSR #28
        MOVEQ   OP1mhi,OP1mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP1mhi,LSR #30
        MOVEQ   OP1mhi,OP1mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP1mhi,LSR #31
        MOVEQ   OP1mhi,OP1mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Note that this cannot require us to undo the shift
; from the bottom word to the top word, since we know the exponent was at
; least 32.
;   So we need to backshift if shift amount > exponent, and create a
; non-zero exponent if shift amount < exponent.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
        MOVHI   OP1mhi,OP1mhi,LSR Rtmp2
        SUBLO   OP1sue,OP1sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

NormDenormOp1_ByExponent

; We need to shift the mantissa left by the exponent, which is guaranteed to
; be less than 32, and to return a zero exponent (note that OP1sue is
; already set up for this).

        MOV     Rtmp,Rarith,LSR #32-EIExp_len
        RSB     Rtmp2,Rtmp,#32
        MOV     OP1mhi,OP1mhi,LSL Rtmp
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2
        MOV     OP1mlo,OP1mlo,LSL Rtmp
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

;===========================================================================

; Routine to normalise the second operand. The biased exponent won't be
; taken below 0: instead, the number will be denormalised if normalising it
; would cause this to happen. Note that the result will never be marked
; as uncommon: any caller of this routine must deal with this itself if
; necessary.
; Entry: OP2sue = Second operand sign, remaining bits junk;
;        OP2mhi, OP2mlo = Second operand mantissa;
;        Rarith = Second operand exponent, shifted to be left aligned in the
;                 word;
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP2sue = Second operand sign and exponent (uncommon is always 0);
;        OP2mhi, OP2mlo updated;
;        Rarith, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.

$NormDenormOp2_str

; Clear out the junk bits in OP2sue.

        AND     OP2sue,OP2sue,#Sign_bit

; Do we have to normalise by 32 bits or more?

        TEQ     OP2mhi,#0
        BEQ     NormDenormOp2_LongShift

; If not, find out how much we do have to shift by. Each step tests whether
; the top 16/8/4/2/1 bits of the high word are all zero (the MOVS result in
; Rtmp2 is only wanted for its Z flag); if they are, the high word is moved
; up by that many bits and the amount is added to the running total.

        MOV     Rtmp,#0                 ;Accumulate shift amount in Rtmp
        MOVS    Rtmp2,OP2mhi,LSR #16
        MOVEQ   OP2mhi,OP2mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP2mhi,LSR #24
        MOVEQ   OP2mhi,OP2mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP2mhi,LSR #28
        MOVEQ   OP2mhi,OP2mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP2mhi,LSR #30
        MOVEQ   OP2mhi,OP2mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP2mhi,LSR #31
        MOVEQ   OP2mhi,OP2mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Then complete the shift - i.e. convert the single
; word shift into a two word shift - adjust the exponent if the exponent was
; greater than the shift amount (otherwise we leave it zero) and return.
; The flags set by the SUBS are still in force at the SUBLO, since none of
; the intervening instructions sets them.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
        MOVHI   OP2mhi,OP2mhi,LSR Rtmp2
        MOVHI   Rtmp,Rarith,LSR #32-EIExp_len
        RSB     Rarith,Rtmp,#32
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR Rarith
        MOV     OP2mlo,OP2mlo,LSL Rtmp
        SUBLO   OP2sue,OP2sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

NormDenormOp2_LongShift

; The top word is zero, so we need to shift by 32 bits or more. Or do we? -
; if the exponent is less than 32, we simply need to shift by the exponent.

        CMP     Rarith,#32:SHL:(32-EIExp_len)
        BLO     NormDenormOp2_ByExponent

; Now check the bottom word: if it is also zero, we simply need to
; denormalise to exponent 0.

        MOVS    OP2mhi,OP2mlo
        IF Interworking :LOR: Thumbing
        BXEQ    LR
        ELSE
        MOVEQ   PC,LR                   ;OP2sue/mhi/mlo are all already correct!
        ENDIF
        MOV     OP2mlo,#0

; The bottom word is non-zero, so we have a shift amount in the range 32-63.
; The 32-bit part of the shift has just been done by the word move above;
; the remainder is found exactly as in the short-shift case.

        MOV     Rtmp,#32
        MOVS    Rtmp2,OP2mhi,LSR #16
        MOVEQ   OP2mhi,OP2mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP2mhi,LSR #24
        MOVEQ   OP2mhi,OP2mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP2mhi,LSR #28
        MOVEQ   OP2mhi,OP2mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP2mhi,LSR #30
        MOVEQ   OP2mhi,OP2mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP2mhi,LSR #31
        MOVEQ   OP2mhi,OP2mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Note that this cannot require us to undo the shift
; from the bottom word to the top word, since we know the exponent was at
; least 32.
;   So we need to backshift if shift amount > exponent, and create a
; non-zero exponent if shift amount < exponent.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
        MOVHI   OP2mhi,OP2mhi,LSR Rtmp2
        SUBLO   OP2sue,OP2sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

NormDenormOp2_ByExponent

; We need to shift the mantissa left by the exponent, which is guaranteed to
; be less than 32, and to return a zero exponent (note that OP2sue is
; already set up for this).

        MOV     Rtmp,Rarith,LSR #32-EIExp_len
        RSB     Rtmp2,Rtmp,#32
        MOV     OP2mhi,OP2mhi,LSL Rtmp
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR Rtmp2
        MOV     OP2mlo,OP2mlo,LSL Rtmp
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

;===========================================================================

; Routine to float an integer. To fit in with the usual conventions, the
; entry point is given two labels, namely "FltFPE" and "FltFPASC".
;   The value returned is always a numeric value plus associated rounding
; information, with the uncommon bit clear.
; Entry: Rarith = integer;
;        Rfpsr  = FPSR;
;        Rins   = instruction (needed for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign, with the remaining bits zero;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp (= OP2sue) = the result exponent;
;        Rarith = 0 (i.e.
;          the appropriate round and sticky information for
;          extended precision);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        Rfpsr may be updated;
;        All other registers preserved.

        [ FPEWanted
FltFPE
        ]

        [ FPASCWanted
FltFPASC
        ]

        CDebug1 3,"FltFPE/FPASC: operand =",Rarith

; Extract the sign and produce an unnormalised mantissa. In the process,
; detect the special case of a zero operand.
;   The ANDS leaves NE for a negative integer and EQ otherwise; in the EQ
; case the MOVEQS re-sets the flags, so that EQ after it means the integer
; was exactly zero.

        MOV     OP1mlo,#0               ;Mantissa low word is always zero
        ANDS    OP1sue,Rarith,#Sign_bit ;Extract sign
        ASSERT  Sign_pos = 31
        RSBNE   OP1mhi,Rarith,#0        ;If -ve, 2's complement the integer
        MOVEQS  OP1mhi,Rarith           ;If +ve, copy and check for zero
        MOVEQ   RNDexp,#0               ;If zero, result exponent is zero
        IF Interworking :LOR: Thumbing
        BXEQ    LR
        ELSE
        MOVEQ   PC,LR                   ; and return (Rarith is already 0)
        ENDIF

; If non-zero, set the appropriate exponent and rounding information, then
; fall through into NormaliseOp1 to complete the job. The exponent constant
; is built in two halves (high byte, then low byte) rather than with a
; single MOV; the ASSERT checks that two bytes are enough.

        MOV     RNDexp,#(EIExp_bias+31):AND:&FF00
        ORR     RNDexp,RNDexp,#(EIExp_bias+31):AND:&FF
        ASSERT  (EIExp_bias+31) <= &FFFF
        MOV     Rarith,#0

; Fall through to NormaliseOp1

;===========================================================================

; NB it is possible to fall through into this routine.

; Routine to normalise the result or first operand. Unlike the two routines
; above, this routine will normalise the exponent to a value less than zero
; if necessary, and it won't put the exponent back into OP1sue. Note that
; the result will never be marked as uncommon: any caller of this routine
; must deal with this itself if necessary.
; Entry: OP1mhi, OP1mlo = Result/first operand mantissa, which must not be
;                         all zero;
;        RNDexp = Result/first operand exponent (in normal position in
;                 word);
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP1mhi, OP1mlo and RNDexp updated;
;        Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved;
;        NE condition is true.
$NormaliseOp1_str

; Normalise (OP1mhi,OP1mlo) so that the top bit of OP1mhi is set, taking the
; total shift off RNDexp. The shift is found by successive halving: each
; MOVS tests whether the top 16/8/4/2/1 bits of OP1mhi are all zero (Rtmp2
; is only wanted for the Z flag), and if so OP1mhi is moved up and the
; amount accumulated in Rtmp; the low word is then brought up in one go.

        TEQ     OP1mhi,#0               ;Do full word shift if
        MOVEQ   OP1mhi,OP1mlo           ; necessary
        MOVEQ   OP1mlo,#0
        SUBEQ   RNDexp,RNDexp,#32
        MOV     Rtmp,#0                 ;Counter for rest of shift
        MOVS    Rtmp2,OP1mhi,LSR #16    ;Shift top word by 16 if
        MOVEQ   OP1mhi,OP1mhi,LSL #16   ; necessary
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP1mhi,LSR #24    ;Shift top word by 8 if
        MOVEQ   OP1mhi,OP1mhi,LSL #8    ; necessary
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP1mhi,LSR #28    ;Shift top word by 4 if
        MOVEQ   OP1mhi,OP1mhi,LSL #4    ; necessary
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP1mhi,LSR #30    ;Shift top word by 2 if
        MOVEQ   OP1mhi,OP1mhi,LSL #2    ; necessary
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP1mhi,LSR #31    ;Shift top word by 1 if
        MOVEQ   OP1mhi,OP1mhi,LSL #1    ; necessary
        ADDEQ   Rtmp,Rtmp,#1

; Rtmp is at most 31 here, so 32-Rtmp is non-zero: the RSBS is what leaves
; the NE condition for the caller, and nothing after it sets the flags.

        RSBS    Rtmp2,Rtmp,#32          ;Shift the bottom word by
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2 ; the same amount and set NE
        MOV     OP1mlo,OP1mlo,LSL Rtmp
        SUB     RNDexp,RNDexp,Rtmp      ;Adjust exponent by shift
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR                   ; amount and return
        ENDIF

;===========================================================================

; Routine to normalise the second operand. Unlike the two routines above,
; this routine will normalise the exponent to a value less than zero if
; necessary, and it won't put the exponent back into OP1sue. Note that the
; result will never be marked as uncommon: any caller of this routine must
; deal with this itself if necessary.
; Entry: OP2mhi, OP2mlo = Second operand mantissa, which must not be all
;                         zero;
;        RNDexp = Second operand exponent (in normal position in word);
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP2mhi, OP2mlo and RNDexp updated;
;        Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved;
;        NE condition is true.
$NormaliseOp2_str

; Normalise (OP2mhi,OP2mlo) so that the top bit of OP2mhi is set, taking the
; total shift off RNDexp. The shift is found by successive halving: each
; MOVS tests whether the top 16/8/4/2/1 bits of OP2mhi are all zero (Rtmp2
; is only wanted for the Z flag), and if so OP2mhi is moved up and the
; amount accumulated in Rtmp; the low word is then brought up in one go.

        TEQ     OP2mhi,#0               ;Do full word shift if
        MOVEQ   OP2mhi,OP2mlo           ; necessary
        MOVEQ   OP2mlo,#0
        SUBEQ   RNDexp,RNDexp,#32
        MOV     Rtmp,#0                 ;Counter for rest of shift
        MOVS    Rtmp2,OP2mhi,LSR #16    ;Shift top word by 16 if
        MOVEQ   OP2mhi,OP2mhi,LSL #16   ; necessary
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP2mhi,LSR #24    ;Shift top word by 8 if
        MOVEQ   OP2mhi,OP2mhi,LSL #8    ; necessary
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP2mhi,LSR #28    ;Shift top word by 4 if
        MOVEQ   OP2mhi,OP2mhi,LSL #4    ; necessary
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP2mhi,LSR #30    ;Shift top word by 2 if
        MOVEQ   OP2mhi,OP2mhi,LSL #2    ; necessary
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP2mhi,LSR #31    ;Shift top word by 1 if
        MOVEQ   OP2mhi,OP2mhi,LSL #1    ; necessary
        ADDEQ   Rtmp,Rtmp,#1

; Rtmp is at most 31 here, so 32-Rtmp is non-zero: the RSBS is what leaves
; the NE condition for the caller, and nothing after it sets the flags.

        RSBS    Rtmp2,Rtmp,#32          ;Shift the bottom word by
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR Rtmp2 ; the same amount and set NE
        MOV     OP2mlo,OP2mlo,LSL Rtmp
        SUB     RNDexp,RNDexp,Rtmp      ;Adjust exponent by shift
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR                   ; amount and return
        ENDIF

;===========================================================================

; Routine to normalise the first operand. Like "NormaliseOp1", except that
; it increments the exponent in RNDexp by the shift amount, rather than
; decrementing it.
; Entry: OP1mhi, OP1mlo = First operand mantissa, which must not be all
;                         zero;
;        RNDexp = Exponent (in normal position in word);
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP1mhi, OP1mlo and RNDexp updated;
;        Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved;
;        NE condition is true.
$NormaliseOp1Neg_str

; Normalise (OP1mhi,OP1mlo) so that the top bit of OP1mhi is set, ADDING the
; total shift to RNDexp. The shift is found by successive halving: each
; MOVS tests whether the top 16/8/4/2/1 bits of OP1mhi are all zero (Rtmp2
; is only wanted for the Z flag), and if so OP1mhi is moved up and the
; amount accumulated in Rtmp; the low word is then brought up in one go.

        TEQ     OP1mhi,#0               ;Do full word shift if
        MOVEQ   OP1mhi,OP1mlo           ; necessary
        MOVEQ   OP1mlo,#0
        ADDEQ   RNDexp,RNDexp,#32
        MOV     Rtmp,#0                 ;Counter for rest of shift
        MOVS    Rtmp2,OP1mhi,LSR #16    ;Shift top word by 16 if
        MOVEQ   OP1mhi,OP1mhi,LSL #16   ; necessary
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP1mhi,LSR #24    ;Shift top word by 8 if
        MOVEQ   OP1mhi,OP1mhi,LSL #8    ; necessary
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP1mhi,LSR #28    ;Shift top word by 4 if
        MOVEQ   OP1mhi,OP1mhi,LSL #4    ; necessary
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP1mhi,LSR #30    ;Shift top word by 2 if
        MOVEQ   OP1mhi,OP1mhi,LSL #2    ; necessary
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP1mhi,LSR #31    ;Shift top word by 1 if
        MOVEQ   OP1mhi,OP1mhi,LSL #1    ; necessary
        ADDEQ   Rtmp,Rtmp,#1

; Rtmp is at most 31 here, so 32-Rtmp is non-zero: the RSBS is what leaves
; the NE condition for the caller, and nothing after it sets the flags.

        RSBS    Rtmp2,Rtmp,#32          ;Shift the bottom word by
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2 ; the same amount and set NE
        MOV     OP1mlo,OP1mlo,LSL Rtmp
        ADD     RNDexp,RNDexp,Rtmp      ;Adjust exponent by shift
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR                   ; amount and return
        ENDIF

        ]

;===========================================================================

        [ :DEF: addsub_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to add, subtract or reverse subtract two internal format floating
; point numbers. It has two entry points: "AddSubFPE", which has an
; optimised fast track for both operands being common, and "AddSubFPASC",
; which avoids the test for this optimised fast track - since it should
; never happen. The second entry point lies a long way down in the source
; to avoid addressing constraints.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
; RNDexp and OP2sue must be the same register for the code below to work.

        ASSERT  RNDexp = OP2sue ;We swap over from the use of OP2sue to that
                                ; of RNDexp partway through this routine.

        [ FPEWanted

AddSubFPE

        CDebug3 3,"AddSubFPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo

; Start by detecting the "fast track" case of both operands being common.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit
        BNE     AddSub_Uncommon

        ]

        [ FPLibWanted
__fp_addsub_common
        ]

AddSub_Common

        STMFD   Rsp!,{LR}               ;Register needed, and we may get a
                                        ; subroutine call

        CDebug3 4,"AddSub_Common: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 4," op2 =",OP2sue,OP2mhi,OP2mlo

; Both operands are zeros or normalised numbers. We can distinguish between
; them on the basis of the units bit. However, note that the standard
; algorithm for adding/subtracting floating point numbers (i.e. do an
; alignment shift on the one with the smaller exponent, add or subtract the
; mantissas, then do a normalisation shift if necessary) works equally well
; on all of these.
;   This entry point is also called from AddSub_Uncommon to add or subtract
; operands which are zeros, normalised numbers or extended denormalised
; numbers. It works perfectly well on such numbers, provided it is
; recognised that the result mantissa may be unnormalised and non-zero.
;   Note that we know that the invalid operation and divide-by-zero
; exceptions won't occur - i.e. we don't need to preserve the operands. So
; we start by modifying the signs of the operands for SUF and RSF
; instructions.

        [ :LNOT: :DEF: addsub_s
        TST     Rins,#SubNotAdd_bit     ;Is it SUF/RSF, not ADF?
        EORNE   OP2sue,OP2sue,#Sign_bit ;If so, change op2 sign (assuming SUF)
        TST     Rins,#RSF_bit           ;Is it RSF, not ADF/SUF?
        EORNE   OP2sue,OP2sue,#Sign_bit ;If so, we shouldn't have changed op2
        EORNE   OP1sue,OP1sue,#Sign_bit ; sign and should have changed op1 sign
        ]

; We can consider this to be an addition from now on. Next, we'll deal with
; the basic exponent and sign calculation: the results of this may get
; modified later on.
;   This section will leave the prospective sign for the result in OP1sue,
; R14 containing the exclusive-OR of the signs (which determines later
; whether we do a magnitude addition or subtraction), RNDexp equal to the
; first operand exponent and Rtmp equal to the exponent difference
; (left-aligned; Rarith is only used to carry the operand 1 exponent into
; RNDexp).
;   Note the order: the EOR must read OP2sue before RNDexp (the same
; register) is written, and none of EOR/AND/MOV sets the flags, so the
; branches below still see the flags left by the ExpDiff macro.

        ExpDiff Rtmp,Rarith,OP1sue,OP2sue ;Get difference and op1 exp.
        EOR     R14,OP1sue,OP2sue       ;Make EOR of signs
        AND     OP1sue,OP1sue,#Sign_bit ;Isolate prospective result sign
        MOV     RNDexp,Rarith,LSR #32-EIExp_len ;Right-align operand 1 exponent
        BHI     AddSub_Op2Shift
        MOVEQ   Rtmp2,Rtmp              ;If EQ, Rtmp = Rtmp2 = 0
        BEQ     AddSub_ShiftDone        ; = correct guard/round/sticky

AddSub_Op1Shift

; Operand 1 needs shifting, and so operand 2's exponent is used for the
; result. Rtmp currently contains exp1-exp2 = -(shift amount),
; left-aligned.

        RSB     Rarith,Rtmp,#0          ;Get shift amount = exp2 - exp1
        MOV     Rarith,Rarith,LSR #32-EIExp_len ;Right-align exponent difference
        ADD     RNDexp,RNDexp,Rarith    ;Resurrect operand 2 exponent

; Now denormalise (OP1mhi,OP1mlo) with a shift amount of Rarith, putting
; op1 guard/round/sticky bits into Rtmp, op2 guard/round/sticky bits into
; Rtmp2.

        Denorm  OP1mhi,OP1mlo,Rtmp,Rarith,Rtmp2,Rarith
        MOV     Rtmp2,#0                ;Operand 2 guard/round/sticky
        B       AddSub_ShiftDone

AddSub_Op2Shift

; Operand 2 needs shifting, and so we've already selected the correct result
; exponent. Furthermore, Rtmp currently contains exp1-exp2 = shift amount,
; left-aligned. So denormalise (OP2mhi,OP2mlo) with a shift amount of Rtmp,
; putting op1 guard/round/sticky bits into Rtmp, op2 guard/round/sticky bits
; into Rtmp2.

        MOV     Rarith,Rtmp,LSR #32-EIExp_len ;Right-align exponent difference
        Denorm  OP2mhi,OP2mlo,Rtmp2,Rarith,Rtmp,Rarith
        MOV     Rtmp,#0                 ;Operand 1 guard/round/sticky

AddSub_ShiftDone

; We now have:
;   OP1sue:        Prospective result sign (= operand 1 sign);
;   OP1mhi/OP1mlo: Operand 1 mantissa, possibly shifted;
;   RNDexp:        Prospective result exponent (= MAX(operand exponents));
;   OP2mhi/OP2mlo: Operand 2 mantissa, possibly shifted;
;   Rarith:        Free;
;   Rfpsr:         FPSR;
;   Rtmp:          Operand 1 guard, round and sticky bits;
;   Rins:          Instruction;
;   Rtmp2:         Operand 2 guard, round and sticky bits;
;   Rwp,Rfp,Rsp:   Standard values;
;   R14:           Sign bit indicates magnitude subtraction/NOT addition;
; Now we need to split according to whether we need to do a magnitude
; addition or a magnitude subtraction.

        TST     R14,#Sign_bit
        BNE     AddSub_MagSub

AddSub_MagAdd

; Perform the magnitude addition. Note first that we have no need for a
; guard bit in this case, so we are going to regard the guard/round/sticky
; bits in Rtmp[31/30/29:0] and Rtmp2[31/30/29:0] as simply being
; round/sticky bits in Rtmp[31/30:0] and Rtmp2[31/30:0]. Secondly, note that
; since we know that at least one of Rtmp and Rtmp2 is zero, we can simply
; add these round/sticky bit representations to get the result round/sticky
; representation.

        ADDS    Rarith,Rtmp,Rtmp2       ;Will not in fact generate C=1
        ADCS    OP1mlo,OP1mlo,OP2mlo
        ADCS    OP1mhi,OP1mhi,OP2mhi

; If C=0, we're done. Otherwise, we've got to adjust the exponent, mantissa,
; round and sticky bits. The carry out of the addition is rotated into the
; top of the mantissa by the first RRX, and each RRX passes the bit it
; shifts out on to the next through the C flag (the ORR in between does not
; set the flags).

        IF Interworking :LOR: Thumbing
        LDMCCFD Rsp!,{LR}
        BXCC    LR
        ELSE
        LDMCCFD Rsp!,{PC}
        ENDIF
        ADD     RNDexp,RNDexp,#1
        MOVS    OP1mhi,OP1mhi,RRX
        MOVS    OP1mlo,OP1mlo,RRX
        ORR     Rarith,Rarith,Rarith,LSL #1 ;Sticky receives all of old
        MOV     Rarith,Rarith,RRX       ; round/sticky; round is new
        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{PC}
        ENDIF

AddSub_MagSub

; We need to do a magnitude subtraction of OP2mhi/OP2mlo/Rtmp2 from
; OP1mhi/OP1mlo/Rtmp. The prospective result exponent in RNDexp has been
; made right already, but if the subtraction comes out negative, we will
; have to change the sign of the result. Note we can subtract the
; guard/round/sticky representations in Rtmp and Rtmp2, because we know one
; of them is entirely zero.

        SUBS    Rarith,Rtmp,Rtmp2
        SBCS    OP1mlo,OP1mlo,OP2mlo
        SBCS    OP1mhi,OP1mhi,OP2mhi

; If the subtraction (which was of unsigned numbers) came out negative, we
; need to reverse the sign of the result and 2's complement the mantissa -
; again including the guard/round/sticky part.

        BCS     AddSub_MagSub_Normalise
        EOR     OP1sue,OP1sue,#Sign_bit
        RSBS    Rarith,Rarith,#0
        RSCS    OP1mlo,OP1mlo,#0
        RSC     OP1mhi,OP1mhi,#0

AddSub_MagSub_Normalise

; Now we need to normalise the result. This is slightly tricky, because in
; the case of subtracting the largest possible number with one exponent from
; the smallest number of the next exponent (e.g. 1-(1-2^(-64))), the leading
; bit of the result is actually the round bit. We can divide into two cases:
;
; (a) The exponent difference was 0 or 1: in this case, the number may be
;     normalised by up to 64 bits, but the current round and sticky bits
;     are guaranteed to be 0 - this ensures that the eventual sticky bit
;     is guaranteed to be zero, and that the round bit is also zero if a
;     non-zero normalisation shift is required;
;
; (b) The exponent difference was 2 or more: in this case, the number can
;     be normalised by at most one bit, but the eventual sticky bit may be
;     non-zero.
;
; So we will first try to normalise by 1 bit, bringing the guard bit into the
; mantissa if necessary.

        TST     OP1mhi,#EIUnits_bit     ;Already normalised?
        IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;Return if so
        BXNE    LR
        ELSE
        LDMNEFD Rsp!,{PC}               ;Return if so
        ENDIF
        ADDS    Rarith,Rarith,Rarith    ;Shift mhi/mlo/guard/round/sticky
        ADCS    OP1mlo,OP1mlo,OP1mlo    ; left by one bit to form new
        ADC     OP1mhi,OP1mhi,OP1mhi    ; mhi/mlo/round/sticky
        SUB     RNDexp,RNDexp,#1

; If the result is normalised now, we're done. Otherwise, we know that a
; normalisation shift of 1-63 is still required, that the exponent
; difference was 0 or 1, and thus that the new round and sticky bits are
; both zero.
;   However, at this point, we need to look out for the case of a magnitude
; subtraction of two equal numbers - for which we need to apply the special
; IEEE sign rule (i.e. -0 if rounding to -infinity, otherwise +0).
;   LR is free to use as the scratch destination of the ORRS because the
; return address was stacked on entry.

        TST     OP1mhi,#EIUnits_bit     ;Normalised now?
        IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;Return if so
        BXNE    LR
        ELSE
        LDMNEFD Rsp!,{PC}               ;Return if so
        ENDIF
        ORRS    LR,OP1mhi,OP1mlo        ;Is result zero?
        BLNE    $NormaliseOp1_str       ;If not, complete normalisation
        IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ; and return (note NormaliseOp1
        BXNE    LR                      ; returns with NE true)
        ELSE
        LDMNEFD Rsp!,{PC}               ; and return (note NormaliseOp1
                                        ; returns with NE true)
        ENDIF

; We know the result is a zero, with sign determined by the rounding mode.
; Everything except the sign and exponent has been correctly set already,
; so we test the rounding mode, set the sign and exponent, and return.

        [ :DEF: addsub_s
        MOV     dOPh, #0
        MOV     dOPl, #0
        ASSERT  dOPh = fOP :LOR: dOPl = fOP
;        ADD     sp,sp,#4                ; Pop link register off the stack
;        VReturn
        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{PC}
        ENDIF
        |
        AND     Rtmp,Rins,#RM_mask
        TEQ     Rtmp,#RM_MinusInf
        MOVEQ   OP1sue,#Sign_bit
        MOVNE   OP1sue,#0
        MOV     RNDexp,#0
        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{PC}
        ENDIF
        ]

        ]                               ; Conditional assembly of AddSub

;===========================================================================

        [ :DEF: mul_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to multiply or fast-multiply two internal format floating point
; numbers. It has two entry points: "MultFPE", which has an optimised fast
; track for both operands being common, and "MultFPASC", which avoids the
; test for this optimised fast track - since it should never happen. The
; second entry point lies a long way down in the source to avoid addressing
; constraints.
; The value returned is either a numeric value plus associated rounding ; information, with the uncommon bit clear, or an infinity or NaN, with the ; uncommon bit set. ; This routine will not work correctly with inputs which are unnormalised ; URD results, or with invalid internal format numbers. ; ; Uses standard dyadic operation entry and exit conventions - see top of ; this file. ASSERT RNDexp = OP2sue ;We swap over from the use of OP2sue to that ; of RNDexp partway through this routine. [ FPEWanted MultFPE CDebug3 3,"MultFPE: op1 =",OP1sue,OP1mhi,OP1mlo CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo ; Start by detecting the "fast track" case of both operands being common. TST OP1sue,#Uncommon_bit TSTEQ OP2sue,#Uncommon_bit BNE Mult_Uncommon ; If either operand is a zero, the product is a zero. Because the numbers ; are common and assumed not to be unnormalised URD results, we can check ; for zeros by means of the units bits. ANDS Rtmp,OP1mhi,OP2mhi ASSERT EIUnits_pos = 31 BPL Mult_Zero ; Both operands may now be assumed to be normalised numbers. Produce the ; result sign and the prospective result exponent. ] [ :DEF: mul_s :LOR: FPEWanted [ FPLibWanted __fp_mult_common ] AND Rtmp,OP1sue,#ToExp_mask AND Rtmp2,OP2sue,#ToExp_mask EOR OP1sue,OP1sue,OP2sue ;Produce result sign AND OP1sue,OP1sue,#Sign_bit ADD RNDexp,Rtmp,Rtmp2 SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00 SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa ; overflow is exp1+exp2-bias+1 ] ; This subsidiary entry point deals with multiplying two normalised ; mantissas together and adjusting the exponent if necessary. 
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the ; remaining bits are zero; ; OP1mhi = First operand mantissa, high word; ; OP1mlo = First operand mantissa, low word; ; RNDexp = Prospective result exponent, which may be negative; this ; needs to be decremented if mantissa overflow doesn't occur; ; OP2mhi = Second operand mantissa, high word; ; OP2mlo = Second operand mantissa, low word; ; Rins = instruction (may be needed to discriminate between MUF and ; FML); ; Rwp, Rfp, Rsp hold their usual values; ; R14 = return link. ; Exit: OP1sue = the result's sign, with an uncommon bit of 0; the ; remaining bits are zero; ; OP1mhi, OP1mlo = the result's mantissa; ; RNDexp = the result exponent, which may be negative; ; Rarith holds the round bit (in bit 31) and the sticky bit (in bits ; 30:0) if the destination precision is extended; if the ; destination precision is single or double, it holds part of the ; sticky bit (the remainder of which is held in bits below the ; round bit in OP1mhi and OP1mlo); ; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt; ; All other registers preserved. Mult_Mantissas ; We will split into various lines, depending on the operands: ; ; if ((OP1mlo = 0) AND (OP2mlo = 0)) ; do 32x32->64 multiplication of OP1mhi by OP2mhi; ; if ((OP1mlo = 0) AND (OP2mlo != 0)) ; do 32x64->96 multiplication of OP1mhi by (OP2mhi,OP2mlo); ; if ((OP1mlo != 0) AND (OP2mlo = 0)) ; do 64x32->96 multiplication of (OP1mhi,OP1mlo) by OP2mhi; ; if ((OP1mlo != 0) AND (OP2mlo != 0)) ; do 64x64->128 multiplication of (OP1mhi,OP1mlo) by (OP2mhi,OP2mlo); ; ; In each case, this is then followed by code to deal with the case of no ; mantissa overflow (i.e. the top bit of the product was zero) and to create ; the round and sticky bits. ; ; This is all designed to make multiplications involving single precision ; numbers, immediate constants and/or FLTed integers as efficient as ; possible. 
; ; If the instruction is an FML, we simply assume that both mantissa low ; words are zero. [ FPEWanted TST Rins,#Fast_bit BNE Mult_32x32 ] TEQ OP1mlo,#0 BEQ Mult_32xX Mult_64xX TEQ OP2mlo,#0 BEQ Mult_64x32 Mult_64x64 STMFD Rsp!,{OP1sue,Rfpsr,Rins,LR} ; We do this multiplication by applying the trick (described in Knuth ; section 4.3.3) for reducing the obvious algorithm involving four 32x32 ; multiplications to just three plus some additions and sign manipulations, ; by means of the formula: ; ; (a1*2^32 + a0) * (b1*2^32 + b0) ; = a1*b1*(2^64+2^32) + (a1-a0)*(b0-b1)*2^32 + a0*b0*(2^32+1) ; ; This has to be done carefully: the a1*b1 and a0*b0 multiplications are ; straightforward 32x32 multiplications, but each of a1-a0 and b0-b1 is in ; the range -2^32+1 < x < 2^32-1. To see what effect this has, we need to ; look at what we will get if we simply do the a1-a0 and b0-b1 subtractions, ; then multiply the results as unsigned numbers: ; ; (A) If a1-a0 >= 0, b0-b1 >= 0: ; product obtained = (a1-a0)*(b0-b1) ; ; (B) If a1-a0 >= 0, b0-b1 < 0: ; product obtained = (a1-a0)*(b0-b1+2^32) ; = (a1-a0)*(b0-b1) + (a1-a0)*2^32 ; ; (C) If a1-a0 < 0, b0-b1 >= 0: ; product obtained = (a1-a0+2^32)*(b0-b1) ; = (a1-a0)*(b0-b1) + (b0-b1)*2^32 ; ; (D) If a1-a0 < 0, b0-b1 < 0: ; product obtained = (a1-a0+2^32)*(b0-b1+2^32) ; = (a1-a0)*(b0-b1) + ((a1-a0)+(b0-b1))*2^32 + 2^64 ; = (a1-a0)*(b0-b1) ; + ((a1-a0+2^32) + (b0-b1+2^32))*2^32 - 2^64 ; ; So to get the real value of (a1-a0)*(b0-b1), we must look at the signs of ; a1-a0 and b0-b1: if a1-a0 is in fact negative, we must subtract the ; calculated value of b0-b1 from the high word of the calculated product; if ; b0-b1 is in fact negative, we must subtract the calculated value of a1-a0 ; from the high word of the calculated product; and finally we must add 2^64 ; if both were negative. ; ; This last step is awkward. 
However, note that (a1-a0)*(b0-b1) is actually ; guaranteed to lie in the range -2^64 < x < 2^64, which means that it is ; sufficient to calculate its value modulo 2^64 (i.e. disregarding carries ; out of the high word and the possible addition of 2^64), provided we take ; care to get the sign word right. ; ; We do the 32x32 multiplications by means of standard macros. First ; multiply a1*b1 = OP1mhi*OP2mhi into (OP1sue,Rfpsr). Split16 OP1sue,Rfpsr,OP1mhi Mul64 OP1sue,Rfpsr,OP1sue,Rfpsr,OP2mhi,,,Rarith,Rtmp,Rtmp2 ; Multiply a0*b0 = OP1mlo*OP2mlo into (Rins,R14). Split16 Rins,R14,OP1mlo Mul64 Rins,R14,Rins,R14,OP2mlo,,,Rarith,Rtmp,Rtmp2 ; Next, we need to calculate a1*b1*(2^64+2^32) + a0*b0*(2^32+1) ; ; = (2^32+1) * (a1*b1*2^32 + a0*b0) ; ; Note that a1*b1*2^32 + a0*b0 <= (2^32-1)*(2^32-1)*(2^32+1) ; = (2^32-1)*(2^64-1) < 2^96 and that (2^32+1) * (a1*b1*2^32 + a0*b0) ; <= (2^32+1)*(2^32-1)*(2^32-1)*(2^32+1) = (2^64-1)^2 < 2^128, so the ; calculations can be done respectively in 3- and 4-word unsigned ; arithmetic. ADDS Rfpsr,Rfpsr,Rins ;Put a1*b1*2^32 + a0*b0 into ADC OP1sue,OP1sue,#0 ; (OP1sue,Rfpsr,R14) ADDS Rins,Rfpsr,R14 ;Then multiply by 2^32+1, putting ADCS Rfpsr,Rfpsr,OP1sue ; result in (OP1sue,Rfpsr,Rins,R14) ADC OP1sue,OP1sue,#0 ; Calculate a1-a0 = OP1mhi-OP1mlo into Rtmp, ; b0-b1 = OP2mlo-OP2mhi into Rtmp2, ; addend to high word of calculated (a1-a0)*(b0-b1) product into ; Rarith, and ; correct sign of (a1-a0)*(b0-b1) product into OP1mhi. ; The sign word is 0 for a positive or zero result, &FFFFFFFF for a negative ; result - i.e. it is the word which, when prefixed to the 64-bit product ; calculated otherwise, gives us the true result as a 96-bit signed number. ; Getting this right is slightly tricky, because of the possibilities of ; a1-a0 and b0-b1 being zero and thus invalidating the usual EOR rule about ; the sign. 
The key to the code below is that if Rtmp = a1-a0 comes out as ; 0, OP1mhi and Rarith come out as zero and Rtmp2 never gets set - but this ; last doesn't matter, since zero times anything is zero! ; Note also that we don't care about carries out of the addend, since they ; go into the sign word, which we are getting right by other means. SUBS Rtmp,OP1mhi,OP1mlo ;Rtmp := a1-a0 MOV OP1mhi,#0 ;Sign if a1-a0,b0-b1 both +ve MOV Rarith,#0 ;Addend if both +ve MVNLO OP1mhi,OP1mhi ;If a1-a0 -ve, adjust sign and SUBLO Rarith,OP2mhi,OP2mlo ; addend = -(b0-b1) = b1-b0 SUBNES Rtmp2,OP2mlo,OP2mhi ;Rtmp2 := b0-b1 MOVEQ OP1mhi,#0 ;Override sign if b0-b1 = 0 MVNLO OP1mhi,OP1mhi ;If b0-b1 -ve, adjust sign and SUBLO Rarith,Rarith,Rtmp ; addend += -(a1-a0) ; Finish calculating the real value of (a1-a0)*(b0-b1) into ; (OP1mhi,OP1mlo,Rarith). I.e. multiply Rtmp by Rtmp2, adding Rarith into the ; high word and putting the result in (OP1mlo,Rarith). OP1mhi is already OK. Split16 OP2mhi,OP2mlo,Rtmp Mul64 OP1mlo,Rarith,OP2mhi,OP2mlo,Rtmp2,Rarith,,Rtmp,Rtmp2,OP1mlo ; Now add a1*b1*(2^64+2^32) + a0*b0*(2^32+1) and (a1-a0)*(b0-b1)*2^32 ; together, putting the result in (OP1mhi,OP1mlo,Rarith,R14). Note the low ; word is in R14 already. ADDS Rarith,Rins,Rarith ADCS OP1mlo,Rfpsr,OP1mlo ADCS OP1mhi,OP1sue,OP1mhi ; Transfer R14 into the sticky bit, without affecting flags. Also make ; certain we don't affect the guard or round bits. ORR R14,R14,R14,LSL #2 ORR Rarith,Rarith,R14,LSR #2 ; If result is normalised, return. Otherwise normalise by shifting left one ; bit.
IF Interworking :LOR: Thumbing LDMMIFD Rsp!,{OP1sue,Rfpsr,Rins,LR} BXMI LR ELSE LDMMIFD Rsp!,{OP1sue,Rfpsr,Rins,PC} ENDIF ADDS Rarith,Rarith,Rarith ADCS OP1mlo,OP1mlo,OP1mlo ADC OP1mhi,OP1mhi,OP1mhi SUB RNDexp,RNDexp,#1 IF Interworking :LOR: Thumbing LDMFD Rsp!,{OP1sue,Rfpsr,Rins,LR} BX LR ELSE LDMFD Rsp!,{OP1sue,Rfpsr,Rins,PC} ENDIF Mult_64x32 ; To perform this multiplication, we do two 32x32 multiplications, then add ; the results together. We use the standard macros for the purpose. Split16 OP2mlo,Rarith,OP2mhi Mul64 OP2mhi,OP1mhi,OP2mlo,Rarith,OP1mhi,,,Rtmp,Rtmp2,OP2mhi Mul64 OP2mlo,Rarith,OP2mlo,Rarith,OP1mlo,,,Rtmp,Rtmp2,OP1mlo ADDS OP1mlo,OP2mlo,OP1mhi ADCS OP1mhi,OP2mhi,#0 ; If the top bit was clear, we need to shift the product, round and sticky ; bits left by one bit and decrement the exponent. Otherwise, everything is ; ready for the return. IF Interworking :LOR: Thumbing BXMI LR ELSE MOVMI PC,LR ENDIF ADDS Rarith,Rarith,Rarith ADCS OP1mlo,OP1mlo,OP1mlo ADC OP1mhi,OP1mhi,OP1mhi SUB RNDexp,RNDexp,#1 IF Interworking :LOR: Thumbing BX LR ELSE MOV PC,LR ENDIF Mult_32xX TEQ OP2mlo,#0 BEQ Mult_32x32 Mult_32x64 ; To perform this multiplication, we do two 32x32 multiplications, then add ; the results together. We use the standard macros for the purpose. Split16 OP1mlo,Rarith,OP1mhi Mul64 OP1mhi,OP2mhi,OP1mlo,Rarith,OP2mhi,,,Rtmp,Rtmp2,OP1mhi Mul64 OP1mlo,Rarith,OP1mlo,Rarith,OP2mlo,,,Rtmp,Rtmp2,OP2mlo ADDS OP1mlo,OP1mlo,OP2mhi ADCS OP1mhi,OP1mhi,#0 ; If the top bit was clear, we need to shift the product, round and sticky ; bits left by one bit and decrement the exponent. Otherwise, everything is ; ready for the return. 
IF Interworking :LOR: Thumbing BXMI LR ELSE MOVMI PC,LR ENDIF ADDS Rarith,Rarith,Rarith ADCS OP1mlo,OP1mlo,OP1mlo ADC OP1mhi,OP1mhi,OP1mhi SUB RNDexp,RNDexp,#1 IF Interworking :LOR: Thumbing BX LR ELSE MOV PC,LR ENDIF [ FPLibWanted KEEP |$F__fp_mult_fast_common| |$F__fp_mult_fast_common| __fp_mult_fast_common ; This code is duplicated from above for the fast case. AND Rtmp,OP1sue,#ToExp_mask AND Rtmp2,OP2sue,#ToExp_mask EOR OP1sue,OP1sue,OP2sue ;Produce result sign AND OP1sue,OP1sue,#Sign_bit ADD RNDexp,Rtmp,Rtmp2 SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00 SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa ; overflow is exp1+exp2-bias+1 ] Mult_32x32 ; Only the high words of the operand mantissas need to be multiplied ; together. Use the standard macros for this purpose. Split16 OP2mlo,Rarith,OP2mhi Mul64 OP1mhi,OP1mlo,OP2mlo,Rarith,OP1mhi,,S,Rtmp,Rtmp2,OP1mhi ; The round and sticky bits are always going to be zero. MOV Rarith,#0 ; If the top bit was clear, we need to shift the product left one bit and ; decrement the exponent. Otherwise we're done. IF Interworking :LOR: Thumbing BXMI LR ELSE MOVMI PC,LR ENDIF ADDS OP1mlo,OP1mlo,OP1mlo ADC OP1mhi,OP1mhi,OP1mhi SUB RNDexp,RNDexp,#1 IF Interworking :LOR: Thumbing BX LR ELSE MOV PC,LR ENDIF ] ; Conditional compilation of Mult ;=========================================================================== [ :DEF: div_s :LOR: FPEWanted :LOR: FPASCWanted ; Routine to divide, reverse-divide, fast-divide or fast-reverse-divide two ; internal format floating point numbers. It has two entry points: "DivFPE", ; which has an optimised fast track for both operands being common, and ; "DivFPASC", which avoids the test for this optimised fast track - since it ; should rarely happen. The second entry point lies a long way down in the ; source to avoid addressing constraints.
; The value returned is either a numeric value plus associated rounding ; information, with the uncommon bit clear, or an infinity or NaN, with the ; uncommon bit set. ; This routine will not work correctly with inputs which are unnormalised ; URD results, or with invalid internal format numbers. ; ; Uses standard dyadic operation entry and exit conventions - see top of ; this file. ASSERT RNDexp = OP2sue ;We swap over from the use of OP2sue to that ; of RNDexp partway through this routine. [ FPEWanted DivFPE CDebug3 3,"DivFPE: op1 =",OP1sue,OP1mhi,OP1mlo CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo ; Start by detecting the "fast track" case of both operands being common. TST OP1sue,#Uncommon_bit TSTEQ OP2sue,#Uncommon_bit BNE Div_Uncommon ; If either operand is a zero, we need to take special action. Because the ; numbers are common and assumed not to be unnormalised URD results, we can ; check for zeros by means of the units bits. ANDS Rtmp,OP1mhi,OP2mhi ASSERT EIUnits_pos = 31 BPL Div_Zero ; Both operands may now be assumed to be normalised numbers. We now know ; that we are not going to need to know the operands for trap purposes, so ; we can swap them if this is a normal division rather than a reverse ; division. TST Rins,#RevDiv_bit BNE Div_Common_Swapped ] [ FPLibWanted __fp_div_common ] MOV Rtmp,OP1sue MOV OP1sue,OP2sue MOV OP2sue,Rtmp MOV Rtmp,OP1mhi MOV OP1mhi,OP2mhi MOV OP2mhi,Rtmp MOV Rtmp,OP1mlo MOV OP1mlo,OP2mlo MOV OP2mlo,Rtmp [ FPLibWanted KEEP |$F__fp_rdv_common| |$F__fp_rdv_common| __fp_rdv_common ] Div_Common_Swapped ; Produce the result sign and the prospective result exponent. 
AND Rtmp,OP1sue,#ToExp_mask AND Rtmp2,OP2sue,#ToExp_mask EOR OP1sue,OP1sue,OP2sue ;Produce result sign AND OP1sue,OP1sue,#Sign_bit SUB RNDexp,Rtmp2,Rtmp ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF00 ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF ASSERT EIExp_bias < &10000 ;Result exponent if no mantissa ; underflow is exp1-exp2+bias ; This subsidiary entry point deals with dividing a normalised mantissa by ; another and adjusting the exponent if necessary. ; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the ; remaining bits are zero; ; OP1mhi = Divisor mantissa, high word; ; OP1mlo = Divisor mantissa, low word; ; RNDexp = Prospective result exponent, which may be negative; this ; needs to be decremented if mantissa underflow occurs; ; OP2mhi = Dividend mantissa, high word; ; OP2mlo = Dividend mantissa, low word; ; Rins = instruction (needed to determine precision; may be needed ; to discriminate between normal and fast divisions); ; Rwp, Rfp, Rsp hold their usual values; ; R14 = return link. ; Exit: OP1sue = the result's sign, with an uncommon bit of 0; the ; remaining bits are zero; ; OP1mhi, OP1mlo = the result's mantissa; ; RNDexp = the result exponent, which may be negative; ; Rarith holds the round bit (in bit 31) and the sticky bit (in bits ; 30:0) if the destination precision is extended; if the ; destination precision is single or double, it holds part of the ; sticky bit (the remainder of which is held in bits below the ; round bit in OP1mhi and OP1mlo); ; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt; ; All other registers preserved. 
Div_Mantissas STMFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR} CDebug2 4,"Div_Mantissas: dividend =",OP2mhi,OP2mlo CDebug2 4," divisor =",OP1mhi,OP1mlo CDebug1 4," exponent =",RNDexp ; We will do the mantissa division by an algorithm which is a hybrid between ; Newton-Raphson approximation and ordinary long division: this results in ; division being done to IEEE accuracy, yet more than 50% faster than the ; straightforward long division technique. A summary of the algorithm is: ; ; (a) Use table look-up to get an initial approximation to the reciprocal ; of the divisor; ; ; (b) Use two iterations of Newton-Raphson to improve the reciprocal ; approximation to one with about 15 bits accuracy; ; ; (c) Do long division base 2^13, using the reciprocal approximation to ; determine the result "digits" - which are in fact fixed point ; numbers with 13 bits before the binary point and 3 after it; ; ; (d) Resolve the exact values of the last three bits by ordinary long ; division; ; ; (e) Adjust the exponent and shift the mantissa if mantissa underflow ; occurs, and create the sticky bit. ; ; Exact details of the algorithm appear in comments next to the relevant ; parts of the code below. ; ; The long division is performed for 2 steps for single precision, 4 steps ; for double precision and 5 steps for extended precision, producing 2*13+3 ; = 29, 4*13+3 = 55 and 5*13+3 = 68 bits respectively, plus a sticky bit in ; each case. ; ; Note that this algorithm has been specifically tailored to the software ; environment - e.g. the availability of 32x32->32 bit multiplication and ; the fact that negative partial remainders during the long division will ; cause problems. This leads to some apparently strange bits of code below - ; e.g. getting less accuracy from a Newton-Raphson iteration than might ; appear to be available, in order to preserve knowledge of the sign of the ; error. 
; ; In what follows, we will refer to the true mathematical value of the ; dividend mantissa as P, that of the divisor as D, that of the reciprocal ; of the divisor as R and that of the quotient as Q. So Q = P/D = P*R are ; exact mathematical relationships. Also, we have P = (2^32*OP2mhi + ; OP2mlo)*2^(-63), D = (2^32*OP1mhi + OP1mlo)*2^(-63). ; First step: initialise by breaking the divisor up into 16-bit chunks, ; held in (OP1sue,Rfpsr,Rins,R14). Split16 OP1sue,Rfpsr,OP1mhi Split16 Rins,R14,OP1mlo ; Second step: use table look-up to get an approximation to R. Specifically, ; we load Rarith with an 8-bit value such that we know: ; ; R <= Rarith*2^(-7) < R + 2^(-6) [ CoreDebugging = 0 ADR Rarith,Recip_Table-128 ;-128 to cancel units bit | ADRL Rarith,Recip_Table-128 ;-128 to cancel units bit ] LDRB Rarith,[Rarith,OP1sue,LSR #8] CDebug1 5,"Table look-up approx'n is",Rarith ; Third step: use a Newton-Raphson iteration to improve this to an 11-bit ; value in Rarith such that: ; ; R < Rarith*2^(-10) < R + 2^(-9) ; ; Details: Let W be the current value of Rarith, so we have: ; ; R <= W*2^(-7) < R + 2^(-6) ; ; Let X be the first 16 bits of D (i.e. OP1sue), incremented by 1. This has ; the property that: ; ; D < X*2^(-15) <= D + 2^(-15) ; ; Suppose further that W*2^(-7) = R+e, with 0 <= e < 2^(-6), and X*2^(-15) = ; D+f, with 0 < f <= 2^(-15). ; ; Now let Y = W * (2^23 - X*W), which is a calculation that can be performed ; without overflowing a word. This is equivalent to: ; ; Y*2^(-29) = W*2^(-7) * (2 - X*2^(-15) * W*2^(-7)) ; ; = (R+e) * (2 - (D+f)*(R+e)) ; ; = (R+e) * (2 - (1 + D*e + R*f + e*f)), since D*R=1 exactly, ; ; = (R+e) * (1 - D*e - R*f - e*f) ; ; = R + e - e - D*e*e - R*R*f - R*e*f - R*e*f - e*e*f, since D*R=1, ; ; = R - D*e*e - R*R*f - 2*R*e*f - e*e*f ; ; Since R > 0, D > 0, e >= 0 and f > 0, this is clearly less than R. On the ; other hand, we know that R <= 1, D < 2, e < 2^(-6) and f <= 2^(-15).
So: ; ; R > Y*2^(-29) ; > R - 2^(-11) - 2^(-15) - 2^(-20) - 2^(-27) ; ; Now let Z be Y shifted right 19 bits. This gives us: ; ; Y*2^(-29) - 2^(-10) < Z*2^(-10) <= Y*2^(-29) ; ; Combining the inequalities, we get: ; ; R - 2^(-9) < R - 2^(-11) - 2^(-15) - 2^(-20) - 2^(-27) - 2^(-10) ; < Y*2^(-29) - 2^(-10) ; < Z*2^(-10) ; <= Y*2^(-29) ; < R ; ; So if we put Rarith = Z+2, we get: ; ; R < Rarith*2^(-10) < R + 2^(-9), ; ; as desired. MLA Rtmp,OP1sue,Rarith,Rarith ;Rtmp := (X-1)*W + W = X*W RSB Rtmp,Rtmp,#1:SHL:23 ;Rtmp := 2^23 - X*W MUL Rarith,Rtmp,Rarith ;Rarith := W*(2^23 - X*W) = Y MOV Rarith,Rarith,LSR #19 ;Shift right 19 bits and add ADD Rarith,Rarith,#2 ; 2 to get new approximation CDebug1 5,"First N-R approx'n is",Rarith ; Fourth step: use a Newton-Raphson iteration to improve this to a 16-bit ; value in Rarith such that: ; ; R - 2^(-15) < Rarith*2^(-16) < R ; ; Details: Let W be the current value of Rarith, so we have: ; ; R < W*2^(-10) < R + 2^(-9) ; ; Let X be the first 19 bits of D (i.e. the top 19 bits of OP1mhi), ; incremented by 1. This has the property that: ; ; D < X*2^(-18) <= D + 2^(-18) ; ; Suppose further that W*2^(-10) = R+e, with 0 < e < 2^(-9), and X*2^(-18) ; = D+f, with 0 < f <= 2^(-18). ; ; Now let Y = W * (2^29 - X*W): part of this calculation will require 2-word ; arithmetic. This is equivalent to: ; ; Y*2^(-38) = W*2^(-10) * (2 - X*2^(-18) * W*2^(-10)) ; ; = (R+e) * (2 - (D+f)*(R+e)) ; ; = R - D*e*e - R*R*f - 2*R*e*f - e*e*f, as in the third step. ; ; Since R > 0, D > 0, e >= 0 and f > 0, this is clearly less than R. On the ; other hand, we know that R <= 1, D < 2, e < 2^(-9) and f <= 2^(-18). So: ; ; R > Y*2^(-38) ; > R - 2^(-17) - 2^(-18) - 2^(-26) - 2^(-36) ; ; Now let Z be Y shifted right 22 bits. 
This gives us: ; ; Y*2^(-38) - 2^(-16) < Z*2^(-16) <= Y*2^(-38) ; ; Combining the inequalities, we get: ; ; R - 2^(-15) < R - 2^(-17) - 2^(-18) - 2^(-26) - 2^(-36) - 2^(-16) ; < Y*2^(-38) - 2^(-16) ; < Z*2^(-16) ; <= Y*2^(-38) ; < R ; ; So if we put Rarith = Z, we get the desired inequality. MOV Rtmp,OP1mhi,LSR #13 ;Rtmp := X-1 MLA Rtmp2,Rtmp,Rarith,Rarith ;Rtmp2 := (X-1)*W + W = X*W RSB Rtmp2,Rtmp2,#1:SHL:29 ;Rtmp2 := 2^29 - X*W Split16 Rtmp,Rtmp2,Rtmp2 ;Rtmp/Rtmp2 := top/bottom half MUL OP1mlo,Rtmp2,Rarith ;OP1mhi, OP1mlo := two MUL OP1mhi,Rtmp,Rarith ; parts of product with W ADD Rarith,OP1mhi,OP1mlo,LSR #16 ;Rarith := Y >> 16 MOV Rarith,Rarith,LSR #6 ;Rarith := Y >> 22 CDebug1 5,"Second N-R approx'n is",Rarith ; Fifth step: initialise the partial remainder - its binary point lies to ; the right of bit 30 of its top word to line up well with the results of ; later multiplications. MOVS OP2mhi,OP2mhi,LSR #1 MOVS OP2mlo,OP2mlo,RRX MOVCC OP2sue,#0 MOVCS OP2sue,#TopBit ; Sixth step: do the first iteration of the long division process. The ; register allocation during this is: ; ; OP1sue, Rfpsr, Rins, R14: Divisor, in 16-bit chunks; its binary point is ; considered to lie to the right of bit 15 of ; OP1sue; ; OP1mhi, OP1mlo: Quotient so far (Rarith joins into this near the ; end of the calculation); its binary point is ; considered to lie to the right of bit 31 of ; OP1mhi; ; OP2mhi, OP2mlo, OP2sue: Partial remainder; its binary point is ; considered to lie to the right of bit 30 of ; OP2mhi; ; Rarith: 16-bit reciprocal approximation, until near the ; end of the calculation; its binary point lies to ; the *left* of bit 15; ; Rtmp, Rtmp2: Temporaries. ; ; Some of these registers (OP1mhi and OP1mlo) only become set some way into ; the calculation: until they do become set, they should be regarded as ; being 0. 
; ; The details of iteration N (for N=0 to 4) of the long division process ; are: ; ; Let D be the divisor represented by (OP1sue,Rfpsr,Rins,R14), and let R = ; 1/D be its reciprocal. Let A be the reciprocal approximation represented ; by Rarith from now until near the end of the calculation - i.e. A = ; Rarith*2^(-16). We know that: ; ; 1 <= D < 2; ; 0.5 < R <= 1; ; R-2^(-15) < A < R ; ; Let Q[N] be the quotient represented by those of OP1mhi, OP1mlo and Rarith ; that have become set at the end of iteration N-1/start of iteration N - ; i.e.: ; ; Q[0] = 0; ; Q[1],Q[2] = (OP1mhi at appropriate time) * 2^(-31); ; Q[3],Q[4] = (OP1mhi at appropriate time) * 2^(-31) ; + (OP1mlo at appropriate time) * 2^(-63); ; Q[5] = (OP1mhi at appropriate time) * 2^(-31) ; + (OP1mlo at appropriate time) * 2^(-63) ; + (Rarith at appropriate time) * 2^(-95); ; ; Let P[N] be the partial remainder represented by those of OP2mhi, OP2mlo ; and OP2sue that have become set at the end of iteration N-1/start of ; iteration N - i.e.: ; ; P[i] = (OP2mhi at appropriate time) * 2^(-30) ; + (OP2mlo at appropriate time) * 2^(-62) ; + (OP2sue at appropriate time) * 2^(-94); ; ; Finally, let P be the original dividend - i.e. P is the current value of ; OP2mhi*2^(-31) + OP2mlo*2^(-63). ; ; For i=0, we can clearly make the following four statements: ; ; (a) Q[i] is a multiple of 2^(-13*i-2); ; ; (b) P[i] is a multiple of 2^(-65); ; ; (c) P = Q[i]*D + P[i]*2^(-13*i); ; ; (d) 0 < P[i] < 2; ; ; since Q[0] = 0 and P[0] = P. The algorithm will result in the same ; statements being true for i = 1, 2, 3, 4 and 5 as well. ; ; Iteration i of the algorithm is: ; ; Papprox = P[i], rounded down to a multiple of 2^(-15); ; digit = Papprox * A, rounded down to a multiple of 2^(-15); ; P[i+1] = (P[i] - digit*D) * 2^13 ; Q[i+1] = Q[i] + digit*2^(-13*i) ; ; Proof that the four statements above are true for all i: we will do this ; by induction. We already know that they are true for i=0.
So suppose they ; are true for i=N. Then: ; ; (a) Q[i+1] = Q[i] + digit*2^(-13*i) ; = (multiple of 2^(-13*i-2)) + (multiple of 2^(-15))*2^(-13*i) ; = multiple of 2^(-13*i-15) ; = multiple of 2^(-13*(i+1)-2). ; ; (b) P[i+1] = (P[i] - digit*D) * 2^13 ; = 2^13 * (multiple of 2^(-65) ; - (multiple of 2^(-15)) * (multiple of 2^(-63))) ; = multiple of 2^(-65). ; ; (c) P = Q[i]*D + P[i]*2^(-13*i) ; = (Q[i+1] - digit*2^(-13*i)) * D ; + (P[i+1]*2^(-13) + digit*D) * 2^(-13*i) ; = Q[i+1]*D + P[i+1]*2^(-13*i-13) ; = Q[i+1]*D + P[i+1]*2^(-13*(i+1)). ; ; (d) First, since Papprox = P[i] rounded down to a multiple of 2^(-15) and ; R-2^(-15) < A < R, we have Papprox = P[i]-e and A = R-f, where 0 <= e ; < 2^(-15) and 0 < f < 2^(-15). Then, since digit = Papprox * A rounded ; down to a multiple of 2^(-15), we have digit = Papprox * A - g, where ; 0 <= g < 2^(-15). Putting these together, we have: ; ; digit = (P[i]-e)*(R-f) - g ; = P[i]*R - P[i]*f - e*R + e*f - g ; ; Since everything is non-negative, 'digit' is clearly at most P[i]*R. ; Conversely, since P[i] < 2, R <= 1, e < 2^(-15), f < 2^(-15) and g < ; 2^(-15), we have: ; ; P[i]*R > digit ; > P[i]*R - 2*2^(-15) - 2^(-15)*1 - 2^(-15) ; = P[i]*R - 2^(-13) ; ; Or: ; ; 0 < P[i]*R - digit < 2^(-13) ; ; Multiplying by D, which is known to satisfy 1 <= D < 2: ; ; 0 < P[i] - digit*D < 2^(-12) ; ; Multiplying by 2^(13): ; ; 0 < P[i+1] < 2 ; ; Notes: ; ; (1) The subtraction to create P[i+1] is done by subtracting the four 16x16 ; products formed from the digit and the 16-bit chunks of the divisor ; from the partial remainder. Two of these 32-bit products are aligned ; with the partial remainder and thus don't cause any problems. The ; other two are both mis-aligned by 16 bits. One way to subtract them ; would be to do a double word shift on them and subtract the results ; from the partial remainder: this takes 2 instructions to form the ; central shifted word and 3 for the subtraction (two of which are ; "shift and subtracts").
However, this makes use of one register more ; than we have. So the code below makes use of a trick, based on the ; fact that if we subtract the top 16 bits and the bottom 16 bits of the ; central shifted word separately, only one of the subtractions can ; cause a borrow. So if we've got a borrow after the first one, we do ; the second one without setting the condition codes, knowing that it ; won't cause a borrow; if we don't, we set the condition codes on the ; result of the second subtraction. ; ; (2) The multiplication operands are generally ordered to maximise the ; chance of early termination. This means that all but the top chunk of ; the divisor are good second operands to the multiplication, the digit ; is next best, and the top chunk of the divisor is the least good. ; ; (3) The above is in fact not exactly true, due to the fact that it saves ; some cycles not to shift P[1] and P[3] left by 13 bits, but to wait ; until P[2] and P[4] are generated, then shift them left 26 bits. MOV Rtmp,OP2mhi,LSR #15 ;Rtmp := Papprox MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[0] to SUBS OP2mlo,OP2mlo,Rtmp ; form P[1]*2^(-13) - this requires MUL Rtmp,OP1sue,Rtmp2 ; 4 multiplications and subtractions SBC OP2mhi,OP2mhi,Rtmp ; at various alignments MUL Rtmp,Rtmp2,R14 SUBS OP2sue,OP2sue,Rtmp,LSL #16 SBCS OP2mlo,OP2mlo,Rtmp,LSR #16 MUL Rtmp,Rtmp2,Rfpsr SUBCC OP2mlo,OP2mlo,Rtmp,LSL #16 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #16 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #16 MOV OP1mhi,Rtmp2,LSL #16 ;OP1mhi := Q[1] CDebug1 5,"1st iter'n: quotient so far =",OP1mhi CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue ; Seventh step: second iteration. At the end of this step, we check whether ; the division is single precision and branch out to termination code ; if so.
MOV Rtmp,OP2mhi,LSR #2 ;Rtmp := Papprox MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[1]*2^(-13) SUBS OP2sue,OP2sue,Rtmp,LSL #19 ; to form P[2]*2^(-26) - this SBCS OP2mlo,OP2mlo,Rtmp,LSR #13 ; requires 4 multiplications and MUL Rtmp,OP1sue,Rtmp2 ; subtractions at various alignments SUBCC OP2mlo,OP2mlo,Rtmp,LSL #19 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #19 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #13 MUL Rtmp,Rtmp2,R14 SUBS OP2sue,OP2sue,Rtmp,LSL #3 SBCS OP2mlo,OP2mlo,Rtmp,LSR #29 MUL Rtmp,Rtmp2,Rfpsr SUBCC OP2mlo,OP2mlo,Rtmp,LSL #3 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #3 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #29 MOV OP2mhi,OP2mhi,LSL #26 ;Shift by 26 bits to form P[2] ORR OP2mhi,OP2mhi,OP2mlo,LSR #6 MOV OP2mlo,OP2mlo,LSL #26 ORR OP2mlo,OP2mlo,OP2sue,LSR #6 MOV OP2sue,OP2sue,LSL #26 ADD OP1mhi,OP1mhi,Rtmp2,LSL #3 ;OP1mhi := Q[2] CDebug1 5,"2nd iter'n: quotient so far =",OP1mhi CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue LDR Rtmp,[Rsp,#12] ;Recover instruction [ FPEWanted :LOR: FPASCWanted TST Rtmp,#Pr1_mask ;Check for single precision TSTEQ Rtmp,#Pr2_mask BEQ Div_Single | TST Rtmp,#Single_mask ;Use a simpler encoding BNE Div_Single ] ; Eighth step: third iteration. 
MOV Rtmp,OP2mhi,LSR #15 ;Rtmp := Papprox MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[2] to SUBS OP2mlo,OP2mlo,Rtmp ; form P[3]*2^(-13) - this requires MUL Rtmp,OP1sue,Rtmp2 ; 4 multiplications and subtractions SBC OP2mhi,OP2mhi,Rtmp ; at various alignments MUL Rtmp,Rtmp2,R14 SUBS OP2sue,OP2sue,Rtmp,LSL #16 SBCS OP2mlo,OP2mlo,Rtmp,LSR #16 MUL Rtmp,Rtmp2,Rfpsr SUBCC OP2mlo,OP2mlo,Rtmp,LSL #16 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #16 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #16 MOV OP1mlo,Rtmp2,LSL #22 ;(OP1mhi,OP1mlo) := Q[3] ADD OP1mhi,OP1mhi,Rtmp2,LSR #10 CDebug2 5,"3rd iter'n: quotient so far =",OP1mhi,OP1mlo CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue ; Ninth step: fourth iteration. At the end of this step, we check whether ; the division is double precision and branch out to termination code ; if so. MOV Rtmp,OP2mhi,LSR #2 ;Rtmp := Papprox MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[3]*2^(-13) SUBS OP2sue,OP2sue,Rtmp,LSL #19 ; to form P[4]*2^(-26) - this SBCS OP2mlo,OP2mlo,Rtmp,LSR #13 ; requires 4 multiplications and MUL Rtmp,OP1sue,Rtmp2 ; subtractions at various alignments SUBCC OP2mlo,OP2mlo,Rtmp,LSL #19 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #19 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #13 MUL Rtmp,Rtmp2,R14 SUBS OP2sue,OP2sue,Rtmp,LSL #3 SBCS OP2mlo,OP2mlo,Rtmp,LSR #29 MUL Rtmp,Rtmp2,Rfpsr SUBCC OP2mlo,OP2mlo,Rtmp,LSL #3 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #3 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #29 MOV OP2mhi,OP2mhi,LSL #26 ;Shift by 26 bits to form P[4] ORR OP2mhi,OP2mhi,OP2mlo,LSR #6 MOV OP2mlo,OP2mlo,LSL #26 ORR OP2mlo,OP2mlo,OP2sue,LSR #6 MOV OP2sue,OP2sue,LSL #26 ADDS OP1mlo,OP1mlo,Rtmp2,LSL #9 ;(OP1mhi,OP1mlo) := Q[4] ADC OP1mhi,OP1mhi,#0 CDebug2 5,"4th iter'n: quotient so
far =",OP1mhi,OP1mlo CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue LDR Rtmp,[Rsp,#12] ;Recover instruction [ FPEWanted :LOR: FPASCWanted TST Rtmp,#Pr1_mask ;Check for double precision BEQ Div_Double | TST Rtmp,#Double_mask BNE Div_Double ] ; Tenth step: fifth iteration. We can enter the extended precision ; termination code at the end of this iteration, since we know it must be an ; extended precision division. MOV Rtmp,OP2mhi,LSR #15 ;Rtmp := Papprox MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[4] to SUBS OP2mlo,OP2mlo,Rtmp ; form P[5]*2^(-13) - this requires MUL Rtmp,OP1sue,Rtmp2 ; 4 multiplications and subtractions SBC OP2mhi,OP2mhi,Rtmp ; at various alignments MUL Rtmp,Rtmp2,R14 SUBS OP2sue,OP2sue,Rtmp,LSL #16 SBCS OP2mlo,OP2mlo,Rtmp,LSR #16 MUL Rtmp,Rtmp2,Rfpsr SUBCC OP2mlo,OP2mlo,Rtmp,LSL #16 ;Already got a borrow SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #16 ;No borrow yet - try for one SBC OP2mhi,OP2mhi,Rtmp,LSR #16 MOV OP2mhi,OP2mhi,LSL #14 ;Shift by 14 bits to form 2*P[5] ORR OP2mhi,OP2mhi,OP2mlo,LSR #18 MOV OP2mlo,OP2mlo,LSL #14 ORR OP2mlo,OP2mlo,OP2sue,LSR #18 MOV OP2sue,OP2sue,LSL #14 MOV Rarith,Rtmp2,LSL #28 ;(OP1mhi,OP1mlo,Rarith) := Q[5] ADDS OP1mlo,OP1mlo,Rtmp2,LSR #4 ADC OP1mhi,OP1mhi,#0 CDebug3 5,"5th iter'n: quotient so far =",OP1mhi,OP1mlo,Rarith CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue Div_Extended ; We've completed the main work for an extended precision division. We've ; now got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[5] in ; (OP1mhi,OP1mlo,Rarith) and twice the partial remainder P[5] in ; (OP2mhi,OP2mlo,OP2sue) such that: ; ; (a) Q[5] is a multiple of 2^(-67); ; ; (b) P[5] is a multiple of 2^(-65); ; ; (c) P = Q[5]*D + P[5]*2^(-65); ; ; (d) 0 < P[5] < 2; ; ; The main problem with this is that P[5]*2^(-65) may be almost 2^(-64), ; while Q[5] is a multiple of 2^(-67). 
To know the correct IEEE answer, we ; have to make the partial remainder be less than the "quantum" in the ; quotient - i.e. less than 2^(-67) in this case. Without doing this, we ; can't calculate the sticky bit accurately: we know that a non-zero partial ; remainder at this point represents a string of quotient bits which are not ; all zero, but if they overlap the quotient bits we've already calculated, ; we don't know whether adding the bits together in the area of overlap ; would result in a string of all zero bits and thus a sticky bit of 0. ; ; We deal with this by doing three bits worth of ordinary long division. To ; save on multi-word additions and problems about carry flag use, we put the ; bits calculated into R14 and only add them into the quotient once at the ; end. ; ; Note that generating twice P[5] above with the binary point to the right ; of bit 30 of OP2mhi is equivalent to generating P[5] with the binary point ; to the right of bit 31 - i.e. to generating it in the position we want it ; to be for the code that follows. This is a trick we only use for extended ; precision, since for the other precisions, we need to be ready for another ; iteration of the algorithm above as well as for the termination code. 
ORR OP1sue,Rfpsr,OP1sue,LSL #16 ;Reform divisor ORR Rfpsr,R14,Rins,LSL #16 MOV R14,#0 ;Initialise extra bits SUBS Rtmp2,OP2mlo,Rfpsr ;First extra bit: trial subtraction SBCS Rtmp,OP2mhi,OP1sue ; of divisor from partial remainder MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction MOVCS OP2mhi,Rtmp ADC R14,R14,R14 ;Accumulate bit MOV Rins,#0 ;Initialise overflow word ADDS OP2sue,OP2sue,OP2sue ;Second extra bit: shift partial ADCS OP2mlo,OP2mlo,OP2mlo ; remainder ADCS OP2mhi,OP2mhi,OP2mhi ADC Rins,Rins,Rins SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder SBCS Rins,Rins,#0 MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction MOVCS OP2mhi,Rtmp ADC R14,R14,R14 ;Accumulate bit MOV Rins,#0 ;Initialise overflow word ADDS OP2sue,OP2sue,OP2sue ;Third extra bit: shift partial ADCS OP2mlo,OP2mlo,OP2mlo ; remainder ADCS OP2mhi,OP2mhi,OP2mhi ADC Rins,Rins,Rins SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder SBCS Rins,Rins,#0 MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction MOVCS OP2mhi,Rtmp ADC R14,R14,R14 ;Accumulate bit CDebug1 5,"Extra bits to add in are",R14 ; (OP1mhi,OP1mlo,Rarith) now contains 68 bits of quotient, R14 three extra ; bits that need to be added into its low end and (OP2mhi,OP2mlo) the final ; partial remainder. (We've shifted all the extra bits out of OP2sue, and the ; overflow word Rins must be zero at this point.) ; This is enough bits to provide guard and round bits, plus 2 bits ; contributing to the sticky bit and enough information to complete ; generating it. We will finish generating it by setting bit 0 of Rarith if ; the partial remainder is non-zero. ORRS Rtmp,OP2mhi,OP2mlo ORRNE Rarith,Rarith,#1 ; Now add the three extra bits into the quotient and test for mantissa ; underflow. ADDS Rarith,Rarith,R14,LSL #28 ;Add extra bits into quotient ADCS OP1mlo,OP1mlo,#0 ADCS OP1mhi,OP1mhi,#0 ; If no mantissa underflow, we're ready to return. 
; Otherwise, we must
; recover the spilled registers (to get hold of the result exponent), shift
; the mantissa left one bit, decrement the exponent and return.

  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}      ;MI: top bit of OP1mhi is set,
        BXMI    LR                                      ; so unstack and return
  ELSE
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}      ;MI: top bit of OP1mhi is set,
                                                        ; so unstack and return
  ENDIF

        LDMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}      ;Mantissa underflow: unstack,
        ADDS    Rarith,Rarith,Rarith                    ; shift (OP1mhi,OP1mlo,Rarith)
        ADCS    OP1mlo,OP1mlo,OP1mlo                    ; left by one bit
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     OP2sue,OP2sue,#1                        ; and decrement the exponent
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_Double

; We've completed the main work for a double precision division. We've now
; got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[4] in
; (OP1mhi,OP1mlo,Rarith) and the partial remainder P[4] in
; (OP2mhi,OP2mlo,OP2sue) such that:
;
;   (a) Q[4] is a multiple of 2^(-54);
;
;   (b) P[4] is a multiple of 2^(-65);
;
;   (c) P = Q[4]*D + P[4]*2^(-52);
;
;   (d) 0 < P[4] < 2;
;
; The main problem with this is that P[4]*2^(-52) may be almost 2^(-51),
; while Q[4] is a multiple of 2^(-54). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-54) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division. To
; save on multi-word additions and problems about carry flag use, we put the
; bits calculated into R14 and only add them into the quotient once at the
; end.
;
; Register roles in the code below, once the divisor has been reformed:
;   (OP1sue,Rfpsr)        = divisor, high and low words;
;   (OP2mhi,OP2mlo,OP2sue) = partial remainder being shifted left;
;   Rins                  = overflow word for the shifted partial remainder;
;   Rtmp, Rtmp2           = trial subtraction result, high and low words;
;   R14                   = the extra quotient bits, most significant first.

        ORR     OP1sue,Rfpsr,OP1sue,LSL #16     ;Reform divisor
        ORR     Rfpsr,R14,Rins,LSL #16

        MOV     R14,#0                          ;Initialise extra bits

        ADDS    OP2sue,OP2sue,OP2sue            ;First extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo            ; remainder
        ADC     OP2mhi,OP2mhi,OP2mhi
        SUBS    Rtmp2,OP2mlo,Rfpsr              ;Trial subtraction of divisor from
        SBCS    Rtmp,OP2mhi,OP1sue              ; partial remainder
        MOVCS   OP2mlo,Rtmp2                    ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14                     ;Accumulate bit

        MOV     Rins,#0                         ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue            ;Second extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo            ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins                  ;Bit shifted out goes into Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr              ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue              ; from partial remainder
        SBCS    Rins,Rins,#0                    ;Borrow from the overflow word
        MOVCS   OP2mlo,Rtmp2                    ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14                     ;Accumulate bit

        MOV     Rins,#0                         ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue            ;Third extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo            ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins                  ;Bit shifted out goes into Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr              ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue              ; from partial remainder
        SBCS    Rins,Rins,#0                    ;Borrow from the overflow word
        MOVCS   OP2mlo,Rtmp2                    ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14                     ;Accumulate bit

        CDebug1 5,"Extra bits to add in are",R14

; (OP1mhi,OP1mlo) now contains 55 bits of quotient, R14 three extra bits
; that need to be added into its low end and (OP2mhi,OP2mlo) the final
; partial remainder. (We've shifted all the extra bits out of OP2sue, and
; the overflow word Rins must be zero at this point.)
;   This is enough bits to provide guard and round bits, plus enough
; information to generate the sticky bit. We do this by setting Rarith to
; zero if the partial remainder is zero, non-zero if the partial remainder
; is non-zero. Note that since we know rounding will take place to double
; precision, we don't mind having the sticky bit overflow into the extended
; precision round bit.

        ORR     Rarith,OP2mhi,OP2mlo            ;Rarith = 0 only if remainder = 0

; Now add the three extra bits into the quotient and test for mantissa
; underflow.

        ADDS    OP1mlo,OP1mlo,R14,LSL #9        ;Add extra bits into quotient
        ADCS    OP1mhi,OP1mhi,#0                ;Carry up; N := top bit of OP1mhi

; If no mantissa underflow, we're ready to return. Otherwise, we must
; recover the spilled registers (to get hold of the result exponent), shift
; the mantissa left one bit, decrement the exponent and return.

  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        BXMI    LR
  ELSE
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
  ENDIF

        LDMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        ADDS    OP1mlo,OP1mlo,OP1mlo            ;Shift (OP1mhi,OP1mlo) left one bit
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     OP2sue,OP2sue,#1                ;Decrement the exponent
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_Single

; We've completed the main work for a single precision division. We've now
; got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[2] in
; (OP1mhi,OP1mlo,Rarith) and the partial remainder P[2] in
; (OP2mhi,OP2mlo,OP2sue) such that:
;
;   (a) Q[2] is a multiple of 2^(-28);
;
;   (b) P[2] is a multiple of 2^(-65);
;
;   (c) P = Q[2]*D + P[2]*2^(-26);
;
;   (d) 0 < P[2] < 2;
;
; The main problem with this is that P[2]*2^(-26) may be almost 2^(-25),
; while Q[2] is a multiple of 2^(-28). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-28) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division.
; Body of the single precision division tail. The three extra quotient bits
; are produced by shift-and-trial-subtract steps and added straight into
; OP1mhi at bits 5, 4 and 3 (the low end of the 29-bit quotient).
;
; Register roles once the divisor has been reformed:
;   (OP1sue,Rfpsr)         = divisor, high and low words;
;   (OP2mhi,OP2mlo,OP2sue) = partial remainder being shifted left;
;   Rins                   = overflow word for the shifted partial remainder;
;   Rtmp, Rtmp2            = trial subtraction result, high and low words.

        ORR     OP1sue,Rfpsr,OP1sue,LSL #16     ;Reform divisor
        ORR     Rfpsr,R14,Rins,LSL #16

        ADDS    OP2sue,OP2sue,OP2sue            ;First extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo            ; remainder
        ADC     OP2mhi,OP2mhi,OP2mhi
        SUBS    Rtmp2,OP2mlo,Rfpsr              ;Trial subtraction of divisor from
        SBCS    Rtmp,OP2mhi,OP1sue              ; partial remainder
        MOVCS   OP2mlo,Rtmp2                    ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADDCS   OP1mhi,OP1mhi,#1:SHL:5          ;Add bit to quotient

        MOV     Rins,#0                         ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue            ;Second extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo            ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins                  ;Bit shifted out goes into Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr              ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue              ; from partial remainder
        SBCS    Rins,Rins,#0                    ;Borrow from the overflow word
        MOVCS   OP2mlo,Rtmp2                    ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADDCS   OP1mhi,OP1mhi,#1:SHL:4          ;Add bit to quotient

        MOV     Rins,#0                         ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue            ;Third extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo            ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins                  ;Bit shifted out goes into Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr              ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue              ; from partial remainder
        SBCS    Rins,Rins,#0                    ;Borrow from the overflow word
        MOVCS   OP2mlo,Rtmp2                    ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADDCS   OP1mhi,OP1mhi,#1:SHL:3          ;Add bit to quotient

; On this path the quotient lives in OP1mhi (R14 still holds part of the
; original divisor), so that is the register to trace.

        CDebug1 5,"Quotient after adding in extra bits is",OP1mhi

; (OP1mhi,OP1mlo,Rarith) now contains 29 bits of quotient and (OP2mhi,OP2mlo)
; the final partial remainder. (We've shifted all the extra bits out of
; OP2sue, and the overflow word Rins must be zero at this point.)
;   This is enough bits to provide guard and round bits, plus 3 bits
; contributing to the sticky bit and enough information to complete
; generating it. We will finish generating it by setting Rarith to zero if
; the partial remainder is zero, non-zero if the partial remainder is
; non-zero. We must also set the low word of the result mantissa to 0.

        ORR     Rarith,OP2mhi,OP2mlo            ;Rarith = 0 only if remainder = 0
        MOV     OP1mlo,#0

; Now test for mantissa underflow. If no mantissa underflow, we're ready to
; return. Otherwise, we must recover the spilled registers (to get hold of
; the result exponent), shift the mantissa left one bit, decrement the
; exponent and return.

        TEQ     OP1mhi,#0                       ;N := top bit of OP1mhi
  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        BXMI    LR
  ELSE
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
  ENDIF

        LDMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        MOV     OP1mhi,OP1mhi,LSL #1            ;Shift mantissa left one bit
        SUB     OP2sue,OP2sue,#1                ;Decrement the exponent
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

;===========================================================================

; Reciprocal approximation table
; ------------------------------
;
; This table contains 128 entries, indexed by the first 7 fractional bits of
; a normalised divisor mantissa D. The value Rapprox obtained has the
; property that:
;
;   1/D <= Rapprox*2^(-7) < 1/D + 2^(-6)
;
; In fact, entry N in the table is calculated by the formula:
;
;   Entry(N) = 2^14 divided by (128+N), rounded up to an integer.
;
; Proof that this is correct: if the first 7 fractional bits of D are N, we
; know that:
;
;   (128+N)*2^(-7) <= D < (129+N)*2^(-7)
;
; This gives us:
;   (2^7)/(129+N) < 1/D <= (2^7)/(128+N)
;
; Next, we have:
;   1/(128+N) - 1/(129+N) = 1/((128+N)*(129+N))
;                         < 1/(128*128)
;                         = 2^(-14)
;
; Multiplying by 2^7 and rearranging:
;   (2^7)/(128+N) - 2^(-7) < (2^7)/(129+N)
;
; So:
;   (2^7)/(128+N) - 2^(-7) < 1/D <= (2^7)/(128+N)
;
; Or:
;   1/D <= (2^7)/(128+N) < 1/D + 2^(-7)
;
; If we round (2^7)/(128+N) up to a multiple of 2^(-7), we increase it by
; less than 2^(-7), giving us:
;
;   1/D <= (2^7)/(128+N) rounded up to a multiple of 2^(-7) < 1/D + 2^(-6)
;
; But (2^7)/(128+N) rounded up to a multiple of 2^(-7) is Entry(N)*2^(-7),
; giving us the desired property.
Recip_Table
        BytesStart
; Emit the 128 one-byte entries at assembly time. The expression
; (16384+127+N)/(128+N) is the integer form of 2^14/(128+N) rounded up.
        GBLA    Rec_tmp
Rec_tmp SETA    0
        WHILE   Rec_tmp < 128
        DCB     (16384+127+Rec_tmp)/(128+Rec_tmp)
Rec_tmp SETA    Rec_tmp+1
        WEND
        BytesEnd

        ]                       ; Conditional assembly of Div

;===========================================================================

        [ :DEF: fmod_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to perform the IEEE remainder function. It has the usual two
; labels on its entry point.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        ASSERT  RNDexp = OP2sue ;We swap over from the use of OP2sue to that
                                ; of RNDexp partway through this routine.

        [ FPEWanted
RemFPE
        ]

        [ FPASCWanted
RemFPASC
        ]

        CDebug3 3,"RemFPASC/FPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo

        [ FPEWanted :LOR: FPASCWanted

; Start by detecting the "fast track" case of both operands being common.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit    ;Only test op2 if op1 is common
        BNE     Rem_Uncommon            ;Taken if either uncommon bit is set

; If the second operand is a zero, we've got an invalid operation.
; Otherwise, if the first operand is a zero, the result is equal to the
; first operand.

        ORRS    Rarith,OP2mhi,OP2mlo    ;Z set if op2 mantissa is all zero
        MOVEQ   Rtmp,#InvReas_XRem0
        BEQ     InvalidOperation2ForSDE
        ORRS    Rarith,OP1mhi,OP1mlo    ;Z set if op1 mantissa is all zero
        BEQ     Rem_FirstOperand_Zero

        ]

; Both operands may now be assumed to be normalised numbers - now to deal
; with signs and exponents.
;
; We're going to generate the remainder by a long-division-like algorithm,
; which can be summarised as follows:
;
;   partial remainder = ABS(op1); sign = SIGN(op1);
;   FOR I = (op1 exponent) TO ((op2 exponent)-1) STEP -1
;     Trial subtract (partial remainder) from (op2 mantissa)*2^I;
;     IF strictly negative THEN
;       partial remainder := 2*(op2 mantissa)*2^I - (partial remainder);
;       sign := NOT(sign);
;   NEXT
;   IF (partial remainder) = 0
;     THEN result = 0, with sign SIGN(op1);
;     ELSE result = (-1)^(sign) * (partial remainder);
;
; We're clearly going to keep both the current sign and the original sign
; around: we'll do this in the top two bits of OP1sue. We'll also need to
; know the prospective result exponent (in OP2sue = RNDexp) and the number
; of iterations of the loop (in Rarith). However, note that if the
; calculated number of iterations is 0 or less, this means that the result
; is equal to the first operand. So we'll take care to calculate this number
; before disturbing the first operand in any way.
;
; Note also that the sign of the second operand is totally irrelevant, now
; that we've got past the stage of there being any potential invalid operation
; or divide-by-zero exceptions.

Rem_Common

        STMFD   Rsp!,{LR}               ;Because we'll need the register, we
                                        ; may well call NormaliseOp1, and to
                                        ; match the Rem_Uncommon path.

        AND     RNDexp,OP2sue,#ToExp_mask       ;Second operand exponent
        SUB     RNDexp,RNDexp,#1                ;Prospective result exponent
        AND     Rarith,OP1sue,#ToExp_mask       ;First operand exponent
        SUBS    Rarith,Rarith,RNDexp            ;Number of iterations - 1

Rem_ExponentsDone

; Flags here are still those of the SUBS above (none of the instructions
; below sets them). LT means no iterations are needed, so the first operand
; is returned unchanged. The link register was stacked at Rem_Common, so
; this early exit must unstack it, exactly as the normal exit at the end of
; the routine does; returning through LR without popping would leave Rsp
; one word too low.

        AND     OP1sue,OP1sue,#Sign_bit         ;All cases want this
        ADDLT   RNDexp,Rarith,RNDexp            ;Recover first operand exp.
        MOVLT   Rarith,#0                       ;And return first operand
  IF Interworking :LOR: Thumbing
        LDMLTFD Rsp!,{LR}                       ; exactly, unstacking the
        BXLT    LR                              ; saved link
  ELSE
        LDMLTFD Rsp!,{PC}                       ; exactly, unstacking the
                                                ; saved link
  ENDIF

; Prepare for the main loop and branch into it.

        MOV     OP1sue,OP1sue,ASR #1            ;Make a copy of the sign, in
                                                ; case the result is zero
        MOV     LR,#0                           ;Top word of the partial
                                                ; remainder

        CDebug2 4,"Entering RMF loop: Rarith, LR",Rarith,LR
        CDebug3 4," op1",OP1sue,OP1mhi,OP1mlo
        CDebug3 4," op2",RNDexp,OP2mhi,OP2mlo

        B       Rem_Loop_Entry

Rem_Loop_Shift

; Shift the partial remainder left by 1 bit, using a bit of trickery to do
; each word in 1 cycle.

        MOV     LR,OP1mhi,LSR #31               ;Top word := bit shifted out
        ADDS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi

Rem_Loop_Entry

; Do the trial subtraction of divisor - partial remainder; if it comes out
; non-negative, keep the previous partial remainder.

        RSBS    Rtmp,OP1mlo,OP2mlo              ;Rtmp  = OP2mlo - OP1mlo
        RSCS    Rtmp2,OP1mhi,OP2mhi             ;Rtmp2 = OP2mhi - OP1mhi - borrow
        RSCS    LR,LR,#0                        ;Top word: 0 - LR - borrow
        BCS     Rem_Loop_End

; Otherwise, use the trial subtraction result to form a new partial
; remainder equal to 2*divisor minus old partial remainder, and note that
; the sign of the partial remainder has changed.

        ADDS    OP1mlo,Rtmp,OP2mlo
        ADC     OP1mhi,Rtmp2,OP2mhi
        EOR     OP1sue,OP1sue,#Sign_bit

Rem_Loop_End

; Loop until finished. Note the partial remainder is completely contained in
; OP1mhi and OP1mlo at this point.

        SUBS    Rarith,Rarith,#1
        BGE     Rem_Loop_Shift

; The result will always be exact.

        MOV     Rarith,#0

; If we've now got a partial remainder of exactly zero, the result is zero,
; with sign equal to that of the original first operand. Otherwise, we've
; got to normalise the result.

        ORRS    Rtmp,OP1mhi,OP1mlo
        MOVEQ   OP1sue,OP1sue,LSL #1    ;Recover copy of original sign
        MOVEQ   RNDexp,#0
        ANDNE   OP1sue,OP1sue,#Sign_bit
        BLNE    $NormaliseOp1_str

; And return.

  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF

        ]                       ; Conditional assembly of Rem/mod

;===========================================================================

        [ :DEF: sqrt_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to take the square root of an internal format floating point
; number. Unlike the dyadic arithmetic instructions, only one entry point is
; required: we do however give it two labels for the sake of consistent
; naming.
; The value returned is either a numeric value plus associated rounding ; information, with the uncommon bit clear, or an infinity or NaN, with the ; uncommon bit set. ; This routine will not work correctly with an input which is an ; unnormalised URD result, or an invalid internal format number. ; ; Uses standard monadic operation entry and exit conventions - see top of ; this file. [ FPEWanted SqrtFPE ] [ FPASCWanted SqrtFPASC ] [ :LNOT: :DEF: sqrt_s CDebug3 3,"SqrtFPE/FPASC: operand =",OP1sue,OP1mhi,OP1mlo ; Start by splitting according to whether the operand is common or uncommon. ; The code to deal with uncommon operands lies a long way down in the ; source, to avoid addressability problems. TST OP1sue,#Uncommon_bit BNE Sqrt_Uncommon ; If the operand is a zero, the product is the same zero. Because the ; operand is common and assumed not to be an unnormalised URD result, we can ; check for zeros by means of the units bit. TST OP1mhi,#EIUnits_bit BEQ Sqrt_Zero ; The operand may now be assumed to be a normalised number. If it is ; negative, we have an invalid operation exception. Otherwise, the result ; sign is positive (equal to the operand sign) and we need to produce the ; result exponent. ; We produce the result exponent by adding the exponent bias to the ; already biased exponent, producing (unbiased exponent) + 2*bias, then ; shifting right by one bit, producing ((unbiased exponent) DIV 2) + bias. ; We set the condition codes on this last instruction in order to transfer ; the least significant bit of the unbiased exponent into C. 
] [ FPLibWanted __fp_sqrt_common ] Sqrt_Common AND RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent [ FPEWanted :LOR: FPASCWanted ANDS OP1sue,OP1sue,#Sign_bit ;Isolate sign bit & check positive MOVNE Rtmp,#InvReas_SqrtNeg BNE InvalidOperation1ForSDE | ANDS OP1sue,OP1sue,#Sign_bit ;Isolate sign bit ORRNE OP1sue,OP1sue,#IVO_bits IF Interworking :LOR: Thumbing BXNE LR ELSE MOVNE PC,LR ENDIF ] ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF00 ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa ; overflow is (exp+bias) DIV 2 MOVS RNDexp,RNDexp,LSR #1 ; This subsidiary entry point deals with taking the square root of a ; normalised mantissa. ; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the ; remaining bits are zero; ; OP1mhi = Operand mantissa, high word; ; OP1mlo = Operand mantissa, low word; ; RNDexp = Prospective result exponent; ; Rins = instruction (needed to determine the precision); ; Rwp, Rfp, Rsp hold their usual values; ; R14 = return link; ; C = least significant bit of operand's unbiased exponent. ; Exit: OP1sue = the result's sign (always positive), with an uncommon bit ; of 0; the remaining bits are zero; ; OP1mhi, OP1mlo = the result's mantissa; ; RNDexp = the result exponent; ; Rarith holds the round bit (in bit 31) and the sticky bit (in bits ; 30:0) if the destination precision is extended; if the ; destination precision is single or double, it holds part of the ; sticky bit (the remainder of which is held in bits below the ; round bit in OP1mhi and OP1mlo); ; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt; ; All other registers preserved. ; ; Note that the result exponent is in fact always equal to the prospective ; result exponent: the process of taking the square root always results in a ; normalised mantissa. (Subsequent rounding may of course lead to mantissa ; overflow, but the raw unrounded result mantissa is always normalised.) 
Sqrt_Mantissa CDebug2 4,"SqrtFPE/FPASC: mantissa =",OP1mhi,OP1mlo CDebug1 4," sign =",OP1sue CDebug1 4," exponent =",RNDexp ; We do the square root by the standard "long square root" algorithm. (There ; is an optimisation possibility here, of doing square roots by ; Newton-Raphson followed by a final correction. This only applies to the ; FPASC, since the FPE's division is too slow for there to be any ; possibility of this making a profit - even the FPA's division will have to ; be used very carefully for it to have a hope of working.) ; ; A description of the long square root algorithm follows: ; ; The problem is to take the square root of a mantissa M in the range 1 <= M ; < 4. An initial approximation R[0]=1 to the root has the property that it ; is the rounded-down root to 0 places after the binary point - i.e. that ; R[0] is a multiple of 2^(-0) and R[0] <= Sqrt(M) < R[0] + 2^(-0). We will ; evaluate successive approximations R[i] to the root such that R[i] is the ; correct rounded-down root to i places after the binary point - i.e. that ; R[i] is a multiple of 2^(-i) and R[i] <= Sqrt(M) < R[i] + 2^(-i). If we ; know R[24], R[53] or R[64] respectively for single, double or extended ; precision, and in addition know whether the result is exact (i.e. whether ; R[i] = Sqrt(M) exactly), we have enough information to provide all the ; required fractional bits and the round and sticky bits, and so to ; calculate the correct IEEE square root. (Note that a guard bit is not ; required: the infinite precision square root of M will not suffer mantissa ; overflow or underflow, and so its finite precision approximations can only ; suffer mantissa overflow during rounding, not prior to rounding.) ; ; So we will use a partial remainder P[i] = M - R[i]^2; initially, P[0] = ; M-1. Next, we know that R[i+1] is either equal to R[i] or to R[i] + ; 2^(-i-1), depending on whether the next bit of the root is 0 or 1. 
To ; determine which, we need to know whether R[i] + 2^(-i-1) <= Sqrt(M): if it ; is, the next bit of the root is 1; if it isn't, the next bit of the root ; is 0. ; ; This is equivalent to asking whether (R[i] + 2^(-i-1))^2 <= M, i.e. to ; whether: ; ; R[i]^2 + R[i]*2^(-i) + 2^(-2*i-2) <= M ; ; or to whether: ; ; R[i]*2^(-i) + 2^(-2*i-2) <= P[i] ; ; If it is, then R[i+1] = R[i] + 2^(-i-1) and: ; ; P[i+1] = M - R[i+1]^2 ; = M - (R[i] + 2^(-i-1))^2 ; = M - R[i]^2 - R[i]*2^(-i) - 2^(-2*i-2) ; = P[i] - R[i]*2^(-i) - 2^(-2*i-2) ; ; If it isn't, then R[i+1] = R[i] and P[i+1] = M - R[i+1]^2 = M - R[i]^2 = ; P[i]. ; ; So the long square root algorithm can be stated as follows, where N=24, 53 ; or 64 respectively for single, double or extended precision: ; ; (1) Initialise: R[0] = 1, P[0] = M-1; ; ; (2) For i=0 to N-1: ; Do a trial subtraction of R[i]*2^(-i) + 2^(-2*i-2) from P[i]; ; If result >= 0, put R[i+1] = R[i] + 2^(-i-1), P[i+1] = result of ; trial subtraction; ; Else put R[i+1] = R[i], P[i+1] = P[i]; ; ; (3) The units, fractional and round bits of the result are in R[N], while ; the sticky bit is 0 if P[N] = 0, 1 if P[N] > 0. ; ; Note that P[i] = M - R[i]^2 ; < M - (Sqrt(M) - 2^(-i))^2 ; = M - M + Sqrt(M)*2^(-i+1) - 2^(-2*i) ; = Sqrt(M)*2^(-i+1) - 2^(-2*i) ; < 2^(-i+2) ; ; So P[i] decreases greatly in magnitude during the long square root ; process. If we use it straightforwardly, this will result in a lot of ; spurious subtractions of bits known to be zero from other bits known to be ; zero during the algorithm.
So instead, let us define Q[i] = P[i]*2^(i-1) ; and recast the algorithm in terms of Q[i]: ; ; (1) Initialise: R[0] = 1, Q[0] = (M-1)/2; ; ; (2) For i=0 to N-1: ; Do a trial subtraction of R[i] + 2^(-i-2) from 2*Q[i]; ; If result >= 0, put R[i+1] = R[i] + 2^(-i-1), Q[i+1] = result of ; trial subtraction; ; Else put R[i+1] = R[i], Q[i+1] = 2*Q[i]; ; ; (3) The units, fractional and round bits of the result are in R[N], while ; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0. ; ; Introducing a travelling bit variable T[i] to represent 2^(-i-2) and ; rephrasing in terms of shifts: ; ; (1) Initialise: R[0] = 1, Q[0] = (M-1)/2, T[0] = 2^(-2); ; ; (2) For i=0 to N-1: ; Do a trial subtraction of R[i] + T[i] from Q[i] << 1; ; If result >= 0, put R[i+1] = R[i] + (T[i] << 1), ; Q[i+1] = (Q[i] << 1) - (R[i]+T[i]); ; Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1; ; ; (3) The units, fractional and round bits of the result are in R[N], while ; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0. ; ; This is more-or-less the algorithm we use, though we split into different ; sections depending on how far the travelling bit has been shifted down so ; far, to avoid doing multi-word arithmetic until we have to. ; ; One thing we do have to look at is the precision required for Q[i]. We ; know that 0 < Q[i] = P[i]*2^(i-1) < 2^(-i+2)*2^(i-1) = 2, so one place ; before the binary point is enough. Initially, Q[0] = (M-1)/2 is a multiple ; of 2^(-64), requiring 64 places after the binary point, or 65 bits in ; total - one bit more than 2 words. This is highly inconvenient, but we can ; get around it by noticing that if M < 2, then the first two bits of the ; result are definitely 1.0, and we have R[1] = 1.0, Q[1] = M-1 and T[1] = ; 2^(-3). So Q[1] is a multiple of 2^(-63) and can be represented in two ; words. On the other hand, if M >= 2, then Q[0] = (M-1)/2 is a multiple of ; 2^(-63) and can also be represented by two words.
This transforms the ; algorithm to: ; ; IF M < 2 THEN ; ; (1) Initialise: R[1] = 1.0, Q[1] = M-1, T[1] = 2^(-3); ; ; (2) For i=1 to N-1: ; Do a trial subtraction of R[i] + T[i] from Q[i] << 1; ; If result >= 0, put R[i+1] = R[i] + (T[i] << 1), ; Q[i+1] = (Q[i] << 1) - (R[i]+T[i]); ; Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1; ; ; (3) The units, fractional and round bits of the result are in R[N], while ; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0. ; ; ELSE ; ; (1') Initialise: R[0] = 1, Q[0] = (M-1)/2, T[0] = 2^(-2); ; ; (2') For i=0 to N-1: ; Do a trial subtraction of R[i] + T[i] from Q[i] << 1; ; If result >= 0, put R[i+1] = R[i] + (T[i] << 1), ; Q[i+1] = (Q[i] << 1) - (R[i]+T[i]); ; Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1; ; ; (3') The units, fractional and round bits of the result are in R[N], while ; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0. ; ; ENDIF ; ; Now Q[i] can be represented in two words up to the point where the trial ; subtraction produces results that overflow two words. We have the ; following situation at various iterations, remembering that T[i] = 2^(-i-2): ; ; For i < 30: R[i] and T[i] can be represented by 1 word, with the binary ; point to the right of bit 31; Q[i+1] requires two words, with the trial ; subtraction being performed on the top word only.
; ; For 30 <= i < 62: R[i] can be represented by 2 words, with the binary point ; to the right of bit 31 of the top word (strictly, the low word isn't ; required for R[30]); T[i] can be represented by 1 word, now with an ; implicit word of zeros above it and the binary point to the right of bit ; 31 of this implicit word; Q[i+1] still requires two words, with the trial ; subtraction occurring on both words; ; ; For i=62: R[i] can be represented by 2 words, with the binary point to the ; right of bit 31 of the top word; T[i] can be represented by 1 word, now ; with two implicit words of zeros above it and the binary point to the ; right of bit 31 of the more significant of the two words; Q[i+1] still ; contains two words, but a third word is required for the trial ; subtraction. ; ; For i=63: R[i] now requires 3 words, with the binary point to the right of ; bit 31 of the most significant word; T[i] can be represented by 1 word, ; now with two implicit words of zeros above it and the binary point to ; the right of bit 31 of the more significant of the two words; Q[i+1] will ; require 3 words to represent it, with the trial subtraction occurring on ; all three words. ; ; So we will actually perform the square root in 5 stages: ; ; (A) Initialisation and iterations with 0 <= i < 30. Terminated after i=23 ; for single precision. ; (B) Iterations with 30 <= i < 62. Terminated after i=52 for double ; precision, not done at all for single precision. ; (C) Iteration with i=62. Only done for extended precision. ; (D) Iteration with i=63. Only done for extended precision. ; (E) Sticky bit construction. Done separately for single/double and ; extended precisions. ; ; Register usage: ; OP1mhi, OP1mlo: R[i] (the root so far); Rarith is also involved in this ; at the end of the i=63 iteration. ; OP2mhi, OP2mlo: Q[i] (the shifted partial remainder). ; Rarith: temporary register. ; Rtmp: T[i] (the travelling bit); ; Rtmp2: loop counter. 
; Initialise remainder (Q[0] for odd exponent, Q[1] for even exponent) SUBCC OP2mhi,OP1mhi,#TopBit ;Subtract 1 for even exponent SUBCS OP2mhi,OP1mhi,#TopBit:SHR:1 ;Shift left, subtract 1 and shift ; right for odd exponent MOV OP2mlo,OP1mlo ;Bottom word is unaffected either way ; Initialise travelling bit. Due to the loop unwinding below, we actually ; want T[0] for an odd exponent, T[1] << 1 for an even exponent: both of ; these are 2^(-2). MOV Rtmp,#TopBit:SHR:2 ; Initialise result - both R[1] = 1.0 for even exponents and R[0] = 1 for ; odd exponents require the same bit pattern. MOV OP1mhi,#TopBit MOV OP1mlo,#0 ; Initialise the loop counter. This is a bit esoteric: it contains minus the ; number of times the first loop below is executed in its top four bits, ; plus the number of times the second loop is exceuted in its bottom 4 bits. ; The idea is that the first loop adds 1 << 28 to it until it becomes ; positive, then the second subtracts one from it until it becomes zero. ; This is the only time we actually need to look at the precision bits in ; the instruction! ; Note that we must take great care not to change the C flag in this code. [ FPEWanted :LOR: FPASCWanted MOV Rtmp2,#((-5):SHL:28) + 8 ;Correct value for extended [ Pr1_mask < &100 ;I.e. if immediate won't set C TST Rins,#Pr1_mask ;Z := 1 if single/double | MOV Rarith,Rins,LSR #Pr1_pos TST Rarith,#(Pr1_mask:SHR:Pr1_pos) ] MOVEQ Rtmp2,#((-5):SHL:28) + 6 ;Correct value for double [ Pr2_mask < &100 ;I.e. if immediate won't set C TSTEQ Rins,#Pr2_mask ;Z := 1 if single | MOVEQ Rarith,Rins,LSR #Pr2_pos TSTEQ Rarith,#(Pr2_mask:SHR:Pr2_pos) ] MOVEQ Rtmp2,#((-4):SHL:28) + 0 ;Correct value for single | ; Single precision square root is not allowed. Extended is though. 
[ Double_mask < &100 TST Rins,#Double_mask | MOV Rarith,Rins,LSR #Double_pos TST Rarith,#(Double_mask:SHR:Double_pos) ] MOVEQ Rtmp2,#((-5):SHL:28) + 8 MOVNE Rtmp2,#((-5):SHL:28) + 6 ] ; We now require the iterations with 0 <= i < 30 to be done - i.e.: ; ; 23 iterations for single precision, even exponent (1<=i<=23); ; 24 iterations for single precision, odd exponent (0<=i<=23); ; 29 iterations for double/extended precision, even exponent (1<=i<=29); ; 30 iterations for double/extended precision, odd exponent (0<=i<=29). ; ; We unwind this loop to produce 6 copies of the code, and branch in after ; the first one for even exponents. BCC Sqrt_Loop1A Sqrt_Loop1 ; First copy of code ADDS OP2mlo,OP2mlo,OP2mlo ;Get Q[i] << 1 - note top bit goes ADCS OP2mhi,OP2mhi,OP2mhi ; into C ORR Rarith,OP1mhi,Rtmp ;And R[i] + T[i] - note no overlap CMPCC OP2mhi,Rarith ;Trial subtraction - always works ; if (Q[i] << 1) >= 2. SUBCS OP2mhi,OP2mhi,Rarith ;Do real subtraction if trial works ORRCS OP1mhi,OP1mhi,Rtmp,LSL #1 ;Put 1 in result if trial works Sqrt_Loop1A ; Second copy of code - similar to first copy except we use Rtmp >> 1 ; instead of Rtmp. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ORR Rarith,OP1mhi,Rtmp,LSR #1 CMPCC OP2mhi,Rarith SUBCS OP2mhi,OP2mhi,Rarith ORRCS OP1mhi,OP1mhi,Rtmp ; Third copy of code - similar to first copy except we use Rtmp >> 2 ; instead of Rtmp. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ORR Rarith,OP1mhi,Rtmp,LSR #2 CMPCC OP2mhi,Rarith SUBCS OP2mhi,OP2mhi,Rarith ORRCS OP1mhi,OP1mhi,Rtmp,LSR #1 ; Fourth copy of code - similar to first copy except we use Rtmp >> 3 ; instead of Rtmp. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ORR Rarith,OP1mhi,Rtmp,LSR #3 CMPCC OP2mhi,Rarith SUBCS OP2mhi,OP2mhi,Rarith ORRCS OP1mhi,OP1mhi,Rtmp,LSR #2 ; Fifth copy of code - similar to first copy except we use Rtmp >> 4 ; instead of Rtmp. 
ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ORR Rarith,OP1mhi,Rtmp,LSR #4 CMPCC OP2mhi,Rarith SUBCS OP2mhi,OP2mhi,Rarith ORRCS OP1mhi,OP1mhi,Rtmp,LSR #3 ; Sixth copy of code - similar to first copy except we use Rtmp >> 5 ; instead of Rtmp. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ORR Rarith,OP1mhi,Rtmp,LSR #5 CMPCC OP2mhi,Rarith SUBCS OP2mhi,OP2mhi,Rarith ORRCS OP1mhi,OP1mhi,Rtmp,LSR #4 ; Now update the travelling bit and loop counter, then loop if required. ADDS Rtmp2,Rtmp2,#1:SHL:28 ;Increment loop counter MOV Rtmp,Rtmp,ROR #6 ;ROR rather than LSR to set up BLT Sqrt_Loop1 ; for next loop. ; If the result is exact at this point, we can obviously return with all the ; remaining fractional bits, the round bit and the sticky bit equal to 0. If ; the result is not exact but the precision is single, we can return with a ; sticky bit of 1. We only continue if the result is inexact and the ; precision is double or extended. ORRS Rarith,OP2mhi,OP2mlo CMPNE Rtmp,#TopBit:SHR:26 ;Will be EQ for single, NE for IF Interworking :LOR: Thumbing BXEQ LR ELSE MOVEQ PC,LR ; double or extended ENDIF ; Next, we need to do the iterations with 30 <= i < 62 - i.e.: ; ; 32 iterations for extended precision (30<=i<=61); ; 23 iterations for double precision (30<=i<=52). ; ; This is a bit awkward from the point of view of unwinding the loop, so we ; will instead do 24 iterations for double precision and unwind the loop to ; produce 4 copies of the code. The extra iteration for double precision is ; wasted work but does no harm. STMFD Rsp!,{Rfpsr,Rins,LR} ;We need a few more registers Sqrt_Loop2 ADDS OP2mlo,OP2mlo,OP2mlo ;Get Q[i] << 1, ADCS OP2mhi,OP2mhi,OP2mhi ADC LR,LR,LR ; putting overflow bit into LR[0] ORR Rarith,OP1mlo,Rtmp ;(OP1mhi,Rarith) := R[i] + T[i] SUBS Rins,OP2mlo,Rarith ;Do trial subtraction, which SBCS Rfpsr,OP2mhi,OP1mhi MOVCCS LR,LR,LSR #1 ; always works if (Q[i] << 1) >= 2. 
MOVCS OP2mlo,Rins ;Use subtraction result if MOVCS OP2mhi,Rfpsr ; successful ORRCS OP1mlo,OP1mlo,Rtmp,LSL #1 ;And put a 1 in the result ORRCS OP1mhi,OP1mhi,Rtmp,LSR #31 ;(NB Rtmp may be &80000000) ; Second copy of code - similar to first copy except we use Rtmp >> 1 in ; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ADC LR,LR,LR ORR Rarith,OP1mlo,Rtmp,LSR #1 SUBS Rins,OP2mlo,Rarith SBCS Rfpsr,OP2mhi,OP1mhi MOVCCS LR,LR,LSR #1 MOVCS OP2mlo,Rins MOVCS OP2mhi,Rfpsr ORRCS OP1mlo,OP1mlo,Rtmp ; Third copy of code - similar to first copy except we use Rtmp >> 2 in ; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ADC LR,LR,LR ORR Rarith,OP1mlo,Rtmp,LSR #2 SUBS Rins,OP2mlo,Rarith SBCS Rfpsr,OP2mhi,OP1mhi MOVCCS LR,LR,LSR #1 MOVCS OP2mlo,Rins MOVCS OP2mhi,Rfpsr ORRCS OP1mlo,OP1mlo,Rtmp,LSR #1 ; Fourth copy of code - similar to first copy except we use Rtmp >> 3 in ; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi. ADDS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi ADC LR,LR,LR ORR Rarith,OP1mlo,Rtmp,LSR #3 SUBS Rins,OP2mlo,Rarith SBCS Rfpsr,OP2mhi,OP1mhi MOVCCS LR,LR,LSR #1 MOVCS OP2mlo,Rins MOVCS OP2mhi,Rfpsr ORRCS OP1mlo,OP1mlo,Rtmp,LSR #2 ; Now update the travelling bit and loop counter, then loop if required. SUBS Rtmp2,Rtmp2,#1 ;Decrement loop counter MOV Rtmp,Rtmp,ROR #4 ;ROR rather than LSR to set up BNE Sqrt_Loop2 ; for last couple of iterations. ; If the remainder is zero at this point, we've got an exact result: the ; last fractional bit, the round bit and the sticky bit must all be zero. 
; Otherwise, we know that the result will *not* be exact, since each of ; the last two iterations either doesn't change the partial remainder (thus ; leaving it non-zero) or subtracts a value with a 1 in a less significant ; bit than the lowest bit currently in the partial remainder, which must ; leave it non-zero. ; So we can now return if either the result is currently exact or if it is ; inexact and the precision is double, taking care to make Rarith zero in ; the first case and non-zero in the second. We only need to perform the ; rest of the division if the precision is extended and the result is ; currently inexact - which implies that it will also ultimately be inexact ; and thus that the sticky bit is 1. ORRS Rarith,OP2mhi,OP2mlo CMPNE Rtmp,#TopBit:SHR:24 ;Will be EQ for double, NE for IF Interworking :LOR: Thumbing LDMEQFD Rsp!,{Rfpsr,Rins,LR} ; extended BXEQ LR ELSE LDMEQFD Rsp!,{Rfpsr,Rins,PC} ; extended ENDIF ; Now we need to get the last fractional bit. ADDS OP2mlo,OP2mlo,OP2mlo ;Get Q[i] << 1, ADCS OP2mhi,OP2mhi,OP2mhi ADC LR,LR,LR ; putting overflow bit into LR[0] RSBS Rtmp,Rtmp,#0 ;Do trial subtraction, which RSCS Rins,OP1mlo,OP2mlo RSCS Rfpsr,OP1mhi,OP2mhi MOVCCS LR,LR,LSR #1 ; always works if (Q[i] << 1) >= 2. MOVCS OP2mlo,Rins ;Use subtraction result if MOVCS OP2mhi,Rfpsr ; successful MOVCC Rtmp,#0 ;And forget it if not ORRCS OP1mlo,OP1mlo,#1 ;And put a 1 in the result ; And the round bit. MOV Rarith,#TopBit+1 ;We know sticky bit is 1 - assume ; round bit is also 1 ADDS Rtmp,Rtmp,Rtmp ;Get Q[i] << 1. ADCS OP2mlo,OP2mlo,OP2mlo ADCS OP2mhi,OP2mhi,OP2mhi IF Interworking :LOR: Thumbing LDMCSFD Rsp!,{Rfpsr,Rins,LR} ;If >= 2, round bit must be 1 BXCS LR ELSE LDMCSFD Rsp!,{Rfpsr,Rins,PC} ;If >= 2, round bit must be 1 ENDIF ;Omit low word of trial subtraction ; - we know it will borrow and thus ; leave C=0. But C=0 here anyway! 
; Do the rest of the trial subtraction that settles the round bit.

        SBCS    Rins,OP2mlo,OP1mlo      ;Middle word, then
        SBCS    Rins,OP2mhi,OP1mhi      ; high word of the trial subtraction
        MOVCC   Rarith,#1               ;It failed, so round=0, sticky=1
        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{Rfpsr,Rins,LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{Rfpsr,Rins,PC}
        ENDIF

        ]                               ; Conditional compilation of sqrt

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; Move / move negated / absolute value of an internal format floating point
; number. It has the usual pair of entry points, one optimised for the
; FPASC, the other for the FPE.
;
; What comes back is either a numeric value together with its rounding
; information (uncommon bit clear), or an infinity or NaN (uncommon bit
; set).
;
; Not valid for an input that is an unnormalised URD result or an invalid
; internal format number.
;
; Standard monadic operation entry and exit conventions apply - see top of
; this file.
;
; The work involved is normally small:
;   * A numeric value has its sign bit adjusted and is then set up for
;     rounding. Uncommon numeric values must be turned into zeros or
;     normalised numbers on the way, otherwise the rounding will not work.
;   * An infinity or quiet NaN only has its sign bit adjusted.
;   * A signalling NaN only has its sign bit adjusted when no change of
;     format is involved (what that means depends on the FPSR NE bit); when
;     a change of format is required it should raise the usual invalid
;     operation exception.

        [ FPEWanted

MoveFPE
        CDebug3 3,"MoveFPE: operand =",OP1sue,OP1mhi,OP1mlo

; A common value is always numeric and needs no special treatment; anything
; uncommon is dealt with at Move_Uncommon.

        TST     OP1sue,#Uncommon_bit
        BNE     Move_Uncommon

; Extract the exponent field.

        AND     RNDexp,OP1sue,#ToExp_mask

        ]

Move_Numeric

; OP1mhi and OP1mlo already hold all the rounding information, so Rarith is
; simply zeroed. OP1sue is cut down to the sign bit, which also clears the
; uncommon bit.

        MOV     Rarith,#0
        AND     OP1sue,OP1sue,#Sign_bit

Move_DoSigns

; Apply the sign changes the instruction asks for, then return: the sign is
; flipped if MNF_bit is set in Rins, and afterwards cleared if ABS_bit is
; set.

        TST     Rins,#MNF_bit
        EORNE   OP1sue,OP1sue,#Sign_bit
        TST     Rins,#ABS_bit
        BICNE   OP1sue,OP1sue,#Sign_bit
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]                               ; Conditional assembly of Move

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; NRM instruction on an internal format floating point number. It has the
; usual pair of entry points, one optimised for the FPASC, the other for
; the FPE.
;
; What comes back is either a numeric value together with its rounding
; information (uncommon bit clear), or an infinity or NaN (uncommon bit
; set).
;
; Standard monadic operation entry and exit conventions apply - see top of
; this file.
;
; This is close to MVF. The difference is that an unnormalised value whose
; uncommon bit is zero - i.e. an URD result - has to be catered for.

        [ FPEWanted

NormFPE
        CDebug3 3,"NormFPE: operand =",OP1sue,OP1mhi,OP1mlo

; Uncommon values go their own way.

        TST     OP1sue,#Uncommon_bit
        BNE     Norm_Uncommon

; Extract the exponent field.

        AND     RNDexp,OP1sue,#ToExp_mask

; With the units bit set there is nothing to normalise. With it clear the
; value is a zero or a URD result; URD results can be handled exactly like
; extended unnormalised numbers and zeros, so fall through.

        TST     OP1mhi,#EIUnits_bit
        BNE     Norm_Numeric

        ]

Norm_ZeroUnnormOrDenorm

; Here the value is an uncommon numeric value (a denormalised number, an
; extended unnormalised number or an extended unnormalised zero), a proper
; zero, or a URD result - the last of which may be treated as an extended
; unnormalised number or zero. Every kind of zero (all-zero mantissa) is
; turned into a real zero and then handled as a numeric.

        ORRS    Rtmp,OP1mhi,OP1mlo
        MOVEQ   RNDexp,#0
        BEQ     Norm_Numeric

; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
;
; The numbers needing conversion are extended unnormalised numbers and
; denormalised numbers of every precision. Extended denormalised and
; unnormalised numbers only have to be normalised. Single and double
; denormalised numbers must first have their units bit cleared and 1 added
; to their exponent.
;
; Single and double denormalised numbers can be picked out at this point
; just by uncommon = units = 1: the only other numbers with that property
; are NaNs and infinities, which have already been dealt with.

        STMFD   Rsp!,{LR}               ;Subroutine call follows
        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31        ;So MI <=> uncommon and units both set
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   RNDexp,RNDexp,#1
        BL      $NormaliseOp1_str       ;NB must be necessary, so no
                                        ; point in checking whether
                                        ; normalised
        LDMFD   Rsp!,{LR}

Norm_Numeric

; OP1mhi and OP1mlo already hold all the rounding information, so Rarith is
; simply zeroed. OP1sue is cut down to the sign bit, which also clears the
; uncommon bit.

        MOV     Rarith,#0
        AND     OP1sue,OP1sue,#Sign_bit
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]                               ; Conditional assembly of Norm

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; URD instruction on an internal format floating point number. There are
; the usual two entry points.
;
; Not valid for inputs which are unnormalised URD results, or for invalid
; internal format numbers.
;
; Standard monadic operation entry and exit conventions apply - see top of
; this file.

        [ FPEWanted

UrdFPE
        CDebug3 3,"UrdFPE: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.
; (UrdFPE, continued.) Uncommon operands are handled at Urd_Uncommon.

        TST     OP1sue,#Uncommon_bit
        BNE     Urd_Uncommon

        ]

Urd_Common

; The operand is common. Split OP1sue into sign and biased exponent.

        AND     Rarith,OP1sue,#ToExp_mask
        AND     OP1sue,OP1sue,#Sign_bit

Urd_Numeric

; Calculate shift amount to denormalise the number to put the true binary
; point at the rounding boundary - i.e. to give it an effective unbiased
; exponent of 23, 52 or 63 depending on whether the precision of the
; instruction is single, double or extended. The low byte of the target
; exponent is selected by the Pr2 and Pr1 bits of Rins; the high byte is the
; same in all three cases (see the ASSERTs) and is ORRed in afterwards.

        MOV     RNDexp,#((EIExp_bias+23):AND:&FF)
        TST     Rins,#Pr2_mask
        MOVNE   RNDexp,#((EIExp_bias+52):AND:&FF)
        TST     Rins,#Pr1_mask
        MOVNE   RNDexp,#((EIExp_bias+63):AND:&FF)
        ORR     RNDexp,RNDexp,#((EIExp_bias+63):AND:&FF00)
        ASSERT  ((EIExp_bias+63):AND:&FF00) = ((EIExp_bias+52):AND:&FF00)
        ASSERT  ((EIExp_bias+63):AND:&FF00) = ((EIExp_bias+23):AND:&FF00)
        SUBS    Rtmp,RNDexp,Rarith      ;Rtmp := target exp. - operand exp.
        BLS     Urd_Big                 ;Nothing to shift out

; Denormalise the number to have this unbiased exponent and return.

        Denorm  OP1mhi,OP1mlo,Rarith,Rtmp,Rtmp2,Rtmp
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Urd_Big

; We just need to return the number itself, with rounding bits equal to
; zero. The operand's own exponent (held in Rarith) becomes the result
; exponent.

        MOV     RNDexp,Rarith
        MOV     Rarith,#0
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]                               ; Conditional assembly of Urd

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; Routine to do a RND instruction on an internal format floating point
; number. There are the usual two entry points.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; The routine does not perform the final increment itself. On an inexact
; result it either clears the mantissa bits below the binary point and
; returns round=0, sticky=1 in Rarith (round down), or sets all of those
; bits and returns round=1, sticky=1 (round up).

        [ FPEWanted
RndFPE
        ]
        [ FPASCWanted
RndFPASC
        ]

        CDebug3 3,"RndFPASC/FPE: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BNE     Rnd_Uncommon

Rnd_Common

; The operand is common. Split OP1sue into sign and biased exponent.

        AND     RNDexp,OP1sue,#ToExp_mask
        AND     OP1sue,OP1sue,#Sign_bit

; If the number is a zero, we're done.

        TST     OP1mhi,#EIUnits_bit
        BEQ     Rnd_Exact

Rnd_Numeric

; Find the position of the real binary point.
;
; The load of the low byte is unconditional. Rnd_Numeric is a label in its
; own right, so the flags cannot be relied on to be NE here: only the
; fall-through from the TST/BEQ above guarantees that. A conditional MOVNE
; would leave Rarith uninitialised under the ORR whenever the label is
; reached with Z set. MOV without S does not alter the flags, so the
; fall-through path behaves exactly as before.

        MOV     Rarith,#((EIExp_bias+63):AND:&FF)
        ORR     Rarith,Rarith,#((EIExp_bias+63):AND:&FF00)
        ASSERT  (EIExp_bias + 63) < &10000
        SUBS    Rtmp,Rarith,RNDexp
        BLE     Rnd_Exact

; The rounding position for an integer - i.e. the real binary point - is now
; Rtmp bits above the bottom of the mantissa. Split according to whether
; this puts the round bit in the low word of the mantissa, the high word of
; the mantissa or above the high word of the mantissa.

        RSBS    Rtmp2,Rtmp,#32
        BLT     Rnd_AboveLowWord

Rnd_LowWord

; Branch out if rounding is exact (no bits set below the binary point).

        MOVS    Rtmp,OP1mlo,LSL Rtmp2
        BEQ     Rnd_Exact

; We now know we want to round down if we're rounding to zero, or if we're
; rounding to minus infinity and the number is positive, or if we're
; rounding to plus infinity and the number is negative.

        MOVS    Rtmp,OP1sue,LSL #32-Sign_pos    ;C<-sign
        TSTCS   Rins,#1:SHL:RM_pos
        TSTCC   Rins,#1:SHL:(RM_pos+1)
        ASSERT  RM_pos < 7      ;So that constants don't disturb C
        BNE     Rnd_LowWord_RoundDown

; If we're not rounding to nearest, we must now be rounding up.

        TST     Rins,#RM_mask
        BNE     Rnd_LowWord_RoundUp
        ASSERT  RM_Nearest = 0

; We're rounding to nearest. Produce the round and sticky bits, then work
; out which way we're rounding.

        ADD     Rtmp,Rtmp2,#1
        MOVS    Rtmp,OP1mlo,LSL Rtmp    ;C<-round, Z<-NOT(sticky)
        BNE     Rnd_LowWord_GotDir      ;Branch if not halfway case
        MOVS    Rtmp,OP1mhi,LSR #1      ;C<-least significant bit, from
        MOVS    Rtmp,OP1mlo,LSL Rtmp2   ; low word unless Rtmp2 is 0.

Rnd_LowWord_GotDir
        BCS     Rnd_LowWord_RoundUp

Rnd_LowWord_RoundDown
        RSB     Rtmp2,Rtmp2,#32         ;Clear all bits below rounding
        MOV     OP1mlo,OP1mlo,LSR Rtmp2 ; boundary
        MOV     OP1mlo,OP1mlo,LSL Rtmp2
        MOV     Rarith,#&40000000       ;And set round=0, sticky=1
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Rnd_LowWord_RoundUp
        RSB     Rtmp2,Rtmp2,#32         ;Set all bits below rounding
        MVN     OP1mlo,OP1mlo,LSR Rtmp2 ; boundary
        MVN     OP1mlo,OP1mlo,LSL Rtmp2
        MOV     Rarith,#&C0000000       ;And set round=1, sticky=1
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Rnd_AboveLowWord
        RSBS    Rtmp2,Rtmp,#64
        BLT     Rnd_AboveMantissa

Rnd_HighWord

; Branch out if rounding is exact.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL Rtmp2
        BEQ     Rnd_Exact

; We now know we want to round down if we're rounding to zero, or if we're
; rounding to minus infinity and the number is positive, or if we're
; rounding to plus infinity and the number is negative.

        MOVS    Rtmp,OP1sue,LSL #32-Sign_pos    ;C<-sign
        TSTCS   Rins,#1:SHL:RM_pos
        TSTCC   Rins,#1:SHL:(RM_pos+1)
        ASSERT  RM_pos < 7      ;So that constants don't disturb C
        BNE     Rnd_HighWord_RoundDown

; If we're not rounding to nearest, we must now be rounding up.

        TST     Rins,#RM_mask
        BNE     Rnd_HighWord_RoundUp
        ASSERT  RM_Nearest = 0

; We're rounding to nearest. Produce the round and sticky bits, then work
; out which way we're rounding.

        ADD     Rtmp,Rtmp2,#1
        ORRS    Rtmp,OP1mlo,OP1mhi,LSL Rtmp     ;C<-round, Z<-NOT(sticky)
        BNE     Rnd_HighWord_GotDir     ;Branch if not halfway case
        CMP     Rtmp2,#1                ;C<-least significant bit, from
        MOVCSS  Rtmp,OP1mhi,LSL Rtmp2   ; high word unless Rtmp2 is 0.
Rnd_HighWord_GotDir
        BCS     Rnd_HighWord_RoundUp

Rnd_HighWord_RoundDown

; Round down: zero every mantissa bit below the rounding boundary, which
; means part of the high word and all of the low word.

        RSB     Rtmp2,Rtmp2,#32
        MOV     OP1mhi,OP1mhi,LSR Rtmp2
        MOVS    OP1mhi,OP1mhi,LSL Rtmp2 ;Z set if nothing is left
        MOV     OP1mlo,#0
        MOVEQ   RNDexp,#0               ;A zero result needs a zero exponent
        MOV     Rarith,#&40000000       ;round=0, sticky=1
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Rnd_HighWord_RoundUp

; Round up: set every mantissa bit below the rounding boundary.

        RSB     Rtmp2,Rtmp2,#32
        MVN     OP1mhi,OP1mhi,LSR Rtmp2
        MVN     OP1mhi,OP1mhi,LSL Rtmp2
        MOV     OP1mlo,#&FFFFFFFF
        MOV     Rarith,#&C0000000       ;round=1, sticky=1
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Rnd_AboveMantissa

; Exact rounding is impossible here: the answer is either zero (rounding
; down) or one (rounding up), and we already know round=0 and sticky=1.
; Rounding up can therefore only happen when the mode is plus or minus
; infinity and the operand's sign matches that direction. The three
; instructions below leave bit 31 of Rtmp set in exactly that case.

        EOR     Rtmp,OP1sue,Rins,LSL #31-RM_pos
        EOR     Rtmp2,OP1sue,Rins,LSL #30-RM_pos
        BICS    Rtmp,Rtmp,Rtmp2
        BMI     Rnd_UpToOne

Rnd_DownToZero

; Result is a zero: zero exponent, zero mantissa, round=0, sticky=1.

        MOV     RNDexp,#0
        MOV     OP1mlo,#0
        MOV     OP1mhi,#0
        MOV     Rarith,#&40000000
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Rnd_UpToOne

; Return an all-ones mantissa with exponent EIExp_bias-1 and round=1,
; sticky=1.

        MOV     RNDexp,#(EIExp_bias-1):AND:&FF00
        ORR     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
        ASSERT  (EIExp_bias-1) < &10000
        MOV     OP1mlo,#&FFFFFFFF
        MOV     OP1mhi,#&FFFFFFFF
        MOV     Rarith,#&C0000000
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Rnd_Exact

; The number is returned as it stands; only the rounding bits need to be
; cleared.

        MOV     Rarith,#0
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]                               ; Conditional assembly of Rnd

;===========================================================================

        [ :DEF: compare_s :LOR: FPEWanted :LOR: FPASCWanted

; Comparison of two internal format floating point numbers. There are two
; entry points: "CompareFPE", which has an optimised fast track for common
; vs. common comparisons, and "CompareFPASC", which leaves out the test for
; that fast track since it should never happen. The second entry point lies
; a long way down in the source to avoid addressing constraints.
;
; Not valid for inputs which are unnormalised URD results, or for invalid
; internal format numbers.
;
; Entry: OP1sue = First operand sign, uncommon, exponent;
;        OP1mhi = First operand mantissa, high word;
;        OP1mlo = First operand mantissa, low word;
;        OP2sue = Second operand sign, uncommon, exponent;
;        OP2mhi = Second operand mantissa, high word;
;        OP2mlo = Second operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (needed to discriminate between
;                 CMF/CMFE/CNF/CNFE and for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  Rarith = result NZCV in bits 31:28; other bits zero;
;        OP1sue, OP1mhi, OP1mlo, OP2sue, OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14
;          may be corrupt.
;        Rfpsr may be updated.
;        All other registers preserved.

        [ FPEWanted :LOR: FPLibWanted

CompareFPE
        [ FPLibWanted
__fp_compare
        ]

        CDebug3 3,"CompareFPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo

; Fast track: both operands common. Anything else goes to Compare_Uncommon.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit
        BNE     Compare_Uncommon

        ]

Compare_Common

; For CMF(E) the sign of the second operand is inverted first, so that the
; rest of the code only has to implement CNF(E). (CNF(E) is the easier of
; the two, basically because addition is commutative and subtraction
; isn't.) When neither FPEWanted nor FPASCWanted is set, the inversion is
; assembled unconditionally.

        [ FPEWanted :LOR: FPASCWanted
        TST     Rins,#CompNeg_bit
        EOREQ   OP2sue,OP2sue,#Sign_bit
        |
        EOR     OP2sue,OP2sue,#Sign_bit
        ]

; Both operands are common. Compare magnitudes first. If (as is likely)
; they differ, the result follows directly:
;
;   Magnitude    Operand 1   Operand 2  |  Result for
;   comparison   sign        sign       |  CNF(E)
;   ------------------------------------+------------
;       >           +           X       |     >
;       >           -           X       |     <
;       <           X           +       |     >
;       <           X           -       |     <

        ExpComp Rtmp,OP1sue,OP2sue,Rtmp2 ;Rtmp := left-aligned op1 exp.
        CMPEQ   OP1mhi,OP2mhi
        CMPEQ   OP1mlo,OP2mlo
        BEQ     Compare_EqualMag
        TEQCS   OP1sue,#0               ;Op1 larger: its sign decides
                                        ; (NB TEQ does not affect C)
        TEQCC   OP2sue,#0               ;Op2 larger: its sign decides
        ASSERT  Sign_pos = 31
        MOVPL   Rarith,#Comp_GT
        MOVMI   Rarith,#Comp_LT
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

Compare_EqualMag

; The magnitudes are equal. If both operands are zero the result is
; equality. Otherwise the signs decide:
;
;   Operand 1   Operand 2  |  Result for
;   sign        sign       |  CNF(E)
;   -----------------------+------------
;      +           +       |     >
;      +           -       |     =
;      -           +       |     =
;      -           -       |     <
;
; Since the magnitudes are equal, both operands are zero exactly when the
; first one is. Rtmp still holds the left-aligned operand 1 exponent.

        EORS    Rtmp2,OP1sue,OP2sue     ;MI if the signs differ
        ASSERT  Sign_pos = 31
        MOV     Rarith,#Comp_EQ         ;Answer for differing signs
        IF Interworking :LOR: Thumbing
        BXMI    LR
        ELSE
        MOVMI   PC,LR
        ENDIF
        ORR     Rtmp,Rtmp,OP1mhi        ;Same signs: is operand 1 (and
        ORRS    Rtmp,Rtmp,OP1mlo        ; therefore operand 2) a zero?
        IF Interworking :LOR: Thumbing
        BXEQ    LR
        ELSE
        MOVEQ   PC,LR
        ENDIF
        TST     OP1sue,#Sign_bit        ;Non-zero with same sign
        MOVEQ   Rarith,#Comp_GT
        MOVNE   Rarith,#Comp_LT
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]                               ; Conditional assembly of Compare

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted :LOR: :DEF: fix_s :LOR: :DEF: fixu_s

; Routine to FIX an internal format floating point number. There are the
; usual two entry points.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
; Entry: OP1sue = Operand sign, uncommon, exponent;
;        OP1mhi = Operand mantissa, high word;
;        OP1mlo = Operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (needed for rounding information and traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  Rarith = result value;
;        OP1sue, OP1mhi, OP1mlo, OP2sue, OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14
;          may be corrupt.
;        Rfpsr may be updated.
;        All other registers preserved.

        [ FPEWanted

FixFPE
        CDebug3 3,"FixFPE: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BNE     Fix_Uncommon

        ]

        [ :DEF: fix_s
__fp_fix_common
        ]

        [ :DEF: fixu_s
__fp_fixu_common
        ]

Fix_Common

; The operand is common. Split OP1sue into sign and biased exponent. (When
; fixu_s is defined, OP1sue is left as it is.)

        AND     Rarith,OP1sue,#ToExp_mask
        [ :LNOT: :DEF: fixu_s
        AND     OP1sue,OP1sue,#Sign_bit
        ]

Fix_Numeric

; Calculate shift amount to denormalise the number to have effective
; unbiased exponent 63 - i.e. to put the true binary point at the rounding
; boundary.

        STMFD   Rsp!,{LR}       ;There may be a subroutine call below
        MOV     RNDexp,#((EIExp_bias+63):AND:&FF00)
        ORR     RNDexp,RNDexp,#((EIExp_bias+63):AND:&FF)
        ASSERT  (EIExp_bias+63) <= &FFFF
        SUBS    Rtmp,RNDexp,Rarith
        BLS     Fix_OutOfRange  ;Deal with massively out of range values

; Now denormalise the number to have this unbiased exponent.

        Denorm  OP1mhi,OP1mlo,Rarith,Rtmp,Rtmp2,Rtmp

; Next, we need to round the result to extended precision.

        [ FPEWanted :LOR: FPASCWanted

        AND     RNDprm,Rins,#RM_mask
        ORR     RNDprm,RNDprm,#2:SHL:(RM_pos+2)
        MOV     RNDdir,#0       ;Result has not been rounded so far
        BL      RoundNum_Extended

        |

; Expanded out rounding code. Rarith holds the round bit in bit 31 and the
; sticky bits below it. The 64-bit integer in (OP1mhi,OP1mlo) is
; incremented when the round bit is set, except in the tied case
; (round=1, sticky=0), where it is incremented only if its lowest bit is
; set.
;
; The carry out of the low-word increment has to be added to the HIGH word.
; The second ADDCSS used to name OP1mlo again. When the low word was
; &FFFFFFFF that lost the carry and left the low word at 1, so an operand
; in [2^32-0.5, 2^32) produced an in-range 1 instead of reaching
; Fix_OutOfRange.

        MOVS    Rtmp,Rarith,LSL #1      ;C<-round, Z<-"tied case"
        BCC     Fix_NoRounding          ;Skip all rounding code...
        MOVEQS  Rtmp,OP1mlo,LSR #1      ;If "tied", C<-lowest integer bit
        ADDCSS  OP1mlo,OP1mlo,#1        ;Increment low word
        ADDCSS  OP1mhi,OP1mhi,#1        ;If carry out, increment high word
        MOVCS   OP1mhi,#EIUnits_bit     ;If mantissa overflow, adjust
        ADDCS   RNDexp,RNDexp,#1        ; mantissa and exponent
Fix_NoRounding

        ]

        [ :LNOT: :DEF: fixu_s

; Produce the potential result, checking for an out-of-range value.
; We know at this point that (OP1mhi,OP1mlo) contains the unsigned integer
; result, which is in the range 0 to 2^63, *both ends included*, and that
; OP1sue contains the sign of the result. We first need to apply the sign to
; this value - this is done by some slightly tricky code to avoid branches.
; Note we cannot tell the difference between a result of +2^63 and -2^63
; after this. This doesn't matter, though - they're both well out of range!

        MOVS    Rtmp,OP1sue,LSL #32-Sign_pos    ;CS if -ve, CC if +ve
        MVNCS   OP1mhi,OP1mhi           ;If -ve, 1's compl't high
        RSBCSS  OP1mlo,OP1mlo,#0        ; word, 2's compl't low word
        ADDCS   OP1mhi,OP1mhi,#1        ; and do carry if needed

        ]

; The result is now in (OP1mhi,OP1mlo). Check for it being out of range -
; i.e. for its top 33 bits not being all identical.

        TEQ     OP1mhi,OP1mlo,ASR #31
        BNE     Fix_OutOfRange

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rarith,OP1mlo

; The only remaining exception that could occur at this point is an inexact
; result.
;   If the result is exact, we don't want to do anything about the inexact
; exception. If it's inexact and the inexact trap is disabled, we want to
; set the inexact cumulative bit in the FPSR. If it's inexact and the
; inexact trap is enabled, we want to call the trap. We use some tricky
; code to distinguish the three cases in-line.

        CMP     RNDdir,#0       ;Leaves CS/EQ if exact, NE if inexact
        MOVNES  Rtmp,Rfpsr,LSR #IXE_pos+1
                                ;Now CS/EQ if exact, CS/NE if inexact &
                                ; trap enabled, CC/NE if inexact & trap
        ASSERT  SysID_FPA <> 0  ; disabled (since SysID non-zero & not
        ASSERT  SysID_FPE <> 0  ; shifted out)
        ASSERT  SysID_pos > IXE_pos
        ORRCC   Rfpsr,Rfpsr,#IXC_bit
        BLHI    InexactTrapForI ;Works because HI = CS/NE

        |

        MOV     OP1sue,#0       ;Signal no error

        ]

        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{PC}
        ENDIF

Fix_OutOfRange

; An out of range FIX produces an invalid operation, with a potential result
; of &7FFFFFFF or &80000000, depending on the sign of the operand.

        [ FPEWanted :LOR: FPASCWanted

        LDMFD   Rsp!,{LR}
        MOV     Rarith,#:NOT:TopBit             ;Make &7FFFFFFF
        EOR     Rarith,Rarith,OP1sue,ASR #31    ;Convert to &80000000 if -ve
        MOV     Rtmp,#InvReas_FixRange
        B       InvalidOperation1ForI

        |

        ORR     OP1sue,OP1sue,#IVO_bits
        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{PC}
        ENDIF

        ]

        ]                               ; Conditional assembly of Fix

;===========================================================================

        [ :DEF: addsub_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the addition/subtraction routine, meant for use
; by the FPASC and without a fast track for common operands.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

AddSubFPASC
        CDebug3 3,"AddSubFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo

        ]

        [ FPLibWanted
__fp_addsub_uncommon
        ]

AddSub_Uncommon

; We have to do a full addition/subtraction, since either or both of the
; operands may be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
;     suitable NaN result.
;
; (b) Check for infinities. If found, the infinity effectively becomes the
;     result, unless both operands are infinities and (after taking
;     account of whether an addition or subtraction is involved) they are
;     effectively of opposite signs.
;
; (c) If no NaNs or infinities, adjust the operands by replacing all
;     effectively unnormalised numbers by the corresponding normalised or
;     extended denormalised number. Then call AddSub_Common, which will
;     work correctly on zeros, normalised numbers and extended
;     denormalised numbers.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.

        TNaNInf Rtmp2,OP2sue,OP2mhi     ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi      ;Rtmp[31] := (op1 is NaN/inf)
        BMI     AddSub_NaNInf1
        TST     Rtmp2,#TopBit           ;Operand 2 NaN or infinity?
        BNE     AddSub_NaNInf2Only

; Now we know there are no NaNs or infinities and therefore no Invalid
; Operation or Divide-By-Zero exceptions - which means we no longer need to
; keep track of exactly what the operands are. Next, we will convert the
; remaining types of numbers to zeros, normalised numbers and extended
; denormalised numbers, which can be dealt with by a call to AddSub_Common
; and one to NormaliseOp1.
;
; The types of numbers that require converting are extended unnormalised
; numbers and zeros, and single and double denormalised numbers. In the case
; of the extended unnormalised numbers and zeros, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, single and double denormalised numbers can be picked out
; simply by uncommon = units = 1: the only other numbers with that property
; are NaNs and infinities, and those have been dealt with already. For each
; such operand the units bit is cleared and the exponent field is increased
; by one.

        STMFD   Rsp!,{LR}               ;Subroutine calls follow
        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31        ;So MI <=> uncommon and units both set
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; Everything that is still uncommon must now be normalised, with one
; exception: numbers whose exponent is 0 are extended precision denormalised
; numbers and are to be left alone.

        TST     OP1sue,#Uncommon_bit
        Exp2Top Rarith,OP1sue,NE,S      ;Finishes the test, sets up the call
        BLNE    $NormDenormOp1_str
        TST     OP2sue,#Uncommon_bit
        Exp2Top Rarith,OP2sue,NE,S      ;Finishes the test, sets up the call
        BLNE    $NormDenormOp2_str

; AddSub_Common does the actual addition. Its result is normalised
; afterwards unless it already is normalised or it is zero. (The extra step
; is needed because e.g. a magnitude sum of two denormalised numbers will
; only have been shifted 1 bit by AddSub_Common.)

        BL      AddSub_Common
        TST     OP1mhi,#EIUnits_bit     ;Units bit set: return at once
        IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}
        BXNE    LR
        ELSE
        LDMNEFD Rsp!,{PC}
        ENDIF
        ORRS    LR,OP1mhi,OP1mlo        ;Non-zero mantissa: normalise it
        BLNE    $NormaliseOp1_str
        IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
        ELSE
        LDMFD   Rsp!,{PC}
        ENDIF

AddSub_NaNInf1

; Operand 1 is a NaN or infinity. Bit 31 of Rtmp2 says whether operand 2 is
; one as well.

        TST     Rtmp2,#TopBit
        BEQ     AddSub_NaNInf1Only

; Both operands are NaNs or infinities. Two infinities give an infinity with
; their shared sign if their effective signs agree, and an invalid operation
; if their effective signs are opposite ("effective" means after taking
; ADF/SUF/RSF distinctions into account). If either operand is a NaN, the
; standard exception/NaN propagation rules apply.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Any mantissa bit below the
        ORR     Rtmp,Rtmp,OP2mlo                ; units bit set in either
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1         ; operand?
        BNE     $ConvertNaNs_str        ;Yes: not two infinities, so use
                                        ; the shared NaN code
        BiShift EOR,Rtmp,OP2sue,Rins,LSR #SubNotAdd_pos,LSL #Sign_pos
        EORS    Rtmp,Rtmp,OP1sue        ;PL if the signs are effectively
        ASSERT  Sign_pos = 31           ; the same
        ANDPL   Rtmp,OP1sue,#Sign_bit   ;Same: the result is an infinity
        BPL     AddSub_InfShared        ; (with op1 sign unless RSF)

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_MagSubInf ;Different: invalid operation
        B       InvalidOperation2ForSDE

        |

        ORR     OP1sue,OP1sue,#IVO_bits
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]

AddSub_NaNInf1Only

; Only operand 1 is a NaN or infinity. The result is:
;   * an invalid operation exception if operand 1 is a signalling NaN;
;   * operand 1 unchanged if it is a quiet NaN;
;   * the standard infinity if operand 1 is an infinity, with its sign
;     determined by that of operand 1 and whether the instruction is RSF.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Is operand a NaN?
        BNE     $ConvertNaN1Of2_str     ;Yes: standard exception/quiet NaN
                                        ; propagation code
        AND     Rtmp,OP1sue,#Sign_bit   ;No: standard infinity carrying
        B       AddSub_InfShared        ; the appropriate sign

AddSub_NaNInf2Only

; Only operand 2 is a NaN or infinity. The result is:
;   * an invalid operation exception if operand 2 is a signalling NaN;
;   * operand 2 unchanged if it is a quiet NaN;
;   * the standard infinity if operand 2 is an infinity, with its sign
;     determined by that of operand 2 and whether the instruction is SUF.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1       ;Is operand a NaN?
        BNE     $ConvertNaN2Of2_str     ;Yes: standard exception/quiet NaN
                                        ; propagation code
        AND     Rtmp,OP2sue,#Sign_bit   ;No: standard infinity carrying
        TST     Rins,#SubNotAdd_bit     ; the appropriate sign
        EORNE   Rtmp,Rtmp,#Sign_bit

AddSub_InfShared

; Rtmp holds the provisional sign. It is inverted if RSF_bit is set in Rins,
; then merged into the prototype infinity loaded into the operand 1
; registers.

        TST     Rins,#RSF_bit
        EORNE   Rtmp,Rtmp,#Sign_bit
        [ CoreDebugging = 0
        ADR     OP1sue,Prototype_Infinity
        |
        ADRL    OP1sue,Prototype_Infinity
        ]
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]                               ; Conditional assembly of AddSub

;===========================================================================

        [ :DEF: mul_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the normal/fast multiplication routine, meant
; for use by the FPASC and without a fast track for common operands.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

MultFPASC
        CDebug3 3,"MultFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo

        ]

        [ FPLibWanted
__fp_mult_uncommon
        ]

Mult_Uncommon

; We have to do a full multiplication, since either or both of the operands
; may be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
;     suitable NaN result.
;
; (b) Check for infinities. If found, the result is an infinity with sign
;     equal to the exclusive-OR of the two operand signs, unless the other
;     operand is a zero, in which case we have an invalid operation.
;
; (c) Check for zeros. If found, the result is a zero with sign equal to
;     the exclusive-OR of the two operand signs.
;
; (d) If no NaNs, infinities or zeros, we can transform the problem into
;     that of multiplying together two normalised numbers, though the
;     normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
;
; NOTE(review): the BMI below relies on the second TNaNInf leaving the N
; flag equal to Rtmp[31]; the macro body is not visible here - confirm.

        TNaNInf Rtmp2,OP2sue,OP2mhi             ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Mult_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        BNE     Mult_NaNInf2Only

; Now if either operand is a zero, the result is zero. We can detect zeros
; by the mantissa being all zero, since only zeros, some unnormalised URD
; results, extended unnormalised zeros and extended infinities have this
; property, we're assuming the operands are not URD results and we've
; already dealt with extended infinities.
;
; The second ORRS only executes when the first operand's mantissa is
; non-zero, so Z is set afterwards if either mantissa is all zero.

        ORRS    Rtmp,OP1mhi,OP1mlo
        ORRNES  Rtmp,OP2mhi,OP2mlo
        BEQ     Mult_Zero

; Both operands are now normalised numbers, denormalised numbers or extended
; unnormalised non-zero numbers. The first step is to convert all of these
; to normalised numbers, possibly with a negative biased exponent. After
; doing the exponent and sign calculations, we then call Mult_Mantissas to
; complete the calculation.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; Mult_Uncommon, continued. Each ANDS below shifts the uncommon bit of the
; sign/uncommon/exponent word up to bit 31 and ANDs it with the mantissa
; high word, so N is set only when uncommon = units = 1.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; Extract both exponents before OP1sue is reduced to the result sign and
; before RNDexp (the same register as OP2sue, per the conventions at the
; top of the file) is overwritten with the result exponent.

        AND     Rtmp,OP1sue,#ToExp_mask
        AND     Rtmp2,OP2sue,#ToExp_mask
        EOR     OP1sue,OP1sue,OP2sue            ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        ADD     RNDexp,Rtmp,Rtmp2
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
        ASSERT  (EIExp_bias-1) < &10000         ;Result exponent if mantissa
                                                ; overflow is exp1+exp2-bias+1

        STMFD   Rsp!,{LR}               ;We will have subroutine calls below

        TST     OP1mhi,#EIUnits_bit
        BLEQ    $NormaliseOp1_str
        TST     OP2mhi,#EIUnits_bit
        BLEQ    $NormaliseOp2_str

        LDMFD   Rsp!,{LR}
        B       Mult_Mantissas

Mult_Zero

; The result is zero.

        EOR     OP1sue,OP1sue,OP2sue            ;Get sign right
        AND     OP1sue,OP1sue,#Sign_bit
        MOV     OP1mhi,#0
        MOV     OP1mlo,#0
        MOV     RNDexp,#0                       ;And exponent
        MOV     Rarith,#0                       ;And round/sticky bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Mult_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     Mult_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an infinity with sign determined by those of the two operands.
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code

Mult_InfShared

        EOR     Rtmp,OP1sue,OP2sue              ;If so, result is infinity
        AND     Rtmp,Rtmp,#Sign_bit             ; with correct sign
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Mult_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * an invalid operation exception if the first operand is an infinity and
;     the second is a zero;
;   * the standard infinity if the first operand is an infinity and the
;     second operand is not a zero, with its sign determined by those of the
;     two operands.
; Note that we can detect the second operand being zero by its mantissa
; being all zero, since only zeros, some unnormalised URD results, extended
; unnormalised zeros and extended infinities have this property, we're
; assuming the operands are not URD results and we know the second operand
; isn't an extended infinity.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Is first operand a NaN?
        BNE     $ConvertNaN1Of2_str             ;Use standard exception/quiet NaN
                                                ; propagation code if so
        ORRS    Rtmp,OP2mhi,OP2mlo              ;Is second operand a zero?
        BNE     Mult_InfShared                  ;If not, result is an infinity
  [ FPEWanted :LOR: FPASCWanted
        MOV     Rtmp,#InvReas_InfTimes0         ;Otherwise, an invalid operation
        B       InvalidOperation2ForSDE
  |
        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF
  ]

Mult_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * an invalid operation exception if the first operand is a zero and the
;     second is an infinity;
;   * the standard infinity if the first operand is not a zero and the second
;     operand is an infinity, with its sign determined by those of the two
;     operands.
; Note that we can detect the first operand being zero by its mantissa being
; all zero, since only zeros, some unnormalised URD results, extended
; unnormalised zeros and extended infinities have this property, we're
; assuming the operands are not URD results and we know it isn't an extended
; infinity.
; Mult_NaNInf2Only, continued.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1       ;Is second operand a NaN?
        BNE     $ConvertNaN2Of2_str             ;Use standard exception/quiet NaN
                                                ; propagation code if so
        ORRS    Rtmp,OP1mhi,OP1mlo              ;Is first operand a zero?
        BNE     Mult_InfShared                  ;If not, result is an infinity
  [ FPEWanted :LOR: FPASCWanted
        MOV     Rtmp,#InvReas_0TimesInf         ;Otherwise, an invalid operation
        B       InvalidOperation2ForSDE
  |
        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF
  ]

  ]

;===========================================================================

  [ :DEF: div_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the normal/fast division/reverse division
; routine, meant for use by the FPASC and without a fast track for common
; operands.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

  [ FPASCWanted

DivFPASC
        CDebug3 3,"DivFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo

  ]

  [ FPLibWanted
__fp_div_uncommon
__fp_rdv_uncommon
  ]

Div_Uncommon

; We have to do a full division, since either or both of the operands may be
; uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
;     suitable NaN result.
;
; (b) Check for infinities. If found, the result is:
;       * An invalid operation exception if both operands are infinities;
;       * An infinite result if the dividend is an infinity and the
;         divisor is numeric;
;       * A zero result if the dividend is numeric and the divisor is an
;         infinity;
;
; (c) Check for zeros. If found, the result is:
;       * An invalid operation exception if both operands are zeros;
;       * A divide-by-zero exception if the dividend is non-zero and the
;         divisor is zero;
;       * A zero if the dividend is zero and the divisor is non-zero.
;
; (d) If no NaNs, infinities or zeros, we can transform the problem into
;     that of dividing a normalised number by another, though the
;     normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.

        TNaNInf Rtmp2,OP2sue,OP2mhi             ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Div_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        BNE     Div_NaNInf2Only

; Now if either operand is a zero, we need to take special action. We can
; detect zeros by the mantissa being all zero, since only zeros, some
; unnormalised URD results, extended unnormalised zeros and extended
; infinities have this property, we're assuming the operands are not URD
; results and we've already dealt with extended infinities.
;
; Note that the zero check is only assembled for the FPE/FPASC builds; the
; other build goes straight to the normal/reverse test.

  [ FPEWanted :LOR: FPASCWanted

        ORRS    Rtmp,OP1mhi,OP1mlo
        ORRNES  Rtmp,OP2mhi,OP2mlo
        BEQ     Div_Zero

; Both operands are now going to be converted to normalised numbers. We now
; know that we are not going to need to know the operands for trap purposes,
; so we can swap them if this is a normal (rather than reverse) division.

        TST     Rins,#RevDiv_bit
  |
        TST     Rins,#Reverse
  ]
        BNE     Div_Uncommon_Swapped

; Normal division: exchange the two operands word by word via Rtmp, so that
; from here on OP1 is the divisor and OP2 the dividend in both the normal
; and the reverse case.

        MOV     Rtmp,OP1sue
        MOV     OP1sue,OP2sue
        MOV     OP2sue,Rtmp
        MOV     Rtmp,OP1mhi
        MOV     OP1mhi,OP2mhi
        MOV     OP2mhi,Rtmp
        MOV     Rtmp,OP1mlo
        MOV     OP1mlo,OP2mlo
        MOV     OP2mlo,Rtmp

Div_Uncommon_Swapped

; Both operands are now normalised numbers, denormalised numbers or extended
; unnormalised non-zero numbers. The first step is to convert all of these
; to normalised numbers, possibly with a negative biased exponent. After
; doing the exponent and sign calculations, we then call Div_Mantissas to
; complete the calculation.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

        AND     Rtmp,OP1sue,#ToExp_mask
        AND     Rtmp2,OP2sue,#ToExp_mask
        EOR     OP1sue,OP1sue,OP2sue            ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        SUB     RNDexp,Rtmp2,Rtmp               ;(post-swap OP2 exp) - (OP1 exp)
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF00
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF
        ASSERT  EIExp_bias < &10000             ;Result exponent if no mantissa
                                                ; underflow is exp1-exp2+bias

        STMFD   Rsp!,{LR}               ;We will have subroutine calls below

        TST     OP1mhi,#EIUnits_bit
        BLEQ    $NormaliseOp1Neg_str
        TST     OP2mhi,#EIUnits_bit
        BLEQ    $NormaliseOp2_str

        LDMFD   Rsp!,{LR}
        B       Div_Mantissas

  [ FPEWanted :LOR: FPASCWanted

Div_Zero

; One or both operands are zeros, and both are numeric values (i.e. not NaNs
; or infinities). The result is:
;   * An invalid operation exception if both operands are zeros;
;   * A divide-by-zero exception if the dividend is non-zero and the divisor
;     is zero;
;   * A zero if the dividend is zero and the divisor is non-zero.
;
; Split according to whether this is a normal or reverse division.
; Div_Zero, continued. No operand swap has happened on this path, so for a
; normal division OP1 is the dividend and for a reverse division OP1 is the
; divisor.

        MOV     Rtmp,#InvReas_0Div0     ;The only type of invalid operation
                                        ; that occurs below
        TST     Rins,#RevDiv_bit
        BNE     Div_Zero_Reversed

; It's a normal division - check the three cases above.

        ORRS    Rtmp2,OP1mhi,OP1mlo     ;Check dividend
        BNE     DivideByZero2
        ORRS    Rtmp2,OP2mhi,OP2mlo     ;Check divisor
        BEQ     InvalidOperation2ForSDE

Div_ZeroByX

; The result is zero.

        EOR     OP1sue,OP1sue,OP2sue    ;Get sign right
        AND     OP1sue,OP1sue,#Sign_bit ;Uncommon bit is zero
        MOV     OP1mhi,#0               ;So is mantissa
        MOV     OP1mlo,#0
        MOV     RNDexp,#0               ;And exponent
        MOV     Rarith,#0               ;And round/sticky bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_Zero_Reversed

; It's a reverse division - check the three cases above.

        ORRS    Rtmp2,OP1mhi,OP1mlo     ;Check divisor
        BNE     Div_ZeroByX
        ORRS    Rtmp2,OP2mhi,OP2mlo     ;Check dividend
        BNE     DivideByZero2
        B       InvalidOperation2ForSDE

  ]

Div_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     Div_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an invalid operation.
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code
  [ FPEWanted :LOR: FPASCWanted
        MOV     Rtmp,#InvReas_InfDivInf
        B       InvalidOperation2ForSDE
  |
        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF
  ]

Div_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * a standard infinity with sign equal to the exclusive-OR of the two
;     operand signs if the first operand is an infinity and the instruction
;     is a normal division;
;   * a zero with sign equal to the exclusive-OR of the two operand signs if
;     the first operand is an infinity and the instruction is a reverse
;     division.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Is first operand a NaN?
        BNE     $ConvertNaN1Of2_str             ;Use standard exception/quiet NaN
                                                ; propagation code if so
        EOR     Rtmp,OP1sue,OP2sue
        AND     Rtmp,Rtmp,#Sign_bit
  [ FPASCWanted :LOR: FPEWanted
        TST     Rins,#RevDiv_bit
  |
        TST     Rins,#Reverse
  ]
        ADREQ   OP1sue,Prototype_Infinity       ;Normal division: inf / x
        ADRNE   OP1sue,Prototype_Zero           ;Reverse division: x / inf
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
        MOV     RNDexp,#0               ;These two are only needed when
        MOV     Rarith,#0               ; result is zero
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * a standard infinity with sign equal to the exclusive-OR of the two
;     operand signs if the second operand is an infinity and the instruction
;     is a reverse division;
;   * a zero with sign equal to the exclusive-OR of the two operand signs if
;     the second operand is an infinity and the instruction is a normal
;     division.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1       ;Is second operand a NaN?
; Div_NaNInf2Only, continued: the flags tested here come from the preceding
; ORRS of the second operand's mantissa words.

        BNE     $ConvertNaN2Of2_str             ;Use standard exception/quiet NaN
                                                ; propagation code if so
        EOR     Rtmp,OP1sue,OP2sue
        AND     Rtmp,Rtmp,#Sign_bit
  [ FPEWanted :LOR: FPASCWanted
        TST     Rins,#RevDiv_bit
  |
        TST     Rins,#Reverse
  ]
        ADRNE   OP1sue,Prototype_Infinity       ;Reverse division: inf / x
        ADREQ   OP1sue,Prototype_Zero           ;Normal division: x / inf
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
        MOV     RNDexp,#0               ;These two are only needed when
        MOV     Rarith,#0               ; result is zero
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

  ]

;===========================================================================

  [ :DEF: fmod_s :LOR: FPEWanted :LOR: FPASCWanted

; The second part of the IEEE remainder function.

Rem_Uncommon

; One or both of the operands may be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
;     suitable NaN result.
;
; (b) Check for infinities. If found, the result is:
;       * An invalid operation exception if the first operand is an
;         infinity.
;       * Equal to the first operand if the second operand is an infinity
;         and the first isn't.
;
; (c) Check for zeros. If found, the result is:
;       * An invalid operation exception if the second operand is a zero;
;       * Equal to the first operand if the first operand is a zero and
;         the second isn't;
;
; (d) If no NaNs, infinities or zeros, we can transform the problem into
;     that of doing the remainder of one normalised number by another,
;     though the normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.

        TNaNInf Rtmp2,OP2sue,OP2mhi             ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Rem_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
; Rem_Uncommon, continued: branch on the result of the TST of Rtmp2 above.

        BNE     Rem_NaNInf2Only

; Now if the second operand is a zero, we've got an invalid operation, and
; if it isn't but the first operand is, we've got a result equal to the
; first operand. We can detect zeros by the mantissa being all zero, since
; only zeros, some unnormalised URD results, extended unnormalised zeros and
; extended infinities have this property, we're assuming the operands are
; not URD results and we've already dealt with extended infinities.

        ORRS    Rtmp,OP2mhi,OP2mlo
  [ FPEWanted :LOR: FPASCWanted
        MOVEQ   Rtmp,#InvReas_XRem0
        BEQ     InvalidOperation2ForSDE
  |
        ORREQ   OP1sue,OP1sue,#IVO_bits ;Flag invalid operation and return,
  IF Interworking :LOR: Thumbing
        BXEQ    LR                      ; but only when the divisor is zero
  ELSE
        MOVEQ   PC,LR                   ; but only when the divisor is zero
  ENDIF
  ]

; Rarith is left holding zero when the branch below is taken, which
; Rem_FirstOperand_Zero relies on.

        ORRS    Rarith,OP1mhi,OP1mlo
        BEQ     Rem_FirstOperand_Zero

; Both operands may now be forced to be normalised numbers - after we've
; dealt with signs and exponents, we can rejoin the main code.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; Rem_Uncommon, continued. Each ANDS below shifts the uncommon bit of the
; sign/uncommon/exponent word up to bit 31 and ANDs it with the mantissa
; high word, so N is set only when uncommon = units = 1 (a single or double
; denormalised number at this point).

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; NOTE(review): LR is pushed here and not popped before the branch to
; Rem_ExponentsDone; presumably the main code expects it on the stack at
; that point - confirm against Rem_ExponentsDone.

        STMFD   Rsp!,{LR}               ;We will have subroutine calls below

; Each operand is normalised only if its own units bit is clear. The second
; test must look at OP1mhi: OP2mhi always has its units bit set once the
; second operand has been normalised, so testing it again would mean the
; first operand was never normalised.

        AND     RNDexp,OP2sue,#ToExp_mask       ;Raw second operand exponent
        TST     OP2mhi,#EIUnits_bit             ;Normalise second operand,
        BLEQ    $NormaliseOp2_str               ; then adjust to get
        SUB     Rtmp2,RNDexp,#1                 ; prospective result exp.
        AND     RNDexp,OP1sue,#ToExp_mask       ;Raw first operand exponent
        TST     OP1mhi,#EIUnits_bit             ;Normalise first operand
        BLEQ    $NormaliseOp1_str               ; then determine the number
        SUBS    Rarith,RNDexp,Rtmp2             ; of iterations - 1
        MOV     RNDexp,Rtmp2                    ;Get prospective result exp.
                                                ; back where it's wanted

; All the special exponent handling is done, so we might as well rejoin the
; main code.

        B       Rem_ExponentsDone

Rem_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     Rem_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an invalid operation.
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code
  [ FPEWanted :LOR: FPASCWanted
        MOV     Rtmp,#InvReas_InfRemX
        B       InvalidOperation2ForSDE
  |
        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF
  ]

Rem_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * an invalid operation if it is an infinity.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Is first operand a NaN?
; Rem_NaNInf1Only, continued: the flags tested here come from the preceding
; ORRS of the first operand's mantissa words.

        BNE     $ConvertNaN1Of2_str             ;Use standard exception/quiet NaN
                                                ; propagation code if so
  [ FPEWanted :LOR: FPASCWanted
        MOV     Rtmp,#InvReas_InfRemX
        B       InvalidOperation2ForSDE
  |
        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF
  ]

Rem_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * equal to the first operand if the second operand is an infinity.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1       ;Is second operand a NaN?
        BNE     $ConvertNaN2Of2_str             ;Use standard exception/quiet NaN
                                                ; propagation code if so

Rem_FirstOperand

; The result is the first operand. If the first operand is common, life is
; easy: split it into sign (OP1sue) and exponent (RNDexp), clear the
; round/sticky word and return.

        TST     OP1sue,#Uncommon_bit
        ANDEQ   RNDexp,OP1sue,#ToExp_mask
        ANDEQ   OP1sue,OP1sue,#Sign_bit
        MOVEQ   Rarith,#0
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR
  ENDIF

; If it's uncommon, life is trickier. First check for zeros.

        ORRS    Rarith,OP1mhi,OP1mlo
        BEQ     Rem_FirstOperand_Zero

; The operand is now a denormalised number or extended unnormalised non-zero
; number; it needs conversion to an internal precision number. In the case
; of the extended denormalised and unnormalised numbers, this just requires
; us to normalise them; in the case of the single and double denormalised
; numbers, we need to clear their units bits and add 1 to their exponents
; before we normalise them.
;
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have a units bit of 1:
; all other uncommon numbers with this property are NaNs or infinities and
; have been dealt with already.

        AND     RNDexp,OP1sue,#ToExp_mask       ;Extract operand exponent
        AND     OP1sue,OP1sue,#Sign_bit         ; and its sign
        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   RNDexp,RNDexp,#1
        MOV     Rarith,#0                       ;Result is exact.
; Rem_FirstOperand, continued: this is a plain branch rather than a BL, so
; the normalisation routine returns directly to our caller.

        B       $NormaliseOp1_str               ;NB must be necessary, so no
                                                ; point in checking whether
                                                ; normalised

Rem_FirstOperand_Zero

; The result is a zero with the sign of the first operand.

        AND     OP1sue,OP1sue,#Sign_bit
        MOV     RNDexp,#0               ;We already know OP1mhi, OP1mlo and
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR                   ; Rarith are zero
  ENDIF

  ]

;===========================================================================

; Three-word constants that the special-case code above and below loads
; with LDMIA into OP1regs before ORing in the result sign.

Prototype_Zero          DCD     &00000000,&00000000,&00000000
Prototype_Infinity      DCD     &40007FFF,&00000000,&00000000

;===========================================================================

  [ :DEF: sqrt_s :LOR: FPEWanted :LOR: FPASCWanted

; The second part of the square root routine, which deals with uncommon
; operands.

  [ FPLibWanted
__fp_sqrt_uncommon
  ]

Sqrt_Uncommon

; We have to deal with the square root of an uncommon value. The cases are:
;
;   * The square root of a signalling NaN is an invalid operation;
;
;   * The square root of a quiet NaN is the NaN itself;
;
;   * The square root of plus infinity is plus infinity;
;
;   * The square root of minus infinity is an invalid operation;
;
;   * The square root of an extended unnormalised zero is a zero of the same
;     sign;
;
;   * The square roots of denormalised numbers and extended unnormalised
;     numbers can be determined by transforming them into normalised numbers
;     (possibly with an out-of-range exponent), then using the standard
;     square root code above.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.

        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op is NaN/inf)
        BMI     Sqrt_NaNInf

; Now if the operand is a zero, the result is a zero of the same sign. We
; can detect zeros by the mantissa being all zero, since only zeros, some
; unnormalised URD results, extended unnormalised zeros and extended
; infinities have this property, we're assuming the operand is not a URD
; result and we've already dealt with extended infinities.
; Sqrt_Uncommon, continued: zero test. For a zero, OP1sue is reduced to its
; sign bit before branching to Sqrt_Zero.

        ORRS    Rtmp,OP1mhi,OP1mlo
        ANDEQ   OP1sue,OP1sue,#Sign_bit
        BEQ     Sqrt_Zero

; The operand is now a denormalised number or extended unnormalised non-zero
; number. If it is negative, we've got an invalid operation. Otherwise, we
; know that no invalid operation or divide-by-zero exception is going to
; occur, so we can convert it to a normalised number, possibly with a
; negative biased exponent. After doing the exponent and sign calculations,
; we then call Sqrt_Mantissa to complete the calculation.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have a units bit of 1:
; all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        AND     RNDexp,OP1sue,#ToExp_mask       ;Extract operand exponent
        ANDS    OP1sue,OP1sue,#Sign_bit         ;NE <=> operand is negative
  [ FPEWanted :LOR: FPASCWanted
        MOVNE   Rtmp,#InvReas_SqrtNeg
        BNE     InvalidOperation1ForSDE
  |

; Negative operand: flag the invalid operation and return. The return must
; be conditional on the same NE condition as the ORRNE, otherwise positive
; operands would also return here and the square root below would never be
; computed in this build. ORRNE does not alter the flags set by the ANDS.

        ORRNE   OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BXNE    LR
  ELSE
        MOVNE   PC,LR
  ENDIF
  ]

        STMFD   Rsp!,{LR}               ;We will have subroutine calls below

        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   RNDexp,RNDexp,#1
        BL      $NormaliseOp1_str               ;NB must be necessary, so no
                                                ; point in checking whether
                                                ; normalised

        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF00
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF
        ASSERT  (EIExp_bias-1) < &10000         ;Result exponent if mantissa
                                                ; overflow is (exp+bias) DIV 2
        MOVS    RNDexp,RNDexp,LSR #1

        LDMFD   Rsp!,{LR}
        B       Sqrt_Mantissa

Sqrt_Zero

; The result is equal to the operand, which is a zero.
; Sqrt_Zero, continued: OP1sue already holds just the sign bit and the
; mantissa words are already zero on arrival here.

        MOV     RNDexp,#0                       ;Clear exponent
        MOV     Rarith,#0                       ;And round/sticky bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Sqrt_NaNInf

; The operand is a NaN or infinity. If it's a NaN, we use the standard
; rules for propagating NaNs. If an infinity, we've got an invalid operation
; if it is negative and a result equal to the standard plus infinity if it
; is positive.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Is operand a NaN?
        BNE     $ConvertNaN1_str                ;Use standard exception/quiet NaN
                                                ; propagation code if so
        TST     OP1sue,#Sign_bit
  [ FPEWanted :LOR: FPASCWanted
        MOVNE   Rtmp,#InvReas_SqrtNeg
        BNE     InvalidOperation1ForSDE
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
  |
        ORRNE   OP1sue,OP1sue,#IVO_bits         ;Minus infinity: invalid
        ADREQ   OP1sue,Prototype_Infinity       ;Plus infinity: standard
        LDMEQIA OP1sue,OP1regs                  ; infinity
  ]
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

  ]

;===========================================================================

  [ FPEWanted :LOR: FPASCWanted

; The second entry point to the move/move negated/absolute value routine,
; meant for use by the FPASC.
; This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

  [ FPASCWanted

MoveFPASC
        CDebug3 3,"MoveFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.

  ]

Move_Uncommon

; Only uncommon values will get here. First split out NaNs and infinities.

        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op is NaN/inf)
        BMI     Move_NaNInf

; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero. If it's the
; last of these, change it to a real zero and treat it as a numeric.

        ORRS    Rtmp,OP1mhi,OP1mlo
        MOVEQ   RNDexp,#0
        BEQ     Move_Numeric

; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        AND     RNDexp,OP1sue,#ToExp_mask
        ASSERT  EIExp_pos = 0

        STMFD   Rsp!,{LR}               ;We will have subroutine calls below

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   RNDexp,RNDexp,#1
        BL      $NormaliseOp1_str               ;NB must be necessary, so no
                                                ; point in checking whether
                                                ; normalised

        LDMFD   Rsp!,{LR}
        B       Move_Numeric

Move_NaNInf

; The operand is a NaN or infinity. If it's an infinity, we just want to
; perform the standard sign manipulations on it and return a standard
; infinity. If it's a NaN, we need to pay attention to the implicit IEEE
; format conversion.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Is operand a NaN?
        BNE     Move_NaN

        AND     Rtmp,OP1sue,#Sign_bit           ;Isolate sign
        TST     Rins,#MNF_bit                   ;Do sign manipulations
        EORNE   Rtmp,Rtmp,#Sign_bit
        TST     Rins,#ABS_bit
        BICNE   Rtmp,Rtmp,#Sign_bit
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Move_NaN

; NOTE(review): NaNConversionNeeded is not visible here; from the test that
; follows it appears to report its answer in the top bit of Rarith.

        STMFD   Rsp!,{LR}
        BL      NaNConversionNeeded
        TEQ     Rarith,#0                       ;Conversion needed?
; Move_NaN, continued: LR is on the stack at this point.
; NOTE(review): the NE returns below rely on ConvertNaN1_Special coming back
; with NE set when an invalid operation trap occurred - routine not visible
; here, confirm.

        BMI     Move_NaN_DoSigns                ;Just alter signs if not
        BL      ConvertNaN1_Special             ;Do correct NaN conversion
  IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;We're done and must *not* alter
                                        ; signs if an invalid operation trap
                                        ; occurred
        BXNE    LR
  ELSE
        LDMNEFD Rsp!,{PC}               ;We're done and must *not* alter
                                        ; signs if an invalid operation trap
                                        ; occurred
  ENDIF

Move_NaN_DoSigns

; Do the sign manipulations and return.

        TST     Rins,#MNF_bit
        EORNE   OP1sue,OP1sue,#Sign_bit
        TST     Rins,#ABS_bit
        BICNE   OP1sue,OP1sue,#Sign_bit
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF

  ]

;===========================================================================

  [ FPEWanted :LOR: FPASCWanted

; The second entry point to the NRM routine, intended for use by the FPASC.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

  [ FPASCWanted

NormFPASC
        CDebug3 3,"NormFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.

  ]

Norm_Uncommon

; Only uncommon values will get here. First split out all but NaNs and
; infinities. For those, the exponent is extracted into RNDexp before
; branching.

        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op is NaN/inf)
        ANDPL   RNDexp,OP1sue,#ToExp_mask
        BPL     Norm_ZeroUnnormOrDenorm

NormUrd_NaNInf

; The operand is a NaN or infinity. If it's an infinity, we just want to
; return a standard infinity. If it's a NaN, we use the standard NaN
; propagation code.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1       ;Check for NaNs
        BNE     $ConvertNaN1_str

        AND     Rtmp,OP1sue,#Sign_bit           ;Isolate sign
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

  ]

;===========================================================================

  [ FPEWanted :LOR: FPASCWanted

; The second entry point to the URD routine, meant for use by the FPASC and
; optimised for uncommon operands.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

  [ FPASCWanted

UrdFPASC
        CDebug3 3,"UrdFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.

  ]

Urd_Uncommon

; Split out NaNs and infinities, which are dealt with in exactly the same
; way as by the NRM instruction.

        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op1 is NaN/inf)
        BMI     NormUrd_NaNInf

; The operand is now known to be a denormalised number or an extended
; precision unnormalised number or zero. We have to take a little care about
; single and double precision denormalised numbers, since their exponents
; and mantissas need correcting. Otherwise, we can just use the standard
; Urd_Numeric routine on them once we have separated the sign and the
; exponent from each other. We can recognise the single and double
; denormalised numbers by the fact that they are the only remaining cases
; with a units bit of 1.
;
; Note that the exponent is placed in Rarith here, not in RNDexp as in the
; other uncommon-operand routines in this file.

        AND     Rarith,OP1sue,#ToExp_mask       ;Extract operand exponent
        AND     OP1sue,OP1sue,#Sign_bit         ; and sign
        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   Rarith,Rarith,#1
        B       Urd_Numeric

  ]

;===========================================================================

  [ FPEWanted :LOR: FPASCWanted

; The second part of the RND routine, which deals with uncommon operands.

Rnd_Uncommon

; Split out NaNs and infinities, which are dealt with in exactly the same
; way as by the NRM instruction.

        TNaNInf Rtmp,OP1sue,OP1mhi              ;Rtmp[31] := (op1 is NaN/inf)
        BMI     NormUrd_NaNInf

; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero. If it's the
; last of these, change it to a real zero and treat it as a numeric.
ORRS RNDexp,OP1mhi,OP1mlo ANDEQ OP1sue,OP1sue,#Sign_bit BEQ Rnd_Exact ; The operand is now a denormalised number or extended unnormalised non-zero ; number. We will change it into the corresponding normalised number ; (possibly with a negative biased exponent), then treat it as a numeric. ; The types of numbers that require converting are extended unnormalised ; numbers and denormalised numbers of all precisions. In the case of the ; extended denormalised and unnormalised numbers, this just requires us to ; normalise them; in the case of the single and double denormalised numbers, ; we need to clear their units bits and add 1 to their exponents before we ; normalise them. ; At this stage, we can recognise that the numbers are single or double ; denormalised numbers simply by the fact that they have uncommon = units = ; 1: all other numbers with this property are NaNs or infinities and have ; been dealt with already. AND RNDexp,OP1sue,#ToExp_mask AND OP1sue,OP1sue,#Sign_bit ASSERT EIExp_pos = 0 STMFD Rsp!,{LR} ;We will have subroutine calls below TST OP1mhi,#EIUnits_bit BICNE OP1mhi,OP1mhi,#EIUnits_bit ADDNE RNDexp,RNDexp,#1 BL $NormaliseOp1_str ;NB must be necessary, so no ; point in checking whether ; normalised LDMFD Rsp!,{LR} B Rnd_Numeric ] ;=========================================================================== [ :DEF: compare_s :LOR: FPEWanted :LOR: FPASCWanted ; The second entry point to the comparison routine, meant for use by the ; FPASC and without a fast track for common operands. ; This routine will not work correctly with inputs which are unnormalised ; URD results, or with invalid internal format numbers. ; ; Has the same entry and exit conventions as "CompareFPE" above. [ FPASCWanted CompareFPASC CDebug3 3,"CompareFPASC: op1 =",OP1sue,OP1mhi,OP1mlo CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo ] Compare_Uncommon ; We have to do a full comparison, since either or both of the operands may ; be uncommon. What we will do is: ; ; (a) Check for NaNs. 
If found, produce a trap if appropriate, or a result ; of "unordered" otherwise. ; ; (b) If no NaNs, adjust the operands by replacing all infinities by the ; standard extended infinity, and all effectively unnormalised numbers ; by the corresponding normalised or denormalised number. Then call ; Compare_Common, which will work correctly on zeros, denormalised ; numbers, normalised numbers and extended infinities. ; ; So the first thing we do is check for NaNs. This is done by first testing ; for a NaN or infinity (they have similar bit patterns) by a standard ; technique, then checking whether the fraction is non-zero. TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf) TNaNInf Rtmp2,OP2sue,OP2mhi ;Rtmp2[31] := (op2 is NaN/inf) TST Rtmp,#TopBit ;Operand 1 NaN or infinity? ORRNES Rarith,OP1mlo,OP1mhi,LSL #1 ;If so, is it a NaN? BNE Compare_Unordered TST Rtmp2,#TopBit ;Operand 2 NaN or infinity? ORRNES Rarith,OP2mlo,OP2mhi,LSL #1 ;If so, is it a NaN? BNE Compare_Unordered ; Now we know there are no NaNs and therefore no exceptions - which means we ; no longer need to keep track of exactly what the operands are. We are ; going to massage the operands into a form where we can use the ; Compare_Common routine on them - note that it already works for zeros, ; normalised numbers, extended denormalised numbers and normal extended ; precision infinities. The remaining numbers are the other infinities, the ; extended unnormalised numbers and zeros, and the single and double ; precision denormalised numbers. ; We will first convert all the infinities to a standard extended ; precision infinity, to ensure that they compare equal with each other. Or ; rather, an almost standard one - we will mark the result as common to ; avoid mistaking it for an unnormalised or denormalised number later on. 
STMFD Rsp!,{LR} ;We're likely to make subroutine calls TST Rtmp,#TopBit ANDNE OP1sue,OP1sue,#Sign_bit ORRNE OP1sue,OP1sue,#&FF ORRNE OP1sue,OP1sue,#&7F00 BICNE OP1mhi,OP1mhi,#EIUnits_bit TST Rtmp2,#TopBit ANDNE OP2sue,OP2sue,#Sign_bit ORRNE OP2sue,OP2sue,#&FF ORRNE OP2sue,OP2sue,#&7F00 BICNE OP2mhi,OP2mhi,#EIUnits_bit ; Now we need to deal with the extended unnormalised numbers and zeros, and ; the single and double denormalised numbers. These basically need ; converting to extended precision normalised or denormalised numbers. In ; the case of the extended unnormalised numbers and zeros, this just ; requires us to normalise them; in the case of the single and double ; denormalised numbers, we need to clear their units bits and add 1 to their ; exponents before we normalise them. ; At this stage, we can recognise that the numbers are single or double ; denormalised numbers simply by the fact that they have uncommon = units = ; 1: all other numbers with this property are NaNs or infinities and have ; been dealt with already. ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos ASSERT EIUnits_pos = 31 BICMI OP1mhi,OP1mhi,#EIUnits_bit ADDMI OP1sue,OP1sue,#1:SHL:EIExp_pos ANDS Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos ASSERT EIUnits_pos = 31 BICMI OP2mhi,OP2mhi,#EIUnits_bit ADDMI OP2sue,OP2sue,#1:SHL:EIExp_pos ; Now we need to normalise all these types of numbers, which now means all ; uncommon numbers except those with exponent 0 (which are extended ; precision denormalised numbers and should be left alone). TST OP1sue,#Uncommon_bit Exp2Top Rarith,OP1sue,NE,S ;Complete test & set up for call BLNE $NormDenormOp1_str TST OP2sue,#Uncommon_bit Exp2Top Rarith,OP2sue,NE,S ;Complete test & set up for call BLNE $NormDenormOp2_str ; And now we can compare the results as though they were common numbers. LDMFD Rsp!,{LR} B Compare_Common Compare_Unordered ; The result is definitely unordered. We need to choose the correct result. 
TST Rfpsr,#AC_bit MOVEQ Rarith,#Comp_Un_Orig MOVNE Rarith,#Comp_Un_Alt ; Now we need to know whether there's an IEEE exception - there is one if ; either operand is a signalling NaN, or if the instruction is CMFE or CNFE. ; Note that the top bits of Rtmp and Rtmp2 are still NaN/infinity flags for ; the two operands. TST Rtmp,#TopBit ;Is operand 1 a NaN? ORRNES Rtmp,OP1mlo,OP1mhi,LSL #1 BEQ Compare_Unordered_Op1NotNaN ;If not, operand 2 must be ANDS Rtmp,OP1mhi,#EIFracTop_bit ;If so, is it signalling? [ FPLibWanted MOVEQ Rarith,#IVO_bits IF Interworking :LOR: Thumbing BXEQ LR ELSE MOVEQ PC,LR ENDIF | BEQ InvalidOperation2ForI ; (invalid operation if so) ASSERT InvReas_SigNaN = 0 ] TST Rtmp2,#TopBit ;Is operand 2 a NaN? ORRNES Rtmp,OP2mlo,OP2mhi,LSL #1 [ FPEWanted :LOR: FPASCWanted BEQ Compare_Unordered_Op2NotNaN ;Branch if not | IF Interworking :LOR: Thumbing BXEQ LR ELSE MOVEQ PC,LR ENDIF ] Compare_Unordered_Op1NotNaN ANDS Rtmp,OP2mhi,#EIFracTop_bit ;If so, is it signalling? [ FPLibWanted MOVEQ Rarith,#IVO_bits IF Interworking :LOR: Thumbing BX LR ELSE MOV PC,LR ENDIF | BEQ InvalidOperation2ForI ; (invalid operation if so) ASSERT InvReas_SigNaN = 0 ] [ FPEWanted :LOR: FPASCWanted Compare_Unordered_Op2NotNaN TST Rins,#CompExc_bit ;Is instruction CMFE/CNFE? IF Interworking :LOR: Thumbing BXEQ LR ELSE MOVEQ PC,LR ;If not, no exception ENDIF MOV Rtmp,#InvReas_CompQNaN ;Otherwise, invalid op B InvalidOperation2ForI ] ] ;=========================================================================== [ FPEWanted :LOR: FPASCWanted :LOR: :DEF: fix_s :LOR: :DEF: fixu_s ; The second entry point to the FIX routine, meant for use by the FPASC and ; optimised for uncommon operands. ; This routine will not work correctly with inputs which are unnormalised ; URD results, or with invalid internal format numbers. ; ; Has the same entry and exit conventions as "FixFPE" above. 
; FPASC entry point, assembled only when FPASCWanted is set.

        [ FPASCWanted

FixFPASC
        CDebug3 3,"FixFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BEQ     Fix_Common              ;Common operands go elsewhere

        ]

        [ :DEF: fix_s
__fp_fix_uncommon
        ]

        [ :DEF: fixu_s
__fp_fixu_uncommon
        ]

Fix_Uncommon

; NaNs and infinities produce invalid operation exceptions, the precise
; nature of which depends on whether the operand is a signalling NaN, a
; quiet NaN or an infinity.

        TNaNInf Rtmp,OP1sue,OP1mhi      ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Fix_NaNInf

; What remains is a denormalised number, an extended precision unnormalised
; number or a zero. Single and double precision denormalised numbers need
; their exponents and mantissas corrected; they are the only remaining cases
; with a units bit of 1, which is how they are recognised. Everything else
; can go to the standard Fix_Numeric routine as soon as the sign and the
; exponent have been separated from each other.

        AND     Rarith,OP1sue,#ToExp_mask ;Extract operand exponent
        [ :LNOT: :DEF: fixu_s
        AND     OP1sue,OP1sue,#Sign_bit ; and sign
        ]
        TST     OP1mhi,#EIUnits_bit     ;Single/double denormalised?
        ADDNE   Rarith,Rarith,#1        ;If so, bump the exponent
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit ; and clear the units bit
        B       Fix_Numeric

Fix_NaNInf

; Every case here produces an invalid operation exception. The reason is
; InvReas_SigNaN for signalling NaNs, InvReas_FixQNaN for quiet NaNs and
; InvReas_FixInf for infinities.

        [ FPEWanted :LOR: FPASCWanted

        TST     OP1mhi,#EIFracTop_bit   ;Top fraction bit picks the NaN kind
        MOVNE   Rtmp,#InvReas_FixQNaN   ;Set: quiet NaN
        MOVEQ   Rtmp,#InvReas_SigNaN    ;Clear: signalling NaN
        ORRS    Rarith,OP1mlo,OP1mhi,LSL #1 ;Zero here means an infinity,
        MOVEQ   Rtmp,#InvReas_FixInf    ; which overrides the NaN reasons
        MOV     Rarith,#TopBit          ;Some sort of integer result
        B       InvalidOperation1ForI

        |

        MOV     OP1sue,#IVO_bits        ;Library build: hand back IVO_bits
        IF Interworking :LOR: Thumbing
        BX      LR
        ELSE
        MOV     PC,LR
        ENDIF

        ]

        ]

;===========================================================================

        END