singrdk/base/Kernel/Native/arm/Crt/arith.asm

5614 lines
210 KiB
NASM
Raw Normal View History

2008-11-17 18:29:00 -05:00
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Microsoft Research Singularity
;;;
;;; Copyright (c) Microsoft Corporation. All rights reserved.
;;;
;;; This file contains ARM-specific assembly code.
;;;
; arith.s
;
; Copyright (C) Advanced RISC Machines Limited, 1994. All rights reserved.
;
; RCS Revision: 1
; Checkin Date: 2007/06/29 02:59:16
; Revising Author
; > coresrc.s.arith
;
; Assembler source for FPA support code and emulator
; ==================================================
; Routines to do arithmetic.
;
; These routines work on numbers in the standard internal format.
;===========================================================================
; Assembly-time variables. Each *_str string holds the symbol name under
; which one helper routine is known in this build; the source then refers to
; the routine as $<variable> (see the IMPORTs below and the routine labels
; further down), so that one source serves both naming schemes.
GBLS NormaliseOp1_str
GBLS NormaliseOp1Neg_str
GBLS NormaliseOp2_str
GBLS NormDenormOp1_str
GBLS NormDenormOp2_str
GBLS ConvertNaNs_str
GBLS ConvertNaN1_str
GBLS ConvertNaN1Of2_str
GBLS ConvertNaN2Of2_str
; Logical flag: set to {FALSE} when FPEWanted or FPASCWanted is true and to
; {TRUE} otherwise (see below). It is tested later in the file to emit
; additional "__fp_..." entry labels.
GBLL FPLibWanted
[ FPEWanted :LOR: FPASCWanted
; FPE or FPASC build: the routines are known by their plain names.
NormaliseOp1_str SETS "NormaliseOp1"
NormaliseOp1Neg_str SETS "NormaliseOp1Neg"
NormaliseOp2_str SETS "NormaliseOp2"
NormDenormOp1_str SETS "NormDenormOp1"
NormDenormOp2_str SETS "NormDenormOp2"
ConvertNaNs_str SETS "ConvertNaNs"
ConvertNaN1_str SETS "ConvertNaN1"
ConvertNaN1Of2_str SETS "ConvertNaN1Of2"
ConvertNaN2Of2_str SETS "ConvertNaN2Of2"
FPLibWanted SETL {FALSE}
|
; Neither FPEWanted nor FPASCWanted: the routines are known by their
; "__fp_"-prefixed names.
NormaliseOp1_str SETS "__fp_normalise_op1"
NormaliseOp1Neg_str SETS "__fp_normalise_op1neg"
NormaliseOp2_str SETS "__fp_normalise_op2"
NormDenormOp1_str SETS "__fp_norm_denorm_op1"
NormDenormOp2_str SETS "__fp_norm_denorm_op2"
ConvertNaNs_str SETS "__fp_convert_NaNs"
ConvertNaN1_str SETS "__fp_convert_NaN1"
ConvertNaN1Of2_str SETS "__fp_convert_NaN_1Of2"
ConvertNaN2Of2_str SETS "__fp_convert_NaN_2Of2"
FPLibWanted SETL {TRUE}
; Unless normalise_s is defined (in which case the normalisation routines are
; assembled further down in this file), import all nine symbols so that
; references to them resolve at link time.
[ :LNOT: :DEF: normalise_s
IMPORT $NormaliseOp1_str
IMPORT $NormaliseOp1Neg_str
IMPORT $NormaliseOp2_str
IMPORT $NormDenormOp1_str
IMPORT $NormDenormOp2_str
IMPORT $ConvertNaNs_str
IMPORT $ConvertNaN1_str
IMPORT $ConvertNaN1Of2_str
IMPORT $ConvertNaN2Of2_str
]
]
[ :DEF: normalise_s :LOR: FPEWanted :LOR: FPASCWanted
; Many of these routines use some standard entry and exit conventions. There
; are two such sets of conventions:
;
; STANDARD MONADIC OPERATION ENTRY AND EXIT
; -----------------------------------------
;
; Entry: OP1sue = Operand sign, uncommon, exponent;
; OP1mhi = Operand mantissa, high word;
; OP1mlo = Operand mantissa, low word;
; Rfpsr = FPSR;
; Rins = instruction (may be needed to determine the exact
; operation and/or for traps);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: OP1sue = the result's sign and uncommon bit; the remaining bits are
; zero if the uncommon bit is 0, and set correctly for the final
; result if the uncommon bit is 1;
; OP1mhi, OP1mlo = the result's mantissa;
; RNDexp (= OP2sue) = if the uncommon bit is 0, the result exponent,
; which may be negative; otherwise corrupt;
; Rarith is corrupt if the uncommon bit is 1; otherwise, if the
; destination precision is extended, it holds the round bit (in bit
; 31) and the sticky bit (in bits 30:0), and if the destination
; precision is single or double, it holds part of the sticky bit
; (the remainder of which is held in bits below the round bit in
; OP1mhi and OP1mlo);
; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
; Rfpsr may be updated;
; All other registers preserved.
;
; STANDARD DYADIC OPERATION ENTRY AND EXIT
; ----------------------------------------
;
; Entry: OP1sue = First operand sign, uncommon, exponent;
; OP1mhi = First operand mantissa, high word;
; OP1mlo = First operand mantissa, low word;
; OP2sue = Second operand sign, uncommon, exponent;
; OP2mhi = Second operand mantissa, high word;
; OP2mlo = Second operand mantissa, low word;
; Rfpsr = FPSR;
; Rins = instruction (may be needed to determine the exact
; operation and/or for traps);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: OP1sue = the result's sign and uncommon bit; the remaining bits are
; zero if the uncommon bit is 0, and set correctly for the final
; result if the uncommon bit is 1;
; OP1mhi, OP1mlo = the result's mantissa;
; RNDexp (= OP2sue) = if the uncommon bit is 0, the result exponent,
; which may be negative; otherwise corrupt;
; Rarith is corrupt if the uncommon bit is 1; otherwise, if the
; destination precision is extended, it holds the round bit (in bit
; 31) and the sticky bit (in bits 30:0), and if the destination
; precision is single or double, it holds part of the sticky bit
; (the remainder of which is held in bits below the round bit in
; OP1mhi and OP1mlo);
; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
; Rfpsr may be updated;
; All other registers preserved.
;
; In both sets of conventions, the routine called is free to produce an
; incorrect result mantissa and rounding information, as long as it knows
; that it will in fact be rounded to the correct value.
;===========================================================================
; Routine to normalise the first or only operand. The biased exponent won't
; be taken below 0: instead, the number will be denormalised if normalising
; it would cause this to happen. Note that the result will never be marked
; as uncommon: any caller of this routine must deal with this itself if
; necessary.
; Entry: OP1sue = First operand sign, remaining bits junk;
; OP1mhi, OP1mlo = First operand mantissa;
; Rarith = First operand exponent, shifted to be left aligned in the
; word;
; Rwp, Rfp, Rsp contain their usual values;
; R14 is the return link.
; Exit: OP1sue = First operand sign and exponent (uncommon is always 0);
; OP1mhi, OP1mlo updated;
; Rarith, Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved.
$NormDenormOp1_str
; Clear out the junk bits in OP1sue.
AND OP1sue,OP1sue,#Sign_bit
; Do we have to normalise by 32 bits or more?
TEQ OP1mhi,#0
BEQ NormDenormOp1_LongShift
; If not, find out how much we do have to shift by.
; This is a binary search for the leading 1 bit of OP1mhi: each MOVS tests
; whether the top 16/8/4/2/1 bits are all zero (Rtmp2 is only a scratch
; destination for the flag-setting shift) and, if they are, the mantissa
; high word is shifted left by that amount and the amount is added to Rtmp.
MOV Rtmp,#0 ;Accumulate shift amount in Rtmp
MOVS Rtmp2,OP1mhi,LSR #16 ;EQ if top 16 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #16
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP1mhi,LSR #24 ;EQ if top 8 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #8
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP1mhi,LSR #28 ;EQ if top 4 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #4
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP1mhi,LSR #30 ;EQ if top 2 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #2
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP1mhi,LSR #31 ;EQ if top bit is zero
MOVEQ OP1mhi,OP1mhi,LSL #1
ADDEQ Rtmp,Rtmp,#1
; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Then complete the shift - i.e. convert the single
; word shift into a two word shift - adjust the exponent if the exponent was
; greater than the shift amount (otherwise we leave it zero) and return.
; The flags set by the SUBS below stay live right down to the SUBLO: none of
; the instructions in between has an S suffix. HI means shift amount >
; exponent, LO means shift amount < exponent.
SUBS Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
MOVHI OP1mhi,OP1mhi,LSR Rtmp2 ;Undo the excess shift
MOVHI Rtmp,Rarith,LSR #32-EIExp_len ;Shift amount is now the exponent
RSB Rarith,Rtmp,#32 ;Rarith = 32 - final shift amount
ORR OP1mhi,OP1mhi,OP1mlo,LSR Rarith ;Bring in the bits from the low word
MOV OP1mlo,OP1mlo,LSL Rtmp ;Shift the low word to match
SUBLO OP1sue,OP1sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC, LR
ENDIF
NormDenormOp1_LongShift
; The top word is zero, so we need to shift by 32 bits or more. Or do we? -
; if the exponent is less than 32, we simply need to shift by the exponent.
; (Rarith holds the exponent left-aligned, so it is compared against 32
; shifted to the same position.)
CMP Rarith,#32:SHL:(32-EIExp_len)
BLO NormDenormOp1_ByExponent
; Now check the bottom word: if it is also zero, we simply need to
; denormalise to exponent 0.
; The MOVS both performs the 32-bit shift (low word moved into high word)
; and tests the moved word for zero.
MOVS OP1mhi,OP1mlo
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR ;OP1sue/mhi/mlo are all already correct!
ENDIF
MOV OP1mlo,#0
; The bottom word is non-zero, so we have a shift amount in the range 32-63.
; Same binary search as above, but the count starts at 32 to account for the
; word move already done.
MOV Rtmp,#32
MOVS Rtmp2,OP1mhi,LSR #16 ;EQ if top 16 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #16
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP1mhi,LSR #24 ;EQ if top 8 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #8
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP1mhi,LSR #28 ;EQ if top 4 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #4
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP1mhi,LSR #30 ;EQ if top 2 bits are zero
MOVEQ OP1mhi,OP1mhi,LSL #2
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP1mhi,LSR #31 ;EQ if top bit is zero
MOVEQ OP1mhi,OP1mhi,LSL #1
ADDEQ Rtmp,Rtmp,#1
; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Note that this cannot require us to undo the shift
; from the bottom word to the top word, since we know the exponent was at
; least 32.
; So we need to backshift if shift amount > exponent, and create a
; non-zero exponent if shift amount < exponent.
SUBS Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
MOVHI OP1mhi,OP1mhi,LSR Rtmp2 ;Undo the excess shift (flags unchanged)
SUBLO OP1sue,OP1sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
NormDenormOp1_ByExponent
; We need to shift the mantissa left by the exponent, which is guaranteed to
; be less than 32, and to return a zero exponent (note that OP1sue is
; already set up for this).
MOV Rtmp,Rarith,LSR #32-EIExp_len ;Rtmp = exponent = shift amount
RSB Rtmp2,Rtmp,#32 ;Rtmp2 = 32 - shift amount
MOV OP1mhi,OP1mhi,LSL Rtmp
ORR OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2 ;Bring in the bits from the low word
MOV OP1mlo,OP1mlo,LSL Rtmp
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
;===========================================================================
; Routine to normalise the second operand. The biased exponent won't be
; taken below 0: instead, the number will be denormalised if normalising it
; would cause this to happen. Note that the result will never be marked
; as uncommon: any caller of this routine must deal with this itself if
; necessary.
; Entry: OP2sue = Second operand sign, remaining bits junk;
; OP2mhi, OP2mlo = Second operand mantissa;
; Rarith = Second operand exponent, shifted to be left aligned in the
; word;
; Rwp, Rfp, Rsp contain their usual values;
; R14 is the return link.
; Exit: OP2sue = Second operand sign and exponent (uncommon is always 0);
; OP2mhi, OP2mlo updated;
; Rarith, Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved.
$NormDenormOp2_str
; Clear out the junk bits in OP2sue.
AND OP2sue,OP2sue,#Sign_bit
; Do we have to normalise by 32 bits or more?
TEQ OP2mhi,#0
BEQ NormDenormOp2_LongShift
; If not, find out how much we do have to shift by.
; This is a binary search for the leading 1 bit of OP2mhi: each MOVS tests
; whether the top 16/8/4/2/1 bits are all zero (Rtmp2 is only a scratch
; destination for the flag-setting shift) and, if they are, the mantissa
; high word is shifted left by that amount and the amount is added to Rtmp.
MOV Rtmp,#0 ;Accumulate shift amount in Rtmp
MOVS Rtmp2,OP2mhi,LSR #16 ;EQ if top 16 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #16
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP2mhi,LSR #24 ;EQ if top 8 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #8
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP2mhi,LSR #28 ;EQ if top 4 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #4
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP2mhi,LSR #30 ;EQ if top 2 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #2
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP2mhi,LSR #31 ;EQ if top bit is zero
MOVEQ OP2mhi,OP2mhi,LSL #1
ADDEQ Rtmp,Rtmp,#1
; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Then complete the shift - i.e. convert the single
; word shift into a two word shift - adjust the exponent if the exponent was
; greater than the shift amount (otherwise we leave it zero) and return.
; The flags set by the SUBS below stay live right down to the SUBLO: none of
; the instructions in between has an S suffix. HI means shift amount >
; exponent, LO means shift amount < exponent.
SUBS Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
MOVHI OP2mhi,OP2mhi,LSR Rtmp2 ;Undo the excess shift
MOVHI Rtmp,Rarith,LSR #32-EIExp_len ;Shift amount is now the exponent
RSB Rarith,Rtmp,#32 ;Rarith = 32 - final shift amount
ORR OP2mhi,OP2mhi,OP2mlo,LSR Rarith ;Bring in the bits from the low word
MOV OP2mlo,OP2mlo,LSL Rtmp ;Shift the low word to match
SUBLO OP2sue,OP2sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
NormDenormOp2_LongShift
; The top word is zero, so we need to shift by 32 bits or more. Or do we? -
; if the exponent is less than 32, we simply need to shift by the exponent.
; (Rarith holds the exponent left-aligned, so it is compared against 32
; shifted to the same position.)
CMP Rarith,#32:SHL:(32-EIExp_len)
BLO NormDenormOp2_ByExponent
; Now check the bottom word: if it is also zero, we simply need to
; denormalise to exponent 0.
; The MOVS both performs the 32-bit shift (low word moved into high word)
; and tests the moved word for zero.
MOVS OP2mhi,OP2mlo
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR ;OP2sue/mhi/mlo are all already correct!
ENDIF
MOV OP2mlo,#0
; The bottom word is non-zero, so we have a shift amount in the range 32-63.
; Same binary search as above, but the count starts at 32 to account for the
; word move already done.
MOV Rtmp,#32
MOVS Rtmp2,OP2mhi,LSR #16 ;EQ if top 16 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #16
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP2mhi,LSR #24 ;EQ if top 8 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #8
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP2mhi,LSR #28 ;EQ if top 4 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #4
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP2mhi,LSR #30 ;EQ if top 2 bits are zero
MOVEQ OP2mhi,OP2mhi,LSL #2
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP2mhi,LSR #31 ;EQ if top bit is zero
MOVEQ OP2mhi,OP2mhi,LSL #1
ADDEQ Rtmp,Rtmp,#1
; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Note that this cannot require us to undo the shift
; from the bottom word to the top word, since we know the exponent was at
; least 32.
; So we need to backshift if shift amount > exponent, and create a
; non-zero exponent if shift amount < exponent.
SUBS Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len ;Shift amt. - exp.
MOVHI OP2mhi,OP2mhi,LSR Rtmp2 ;Undo the excess shift (flags unchanged)
SUBLO OP2sue,OP2sue,Rtmp2,LSL #EIExp_pos ;ADD exp.-shift amt.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
NormDenormOp2_ByExponent
; We need to shift the mantissa left by the exponent, which is guaranteed to
; be less than 32, and to return a zero exponent (note that OP2sue is
; already set up for this).
MOV Rtmp,Rarith,LSR #32-EIExp_len ;Rtmp = exponent = shift amount
RSB Rtmp2,Rtmp,#32 ;Rtmp2 = 32 - shift amount
MOV OP2mhi,OP2mhi,LSL Rtmp
ORR OP2mhi,OP2mhi,OP2mlo,LSR Rtmp2 ;Bring in the bits from the low word
MOV OP2mlo,OP2mlo,LSL Rtmp
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
;===========================================================================
; Routine to float an integer. To fit in with the usual conventions, the
; entry point is given two labels, namely "FltFPE" and "FltFPASC".
; The value returned is always a numeric value plus associated rounding
; information, with the uncommon bit clear.
; Entry: Rarith = integer;
; Rfpsr = FPSR;
; Rins = instruction (needed for traps);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: OP1sue = the result's sign, with the remaining bits zero;
; OP1mhi, OP1mlo = the result's mantissa;
; RNDexp (= OP2sue) = the result exponent;
; Rarith = 0 (i.e. the appropriate round and sticky information for
; extended precision);
; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
; Rfpsr may be updated;
; All other registers preserved.
; The same code is given the label FltFPE and/or FltFPASC, depending on which
; of FPEWanted and FPASCWanted are true.
[ FPEWanted
FltFPE
]
[ FPASCWanted
FltFPASC
]
CDebug1 3,"FltFPE/FPASC: operand =",Rarith
; Extract the sign and produce an unnormalised mantissa. In the process,
; detect the special case of a zero operand.
; Flag flow: the ANDS leaves NE for a negative integer and EQ otherwise. For
; a negative integer only the RSBNE executes and the flags stay NE. For a
; non-negative integer the MOVEQS executes and re-sets the flags, so that EQ
; afterwards means the integer is zero.
MOV OP1mlo,#0 ;Mantissa low word is always zero
ANDS OP1sue,Rarith,#Sign_bit ;Extract sign
ASSERT Sign_pos = 31
RSBNE OP1mhi,Rarith,#0 ;If -ve, 2's complement the integer
MOVEQS OP1mhi,Rarith ;If +ve, copy and check for zero
MOVEQ RNDexp,#0 ;If zero, result exponent is zero
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR ; and return (Rarith is already 0)
ENDIF
; If non-zero, set the appropriate exponent and rounding information, then
; fall through into NormaliseOp1 to complete the job.
; The constant EIExp_bias+31 is loaded in two steps, high byte then low byte;
; the ASSERT checks that these two bytes cover the whole value.
MOV RNDexp,#(EIExp_bias+31):AND:&FF00
ORR RNDexp,RNDexp,#(EIExp_bias+31):AND:&FF
ASSERT (EIExp_bias+31) <= &FFFF
MOV Rarith,#0 ;No round or sticky bits: the value is exact
; Fall through to NormaliseOp1
;===========================================================================
; NB it is possible to fall through into this routine.
; Routine to normalise the result or first operand. Unlike the two routines
; above, this routine will normalise the exponent to a value less than zero
; if necessary, and it won't put the exponent back into OP1sue. Note that
; the result will never be marked as uncommon: any caller of this routine
; must deal with this itself if necessary.
; Entry: OP1mhi, OP1mlo = Result/first operand mantissa, which must not be
; all zero;
; RNDexp = Result/first operand exponent (in normal position in
; word);
; Rwp, Rfp, Rsp contain their usual values;
; R14 is the return link.
; Exit: OP1mhi, OP1mlo and RNDexp updated;
; Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved;
; NE condition is true.
$NormaliseOp1_str
; Step 1: if the high word is zero, move the low word up and account for the
; 32-bit shift in the exponent.
TEQ OP1mhi,#0 ;Do full word shift if
MOVEQ OP1mhi,OP1mlo ; necessary
MOVEQ OP1mlo,#0
SUBEQ RNDexp,RNDexp,#32
; Step 2: binary search for the leading 1 bit of the high word. Each MOVS
; tests whether the top 16/8/4/2/1 bits are zero (Rtmp2 is only a scratch
; destination); if so, the high word is shifted and Rtmp accumulates the
; shift amount, which therefore ends up in the range 0-31.
MOV Rtmp,#0 ;Counter for rest of shift
MOVS Rtmp2,OP1mhi,LSR #16 ;Shift top word by 16 if
MOVEQ OP1mhi,OP1mhi,LSL #16 ; necessary
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP1mhi,LSR #24 ;Shift top word by 8 if
MOVEQ OP1mhi,OP1mhi,LSL #8 ; necessary
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP1mhi,LSR #28 ;Shift top word by 4 if
MOVEQ OP1mhi,OP1mhi,LSL #4 ; necessary
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP1mhi,LSR #30 ;Shift top word by 2 if
MOVEQ OP1mhi,OP1mhi,LSL #2 ; necessary
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP1mhi,LSR #31 ;Shift top word by 1 if
MOVEQ OP1mhi,OP1mhi,LSL #1 ; necessary
ADDEQ Rtmp,Rtmp,#1
; Step 3: apply the same shift to the low word. Because Rtmp is at most 31,
; the RSBS result is non-zero, which is what leaves NE set on exit; nothing
; after it alters the flags.
RSBS Rtmp2,Rtmp,#32 ;Shift the bottom word by
ORR OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2 ; the same amount and set NE
MOV OP1mlo,OP1mlo,LSL Rtmp
SUB RNDexp,RNDexp,Rtmp ;Adjust exponent by shift
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR ; amount and return
ENDIF
;===========================================================================
; Routine to normalise the second operand. Unlike the two routines above,
; this routine will normalise the exponent to a value less than zero if
; necessary, and it won't put the exponent back into OP1sue. Note that the
; result will never be marked as uncommon: any caller of this routine must
; deal with this itself if necessary.
; Entry: OP2mhi, OP2mlo = Second operand mantissa, which must not be all
; zero;
; RNDexp = Second operand exponent (in normal position in word);
; Rwp, Rfp, Rsp contain their usual values;
; R14 is the return link.
; Exit: OP2mhi, OP2mlo and RNDexp updated;
; Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved;
; NE condition is true.
$NormaliseOp2_str
; Step 1: if the high word is zero, move the low word up and account for the
; 32-bit shift in the exponent.
TEQ OP2mhi,#0 ;Do full word shift if
MOVEQ OP2mhi,OP2mlo ; necessary
MOVEQ OP2mlo,#0
SUBEQ RNDexp,RNDexp,#32
; Step 2: binary search for the leading 1 bit of the high word. Each MOVS
; tests whether the top 16/8/4/2/1 bits are zero (Rtmp2 is only a scratch
; destination); if so, the high word is shifted and Rtmp accumulates the
; shift amount, which therefore ends up in the range 0-31.
MOV Rtmp,#0 ;Counter for rest of shift
MOVS Rtmp2,OP2mhi,LSR #16 ;Shift top word by 16 if
MOVEQ OP2mhi,OP2mhi,LSL #16 ; necessary
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP2mhi,LSR #24 ;Shift top word by 8 if
MOVEQ OP2mhi,OP2mhi,LSL #8 ; necessary
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP2mhi,LSR #28 ;Shift top word by 4 if
MOVEQ OP2mhi,OP2mhi,LSL #4 ; necessary
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP2mhi,LSR #30 ;Shift top word by 2 if
MOVEQ OP2mhi,OP2mhi,LSL #2 ; necessary
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP2mhi,LSR #31 ;Shift top word by 1 if
MOVEQ OP2mhi,OP2mhi,LSL #1 ; necessary
ADDEQ Rtmp,Rtmp,#1
; Step 3: apply the same shift to the low word. Because Rtmp is at most 31,
; the RSBS result is non-zero, which is what leaves NE set on exit; nothing
; after it alters the flags.
RSBS Rtmp2,Rtmp,#32 ;Shift the bottom word by
ORR OP2mhi,OP2mhi,OP2mlo,LSR Rtmp2 ; the same amount and set NE
MOV OP2mlo,OP2mlo,LSL Rtmp
SUB RNDexp,RNDexp,Rtmp ;Adjust exponent by shift
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR ; amount and return
ENDIF
;===========================================================================
; Routine to normalise the first operand. Like "NormaliseOp1", except that
; it increments the exponent in RNDexp by the shift amount, rather than
; decrementing it.
; Entry: OP1mhi, OP1mlo = First operand mantissa, which must not be all
; zero;
; RNDexp = Exponent (in normal position in word);
; Rwp, Rfp, Rsp contain their usual values;
; R14 is the return link.
; Exit: OP1mhi, OP1mlo and RNDexp updated;
; Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved;
; NE condition is true.
$NormaliseOp1Neg_str
; Step 1: if the high word is zero, move the low word up. Note that the
; exponent is INCREASED by the 32-bit shift here (ADDEQ, where NormaliseOp1
; uses SUBEQ).
TEQ OP1mhi,#0 ;Do full word shift if
MOVEQ OP1mhi,OP1mlo ; necessary
MOVEQ OP1mlo,#0
ADDEQ RNDexp,RNDexp,#32
; Step 2: binary search for the leading 1 bit of the high word. Each MOVS
; tests whether the top 16/8/4/2/1 bits are zero (Rtmp2 is only a scratch
; destination); if so, the high word is shifted and Rtmp accumulates the
; shift amount, which therefore ends up in the range 0-31.
MOV Rtmp,#0 ;Counter for rest of shift
MOVS Rtmp2,OP1mhi,LSR #16 ;Shift top word by 16 if
MOVEQ OP1mhi,OP1mhi,LSL #16 ; necessary
ADDEQ Rtmp,Rtmp,#16
MOVS Rtmp2,OP1mhi,LSR #24 ;Shift top word by 8 if
MOVEQ OP1mhi,OP1mhi,LSL #8 ; necessary
ADDEQ Rtmp,Rtmp,#8
MOVS Rtmp2,OP1mhi,LSR #28 ;Shift top word by 4 if
MOVEQ OP1mhi,OP1mhi,LSL #4 ; necessary
ADDEQ Rtmp,Rtmp,#4
MOVS Rtmp2,OP1mhi,LSR #30 ;Shift top word by 2 if
MOVEQ OP1mhi,OP1mhi,LSL #2 ; necessary
ADDEQ Rtmp,Rtmp,#2
MOVS Rtmp2,OP1mhi,LSR #31 ;Shift top word by 1 if
MOVEQ OP1mhi,OP1mhi,LSL #1 ; necessary
ADDEQ Rtmp,Rtmp,#1
; Step 3: apply the same shift to the low word. Because Rtmp is at most 31,
; the RSBS result is non-zero, which is what leaves NE set on exit; nothing
; after it alters the flags.
RSBS Rtmp2,Rtmp,#32 ;Shift the bottom word by
ORR OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2 ; the same amount and set NE
MOV OP1mlo,OP1mlo,LSL Rtmp
ADD RNDexp,RNDexp,Rtmp ;Adjust exponent by shift
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR ; amount and return
ENDIF
]
;===========================================================================
[ :DEF: addsub_s :LOR: FPEWanted :LOR: FPASCWanted
; Routine to add, subtract or reverse subtract two internal format floating
; point numbers. It has two entry points: "AddSubFPE", which has an
; optimised fast track for both operands being common, and "AddSubFPASC",
; which avoids the test for this optimised fast track - since it should
; never happen. The second entry point lies a long way down in the source
; to avoid addressing constraints.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
ASSERT RNDexp = OP2sue ;We swap over from the use of OP2sue to that
; of RNDexp partway through this routine.
[ FPEWanted
AddSubFPE
CDebug3 3,"AddSubFPE: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
; Start by detecting the "fast track" case of both operands being common.
TST OP1sue,#Uncommon_bit ;NE if operand 1 is uncommon
TSTEQ OP2sue,#Uncommon_bit ;Otherwise NE if operand 2 is uncommon
BNE AddSub_Uncommon ;Either uncommon: leave the fast track
]
; In a library build the common-operand code also gets an "__fp_" label.
[ FPLibWanted
__fp_addsub_common
]
AddSub_Common
STMFD Rsp!,{LR} ;Register needed, and we may get a
; subroutine call
CDebug3 4,"AddSub_Common: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 4," op2 =",OP2sue,OP2mhi,OP2mlo
; Both operands are zeros or normalised numbers. We can distinguish between
; them on the basis of the units bit. However, note that the standard
; algorithm for adding/subtracting floating point numbers (i.e. do an
; alignment shift on the one with the smaller exponent, add or subtract the
; mantissas, then do a normalisation shift if necessary) works equally well
; on all of these.
; This entry point is also called from AddSub_Uncommon to add or subtract
; operands which are zeros, normalised numbers or extended denormalised
; numbers. It works perfectly well on such numbers, provided it is
; recognised that the result mantissa may be unnormalised and non-zero.
; Note that we know that the invalid operation and divide-by-zero
; exceptions won't occur - i.e. we don't need to preserve the operands. So
; we start by modifying the signs of the operands for SUF and RSF
; instructions.
; (This sign fix-up is assembled only when addsub_s is not defined.)
[ :LNOT: :DEF: addsub_s
TST Rins,#SubNotAdd_bit ;Is it SUF/RSF, not ADF?
EORNE OP2sue,OP2sue,#Sign_bit ;If so, change op2 sign (assuming SUF)
TST Rins,#RSF_bit ;Is it RSF, not ADF/SUF?
EORNE OP2sue,OP2sue,#Sign_bit ;If so, we shouldn't have changed op2
EORNE OP1sue,OP1sue,#Sign_bit ; sign and should have changed op1 sign
]
; We can consider this to be an addition from now on. Next, we'll deal with
; the basic exponent and sign calculation: the results of this may get
; modified later on.
; This section will leave the prospective sign for the result in OP1sue,
; R14 containing the exclusive-OR of the signs (which determines later
; whether we do a magnitude addition or subtraction), RNDexp equal to the
; first operand exponent and Rtmp equal to the exponent difference
; exp1-exp2 (left-aligned). Rarith receives the first operand exponent,
; left-aligned, from ExpDiff and is only used to set up RNDexp.
; The conditional branches below act on the flags left by the ExpDiff macro
; (defined elsewhere): the EOR/AND/MOV in between do not alter the flags.
; Judging by the code on each path, HI means exp1 > exp2 and EQ means the
; exponents are equal.
ExpDiff Rtmp,Rarith,OP1sue,OP2sue ;Get difference and op1 exp.
EOR R14,OP1sue,OP2sue ;Make EOR of signs
AND OP1sue,OP1sue,#Sign_bit ;Isolate prospective result sign
MOV RNDexp,Rarith,LSR #32-EIExp_len ;Right-align operand 1 exponent
BHI AddSub_Op2Shift
MOVEQ Rtmp2,Rtmp ;If EQ, Rtmp = Rtmp2 = 0
BEQ AddSub_ShiftDone ; = correct guard/round/sticky
AddSub_Op1Shift
; Operand 1 needs shifting, and so operand 2's exponent is used for the
; result. Rtmp currently contains exp1-exp2 = -(shift amount),
; left-aligned.
RSB Rarith,Rtmp,#0 ;Get shift amount = exp2 - exp1
MOV Rarith,Rarith,LSR #32-EIExp_len ;Right-align exponent difference
ADD RNDexp,RNDexp,Rarith ;Resurrect operand 2 exponent
; Now denormalise (OP1mhi,OP1mlo) with a shift amount of Rarith, putting
; op1 guard/round/sticky bits into Rtmp, op2 guard/round/sticky bits into
; Rtmp2.
Denorm OP1mhi,OP1mlo,Rtmp,Rarith,Rtmp2,Rarith
MOV Rtmp2,#0 ;Operand 2 guard/round/sticky
B AddSub_ShiftDone
AddSub_Op2Shift
; Operand 2 needs shifting, and so we've already selected the correct result
; exponent. Furthermore, Rtmp currently contains exp1-exp2 = shift amount,
; left-aligned. So denormalise (OP2mhi,OP2mlo) with a shift amount of Rtmp,
; putting op1 guard/round/sticky bits into Rtmp, op2 guard/round/sticky bits
; into Rtmp2.
MOV Rarith,Rtmp,LSR #32-EIExp_len ;Right-align exponent difference
Denorm OP2mhi,OP2mlo,Rtmp2,Rarith,Rtmp,Rarith
MOV Rtmp,#0 ;Operand 1 guard/round/sticky
AddSub_ShiftDone
; We now have:
; OP1sue: Prospective result sign (= operand 1 sign);
; OP1mhi/OP1mlo: Operand 1 mantissa, possibly shifted;
; RNDexp: Prospective result exponent (= MAX(operand exponents));
; OP2mhi/OP2mlo: Operand 2 mantissa, possibly shifted;
; Rarith: Free;
; Rfpsr: FPSR;
; Rtmp: Operand 1 guard, round and sticky bits;
; Rins: Instruction;
; Rtmp2: Operand 2 guard, round and sticky bits;
; Rwp,Rfp,Rsp: Standard values;
; R14: Sign bit indicates magnitude subtraction/NOT addition;
; Now we need to split according to whether we need to do a magnitude
; addition or a magnitude subtraction.
TST R14,#Sign_bit ;NE if the operand signs differ
BNE AddSub_MagSub ;Otherwise fall through to AddSub_MagAdd
AddSub_MagAdd
; Perform the magnitude addition. Note first that we have no need for a
; guard bit in this case, so we are going to regard the guard/round/sticky
; bits in Rtmp[31/30/29:0] and Rtmp2[31/30/29:0] as simply being
; round/sticky bits in Rtmp[31/30:0] and Rtmp2[31/30:0]. Secondly, note that
; since we know that at least one of Rtmp and Rtmp2 is zero, we can simply
; add these round/sticky bit representations to get the result round/sticky
; representation.
; The three additions form one 96-bit add: rounding word, then mantissa low
; word, then mantissa high word, with the carry propagating upwards.
ADDS Rarith,Rtmp,Rtmp2 ;Will not in fact generate C=1
ADCS OP1mlo,OP1mlo,OP2mlo
ADCS OP1mhi,OP1mhi,OP2mhi
; If C=0, we're done. Otherwise, we've got to adjust the exponent, mantissa,
; round and sticky bits.
; (Returns pop the link value that was stacked at AddSub_Common.)
IF Interworking :LOR: Thumbing
LDMCCFD Rsp!,{LR}
BXCC LR
ELSE
LDMCCFD Rsp!,{PC}
ENDIF
; Mantissa overflow: C=1 here. Shift the 65-bit sum right by one bit: the
; first RRX brings the carry in as the new top bit of the high word, the
; second passes the bit shifted out of the high word into the low word, and
; the final RRX moves the bit shifted out of the low word into bit 31 of
; Rarith as the new round bit.
ADD RNDexp,RNDexp,#1
MOVS OP1mhi,OP1mhi,RRX
MOVS OP1mlo,OP1mlo,RRX
ORR Rarith,Rarith,Rarith,LSL #1 ;Sticky receives all of old
MOV Rarith,Rarith,RRX ; round/sticky; round is new
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
AddSub_MagSub
; We need to do a magnitude subtraction of OP2mhi/OP2mlo/Rtmp2 from
; OP1mhi/OP1mlo/Rtmp. The prospective result exponent in RNDexp has been
; made right already, but if the subtraction comes out negative, we will
; have to change the sign of the result. Note we can subtract the
; guard/round/sticky representations in Rtmp and Rtmp2, because we know one
; of them is entirely zero.
; The three subtractions form one 96-bit subtract, with the borrow
; propagating from the rounding word up through the mantissa.
SUBS Rarith,Rtmp,Rtmp2
SBCS OP1mlo,OP1mlo,OP2mlo
SBCS OP1mhi,OP1mhi,OP2mhi
; If the subtraction (which was of unsigned numbers) came out negative, we
; need to reverse the sign of the result and 2's complement the mantissa -
; again including the guard/round/sticky part.
BCS AddSub_MagSub_Normalise ;C set = no borrow = result non-negative
EOR OP1sue,OP1sue,#Sign_bit
RSBS Rarith,Rarith,#0 ;96-bit negate: 0 - value, with borrow
RSCS OP1mlo,OP1mlo,#0
RSC OP1mhi,OP1mhi,#0
AddSub_MagSub_Normalise
; Now we need to normalise the result. This is slightly tricky, because in
; the case of subtracting the largest possible number with one exponent from
; the smallest number of the next exponent (e.g. 1-(1-2^(-64))), the leading
; bit of the result is actually the round bit. We can divide into two cases:
;
; (a) The exponent difference was 0 or 1: in this case, the number may be
; normalised by up to 64 bits, but the current round and sticky bits
; are guaranteed to be 0 - this ensures that the eventual sticky bit
; is guaranteed to be zero, and that the round bit is also zero if a
; non-zero normalisation shift is required;
;
; (b) The exponent difference was 2 or more: in this case, the number can
; be normalised by at most one bit, but the eventual sticky bit may be
; non-zero.
;
; So we will first try to normalise by 1 bit, bringing the guard bit into the
; mantissa if necessary.
TST OP1mhi,#EIUnits_bit ;Already normalised?
IF Interworking :LOR: Thumbing
LDMNEFD Rsp!,{LR} ;Return if so
BXNE LR
ELSE
LDMNEFD Rsp!,{PC} ;Return if so
ENDIF
ADDS Rarith,Rarith,Rarith ;Shift mhi/mlo/guard/round/sticky
ADCS OP1mlo,OP1mlo,OP1mlo ; left by one bit to form new
ADC OP1mhi,OP1mhi,OP1mhi ; mhi/mlo/round/sticky
SUB RNDexp,RNDexp,#1
; If the result is normalised now, we're done. Otherwise, we know that a
; normalisation shift of 1-63 is still required, that the exponent
; difference was 0 or 1, and thus that the new round and sticky bits are
; both zero.
; However, at this point, we need to look out for the case of a magnitude
; subtraction of two equal numbers - for which we need to apply the special
; IEEE sign rule (i.e. -0 if rounding to -infinity, otherwise +0).
TST OP1mhi,#EIUnits_bit ;Normalised now?
IF Interworking :LOR: Thumbing
LDMNEFD Rsp!,{LR} ;Return if so
BXNE LR
ELSE
LDMNEFD Rsp!,{PC} ;Return if so
ENDIF
; LR is used as a scratch destination here: the return address was stacked
; at AddSub_Common and every exit from this routine pops it.
ORRS LR,OP1mhi,OP1mlo ;Is result zero?
BLNE $NormaliseOp1_str ;If not, complete normalisation
IF Interworking :LOR: Thumbing
LDMNEFD Rsp!,{LR} ; and return (note NormaliseOp1 exits with NE true)
BXNE LR
ELSE
LDMNEFD Rsp!,{PC} ; and return (note NormaliseOp1 exits with NE true)
ENDIF
; We know the result is a zero, with sign determined by the rounding mode.
; Everything except the sign and exponent has been correctly set already,
; so we test the rounding mode, set the sign and exponent, and return.
[ :DEF: addsub_s
; Build with addsub_s defined: dOPh and dOPl are simply cleared and the
; rounding mode is not examined on this path.
; NOTE(review): presumably dOPh/dOPl are the result registers of the library
; routine, in which case an exact-zero difference is always returned as +0
; here - confirm that this is intended, given the sign rule described above.
MOV dOPh, #0
MOV dOPl, #0
ASSERT dOPh = fOP :LOR: dOPl = fOP
; ADD sp,sp,#4 ; Pop link register off the stack
; VReturn
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
|
; Otherwise: the rounding mode field is extracted from Rins; the result is
; -0 if it equals RM_MinusInf and +0 for any other mode, with a zero
; exponent.
AND Rtmp,Rins,#RM_mask
TEQ Rtmp,#RM_MinusInf
MOVEQ OP1sue,#Sign_bit
MOVNE OP1sue,#0
MOV RNDexp,#0
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
]
] ; Conditional assembly of AddSub
;===========================================================================
[ :DEF: mul_s :LOR: FPEWanted :LOR: FPASCWanted
; Routine to multiply or fast-multiply two internal format floating point
; numbers. It has two entry points: "MultFPE", which has an optimised fast
; track for both operands being common, and "MultFPASC", which avoids the
; test for this optimised fast track - since it should never happen. The
; second entry point lies a long way down in the source to avoid addressing
; constraints.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
ASSERT RNDexp = OP2sue ;We swap over from the use of OP2sue to that
; of RNDexp partway through this routine.
[ FPEWanted
MultFPE
CDebug3 3,"MultFPE: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
; Start by detecting the "fast track" case of both operands being common.
TST OP1sue,#Uncommon_bit
TSTEQ OP2sue,#Uncommon_bit
BNE Mult_Uncommon
; If either operand is a zero, the product is a zero. Because the numbers
; are common and assumed not to be unnormalised URD results, we can check
; for zeros by means of the units bits.
ANDS Rtmp,OP1mhi,OP2mhi
ASSERT EIUnits_pos = 31
BPL Mult_Zero
; Both operands may now be assumed to be normalised numbers. Produce the
; result sign and the prospective result exponent.
]
[ :DEF: mul_s :LOR: FPEWanted
[ FPLibWanted
__fp_mult_common
]
AND Rtmp,OP1sue,#ToExp_mask
AND Rtmp2,OP2sue,#ToExp_mask
EOR OP1sue,OP1sue,OP2sue ;Produce result sign
AND OP1sue,OP1sue,#Sign_bit
ADD RNDexp,Rtmp,Rtmp2
SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa
; overflow is exp1+exp2-bias+1
]
; This subsidiary entry point deals with multiplying two normalised
; mantissas together and adjusting the exponent if necessary.
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the
; remaining bits are zero;
; OP1mhi = First operand mantissa, high word;
; OP1mlo = First operand mantissa, low word;
; RNDexp = Prospective result exponent, which may be negative; this
; needs to be decremented if mantissa overflow doesn't occur;
; OP2mhi = Second operand mantissa, high word;
; OP2mlo = Second operand mantissa, low word;
; Rins = instruction (may be needed to discriminate between MUF and
; FML);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: OP1sue = the result's sign, with an uncommon bit of 0; the
; remaining bits are zero;
; OP1mhi, OP1mlo = the result's mantissa;
; RNDexp = the result exponent, which may be negative;
; Rarith holds the round bit (in bit 31) and the sticky bit (in bits
; 30:0) if the destination precision is extended; if the
; destination precision is single or double, it holds part of the
; sticky bit (the remainder of which is held in bits below the
; round bit in OP1mhi and OP1mlo);
; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved.
Mult_Mantissas
; We will split into various lines, depending on the operands:
;
; if ((OP1mlo = 0) AND (OP2mlo = 0))
; do 32x32->64 multiplication of OP1mhi by OP2mhi;
; if ((OP1mlo = 0) AND (OP2mlo != 0))
; do 32x64->96 multiplication of OP1mhi by (OP2mhi,OP2mlo);
; if ((OP1mlo != 0) AND (OP2mlo = 0))
; do 64x32->96 multiplication of (OP1mhi,OP1mlo) by OP2mhi;
; if ((OP1mlo != 0) AND (OP2mlo != 0))
; do 64x64->128 multiplication of (OP1mhi,OP1mlo) by (OP2mhi,OP2mlo);
;
; In each case, this is then followed by code to deal with the case of no
; mantissa overflow (i.e. the top bit of the product was zero) and to create
; the round and sticky bits.
;
; This is all designed to make multiplications involving single precision
; numbers, immediate constants and/or FLTed integers as efficient as
; possible.
;
; If the instruction is an FML, we simply assume that both mantissa low
; words are zero.
[ FPEWanted
; Fast_bit set in the instruction: go straight to the 32x32 code without
; looking at the mantissa low words at all.
TST Rins,#Fast_bit
BNE Mult_32x32
]
TEQ OP1mlo,#0
BEQ Mult_32xX ;First operand has a zero low word
; Mult_64xX: the first operand has a non-zero low word; choose between the
; 64x32 and 64x64 cases on the second operand's low word.
Mult_64xX
TEQ OP2mlo,#0
BEQ Mult_64x32 ;Otherwise fall through to Mult_64x64
Mult_64x64
; Full 64x64->128 multiplication of (OP1mhi,OP1mlo) by (OP2mhi,OP2mlo).
; OP1sue, Rfpsr, Rins and LR are used as work registers below, so they are
; saved here and restored on both exit paths.
STMFD Rsp!,{OP1sue,Rfpsr,Rins,LR}
; We do this multiplication by applying the trick (described in Knuth
; section 4.3.3) for reducing the obvious algorithm involving four 32x32
; multiplications to just three plus some additions and sign manipulations,
; by means of the formula:
;
; (a1*2^32 + a0) * (b1*2^32 + b0)
; = a1*b1*(2^64+2^32) + (a1-a0)*(b0-b1)*2^32 + a0*b0*(2^32+1)
;
; This has to be done carefully: the a1*b1 and a0*b0 multiplications are
; straightforward 32x32 multiplications, but each of a1-a0 and b0-b1 is in
; the range -2^32 < x < 2^32. To see what effect this has, we need to
; look at what we will get if we simply do the a1-a0 and b0-b1 subtractions,
; then multiply the results as unsigned numbers:
;
; (A) If a1-a0 >= 0, b0-b1 >= 0:
; product obtained = (a1-a0)*(b0-b1)
;
; (B) If a1-a0 >= 0, b0-b1 < 0:
; product obtained = (a1-a0)*(b0-b1+2^32)
; = (a1-a0)*(b0-b1) + (a1-a0)*2^32
;
; (C) If a1-a0 < 0, b0-b1 >= 0:
; product obtained = (a1-a0+2^32)*(b0-b1)
; = (a1-a0)*(b0-b1) + (b0-b1)*2^32
;
; (D) If a1-a0 < 0, b0-b1 < 0:
; product obtained = (a1-a0+2^32)*(b0-b1+2^32)
; = (a1-a0)*(b0-b1) + ((a1-a0)+(b0-b1))*2^32 + 2^64
; = (a1-a0)*(b0-b1)
; + ((a1-a0+2^32) + (b0-b1+2^32))*2^32 - 2^64
;
; So to get the real value of (a1-a0)*(b0-b1), we must look at the signs of
; a1-a0 and b0-b1: if a1-a0 is in fact negative, we must subtract the
; calculated value of b0-b1 from the high word of the calculated product; if
; b0-b1 is in fact negative, we must subtract the calculated value of a1-a0
; from the high word of the calculated product; and finally we must add 2^64
; if both were negative.
;
; This last step is awkward. However, note that (a1-a0)*(b0-b1) is actually
; guaranteed to lie in the range -2^64 < x < 2^64, which means that it is
; sufficient to calculate its value modulo 2^64 (i.e. disregarding carries
; out of the high word and the possible addition of 2^64), provided we take
; care to get the sign word right.
;
; We do the 32x32 multiplications by means of standard macros. First
; multiply a1*b1 = OP1mhi*OP2mhi into (OP1sue,Rfpsr).
Split16 OP1sue,Rfpsr,OP1mhi
Mul64 OP1sue,Rfpsr,OP1sue,Rfpsr,OP2mhi,,,Rarith,Rtmp,Rtmp2
; Multiply a0*b0 = OP1mlo*OP2mlo into (Rins,R14).
Split16 Rins,R14,OP1mlo
Mul64 Rins,R14,Rins,R14,OP2mlo,,,Rarith,Rtmp,Rtmp2
; Next, we need to calculate a1*b1*(2^64+2^32) + a0*b0*(2^32+1)
;
; = (2^32+1) * (a1*b1*2^32 + a0*b0)
;
; Note that a1*b1*2^32 + a0*b0 <= (2^32-1)*(2^32-1)*(2^32+1)
; = (2^32-1)*(2^64-1) < 2^96 and that (2^32+1) * (a1*b1*2^32 + a0*b0)
; <= (2^32+1)*(2^32-1)*(2^32-1)*(2^32+1) = (2^64-1)^2 < 2^128, so the
; calculations can be done respectively in 3- and 4-word unsigned
; arithmetic.
ADDS Rfpsr,Rfpsr,Rins ;Put a1*b1*2^32 + a0*b0 into
ADC OP1sue,OP1sue,#0 ; (OP1sue,Rfpsr,R14)
ADDS Rins,Rfpsr,R14 ;Then multiply by 2^32+1, putting
ADCS Rfpsr,Rfpsr,OP1sue ; result in (OP1sue,Rfpsr,Rins,R14)
ADC OP1sue,OP1sue,#0
; Calculate a1-a0 = OP1mhi-OP1mlo into Rtmp,
; b0-b1 = OP2mlo-OP2mhi into Rtmp2,
; addend to high word of calculated (a1-a0)*(b0-b1) product into
; Rarith, and
; correct sign of (a1-a0)*(b0-b1) product into OP1mhi.
; The sign word is 0 for a positive or zero result, &FFFFFFFF for a negative
; result - i.e. it is the word which, when prefixed to the 64-bit product
; calculated otherwise, gives us the true result as a 96-bit signed number.
; Getting this right is slightly tricky, because of the possibilities of
; a1-a0 and b0-b1 being zero and thus invalidating the usual EOR rule about
; the sign. The key to the code below is that if Rtmp = a1-a0 comes out as
; 0, OP1mhi and OP1mlo come out as zero and Rtmp2 never gets set - but this
; last doesn't matter, since zero times anything is zero!
; Note also that we don't care about carries out of the addend, since they
; go into the sign word, which we are getting right by other means.
; In the sequence below, LO (carry clear) after a SUBS means the
; subtraction borrowed, i.e. its true result is negative. The MOVs do not
; alter the flags, so the first MVNLO/SUBLO pair and the SUBNES all act on
; the flags from the first SUBS; the MOVEQ and the second MVNLO/SUBLO pair
; act on the flags from the SUBNES if it was executed.
SUBS Rtmp,OP1mhi,OP1mlo ;Rtmp := a1-a0
MOV OP1mhi,#0 ;Sign if a1-a0,b0-b1 both +ve
MOV Rarith,#0 ;Addend if both +ve
MVNLO OP1mhi,OP1mhi ;If a1-a0 -ve, adjust sign and
SUBLO Rarith,OP2mhi,OP2mlo ; addend = -(b0-b1) = b1-b0
SUBNES Rtmp2,OP2mlo,OP2mhi ;Rtmp2 := b0-b1
MOVEQ OP1mhi,#0 ;Override sign if b0-b1 = 0
MVNLO OP1mhi,OP1mhi ;If b0-b1 -ve, adjust sign and
SUBLO Rarith,Rarith,Rtmp ; addend += -(a1-a0)
; Finish calculating the real value of (a1-a0)*(b0-b1) into
; (OP1mhi,OP1mlo,Rarith). I.e. multiply Rtmp by Rtmp2, adding the addend
; held in Rarith into the high word and putting the result in
; (OP1mlo,Rarith). OP1mhi is already OK.
Split16 OP2mhi,OP2mlo,Rtmp
Mul64 OP1mlo,Rarith,OP2mhi,OP2mlo,Rtmp2,Rarith,,Rtmp,Rtmp2,OP1mlo
; Now add a1*b1*(2^64+2^32) + a0*b0*(2^32+1) and (a1-a0)*(b0-b1)*2^32
; together, putting the result in (OP1mhi,OP1mlo,Rarith,R14). Note the low
; word is in R14 already.
ADDS Rarith,Rins,Rarith
ADCS OP1mlo,Rfpsr,OP1mlo
ADCS OP1mhi,OP1sue,OP1mhi ;N := top bit of the product
; Transfer R14 into the sticky bit, without affecting flags. Also make
; certain we don't affect the guard or round bits.
; (R14 OR (R14 LSL 2)) LSR 2 equals (R14 LSR 2) OR (R14 AND &3FFFFFFF): it
; is non-zero exactly when R14 is non-zero and has bits 31:30 clear, so
; only bits 29:0 of Rarith can be changed. Neither ORR has an S suffix, so
; the flags from the ADCS above survive.
ORR R14,R14,R14,LSL #2
ORR Rarith,Rarith,R14,LSR #2
; If result is normalised, return. Otherwise normalise by shifting left one
; bit.
; MI here still reflects the ADCS into OP1mhi above.
IF Interworking :LOR: Thumbing
LDMMIFD Rsp!,{OP1sue,Rfpsr,Rins,LR}
BXMI LR
ELSE
LDMMIFD Rsp!,{OP1sue,Rfpsr,Rins,PC}
ENDIF
; Top bit clear: double the 96-bit value (OP1mhi,OP1mlo,Rarith) and
; decrement the exponent to compensate.
ADDS Rarith,Rarith,Rarith
ADCS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
SUB RNDexp,RNDexp,#1
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{OP1sue,Rfpsr,Rins,LR}
BX LR
ELSE
LDMFD Rsp!,{OP1sue,Rfpsr,Rins,PC}
ENDIF
Mult_64x32
; 64x32->96 multiplication of (OP1mhi,OP1mlo) by OP2mhi; this line is
; reached only when OP2mlo is zero, which is why OP2mlo can be reused as a
; work register below.
; To perform this multiplication, we do two 32x32 multiplications, then add
; the results together. We use the standard macros for the purpose.
; NOTE(review): reading the Mul64 operands as (result high, result low,
; split multiplicand high half, low half, other multiplicand, ...), as the
; a1*b1 multiplication in Mult_64x64 suggests, the first Mul64 leaves
; OP2mhi*OP1mhi in (OP2mhi,OP1mhi) and the second leaves OP2mhi*OP1mlo in
; (OP2mlo,Rarith) - confirm against the macro definition.
Split16 OP2mlo,Rarith,OP2mhi
Mul64 OP2mhi,OP1mhi,OP2mlo,Rarith,OP1mhi,,,Rtmp,Rtmp2,OP2mhi
Mul64 OP2mlo,Rarith,OP2mlo,Rarith,OP1mlo,,,Rtmp,Rtmp2,OP1mlo
; Add the two partial products, the first one weighted by 2^32, leaving the
; 96-bit result in (OP1mhi,OP1mlo,Rarith).
ADDS OP1mlo,OP2mlo,OP1mhi
ADCS OP1mhi,OP2mhi,#0 ;N := top bit of the product
; If the top bit was clear, we need to shift the product, round and sticky
; bits left by one bit and decrement the exponent. Otherwise, everything is
; ready for the return.
IF Interworking :LOR: Thumbing
BXMI LR
ELSE
MOVMI PC,LR
ENDIF
; Top bit clear: double (OP1mhi,OP1mlo,Rarith) and decrement the exponent.
ADDS Rarith,Rarith,Rarith
ADCS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
SUB RNDexp,RNDexp,#1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
; Mult_32xX: the first operand has a zero low word; choose between the
; 32x32 and 32x64 cases on the second operand's low word.
Mult_32xX
TEQ OP2mlo,#0
BEQ Mult_32x32 ;Otherwise fall through to Mult_32x64
Mult_32x64
; 32x64->96 multiplication of OP1mhi by (OP2mhi,OP2mlo); OP1mlo is zero
; here, which is why it can be reused as a work register below.
; To perform this multiplication, we do two 32x32 multiplications, then add
; the results together. We use the standard macros for the purpose.
; NOTE(review): reading the Mul64 operands as (result high, result low,
; split multiplicand high half, low half, other multiplicand, ...), as the
; a1*b1 multiplication in Mult_64x64 suggests, the first Mul64 leaves
; OP1mhi*OP2mhi in (OP1mhi,OP2mhi) and the second leaves OP1mhi*OP2mlo in
; (OP1mlo,Rarith) - confirm against the macro definition.
Split16 OP1mlo,Rarith,OP1mhi
Mul64 OP1mhi,OP2mhi,OP1mlo,Rarith,OP2mhi,,,Rtmp,Rtmp2,OP1mhi
Mul64 OP1mlo,Rarith,OP1mlo,Rarith,OP2mlo,,,Rtmp,Rtmp2,OP2mlo
; Add the two partial products, the first one weighted by 2^32, leaving the
; 96-bit result in (OP1mhi,OP1mlo,Rarith).
ADDS OP1mlo,OP1mlo,OP2mhi
ADCS OP1mhi,OP1mhi,#0 ;N := top bit of the product
; If the top bit was clear, we need to shift the product, round and sticky
; bits left by one bit and decrement the exponent. Otherwise, everything is
; ready for the return.
IF Interworking :LOR: Thumbing
BXMI LR
ELSE
MOVMI PC,LR
ENDIF
; Top bit clear: double (OP1mhi,OP1mlo,Rarith) and decrement the exponent.
ADDS Rarith,Rarith,Rarith
ADCS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
SUB RNDexp,RNDexp,#1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
[ FPLibWanted
; __fp_mult_fast_common: library entry point (assembled only when
; FPLibWanted is set) that computes the result sign and prospective
; exponent and then falls straight through into Mult_32x32, so the mantissa
; low words are never examined.
KEEP |$F__fp_mult_fast_common|
|$F__fp_mult_fast_common|
__fp_mult_fast_common
; This code duplicated from above for the fast case.
; Extract both exponent fields before anything is overwritten.
AND Rtmp,OP1sue,#ToExp_mask
AND Rtmp2,OP2sue,#ToExp_mask
EOR OP1sue,OP1sue,OP2sue ;Produce result sign
AND OP1sue,OP1sue,#Sign_bit ;Keep the sign bit only; all else cleared
; RNDexp is the same register as OP2sue (asserted earlier in this file), so
; it must only be written once OP2sue has been read for the last time.
ADD RNDexp,Rtmp,Rtmp2
; Subtract (EIExp_bias-1) in two byte-sized pieces; the ASSERT below checks
; that the constant fits in the 16 bits these two pieces cover.
SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa
; overflow is exp1+exp2-bias+1
]
Mult_32x32
; 32x32->64 multiplication of OP1mhi by OP2mhi, leaving the product in
; (OP1mhi,OP1mlo). The incoming mantissa low words are not read: OP1mlo is
; overwritten with the low word of the product and OP2mlo is used as a work
; register.
; Only the high words of the operand mantissas need to be multiplied
; together. Use the standard macros for this purpose.
; NOTE(review): the "S" argument presumably makes Mul64 set the flags on
; the high word of the result, since the MI test below has no other
; flag-setting instruction in front of it - confirm against the macro
; definition.
Split16 OP2mlo,Rarith,OP2mhi
Mul64 OP1mhi,OP1mlo,OP2mlo,Rarith,OP1mhi,,S,Rtmp,Rtmp2,OP1mhi
; The round and sticky bits are always going to be zero.
MOV Rarith,#0 ;No S suffix, so the flags are left alone
; If the top bit was clear, we need to shift the product left one bit and
; decrement the exponent. Otherwise we're done.
IF Interworking :LOR: Thumbing
BXMI LR
ELSE
MOVMI PC,LR
ENDIF
; Top bit clear: double (OP1mhi,OP1mlo) and decrement the exponent. Rarith
; is zero, so it needs no shifting.
ADDS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
SUB RNDexp,RNDexp,#1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional compilation of Mult
;===========================================================================
[ :DEF: div_s :LOR: FPEWanted :LOR: FPASCWanted
; Routine to divide, reverse-divide, fast-divide or fast-reverse-divide two
; internal format floating point numbers. It has two entry points: "DivFPE",
; which has an optimised fast track for both operands being common, and
; "DivFPASC", which avoids the test for this optimised fast track - since it
; should rarely happen. The second entry point lies a long way down in the
; source to avoid addressing constraints.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
ASSERT RNDexp = OP2sue ;We swap over from the use of OP2sue to that
; of RNDexp partway through this routine.
[ FPEWanted
; DivFPE: division entry point, assembled only when FPEWanted is set. It
; dispatches on the two operands and the instruction as follows:
;   - if either operand has its uncommon bit set, branch to Div_Uncommon;
;   - if either operand has a clear units bit (which identifies a zero,
;     given the assumptions stated below), branch to Div_Zero;
;   - if RevDiv_bit is set in the instruction, branch to
;     Div_Common_Swapped, skipping the operand exchange;
;   - otherwise fall out of the bottom of this conditional section into
;     the operand exchange that follows it.
DivFPE
CDebug3 3,"DivFPE: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
; Start by detecting the "fast track" case of both operands being common.
; The second test is only performed if the first one found the bit clear,
; so NE after the pair means that at least one operand is uncommon.
TST OP1sue,#Uncommon_bit
TSTEQ OP2sue,#Uncommon_bit
BNE Div_Uncommon
; If either operand is a zero, we need to take special action. Because the
; numbers are common and assumed not to be unnormalised URD results, we can
; check for zeros by means of the units bits.
; The units bit is bit 31 of the mantissa high word (see the ASSERT), so
; bit 31 of the AND - and hence the N flag - is set only if both units bits
; are set. Rtmp itself is just a scratch destination here.
ANDS Rtmp,OP1mhi,OP2mhi
ASSERT EIUnits_pos = 31
BPL Div_Zero ;N clear => at least one units bit is clear
; Both operands may now be assumed to be normalised numbers. We now know
; that we are not going to need to know the operands for trap purposes, so
; we can swap them if this is a normal division rather than a reverse
; division.
TST Rins,#RevDiv_bit
BNE Div_Common_Swapped ;Reverse division: no swap needed
]
; Operand exchange for a normal (non-reverse) division. The three words of
; operand 1 are swapped with the corresponding words of operand 2, using
; Rtmp as the scratch register, so that the code from Div_Common_Swapped
; onwards always sees the divisor in operand 1 and the dividend in operand 2.
; The __fp_div_common label (before the exchange) and the __fp_rdv_common
; label (after it, so entering there skips the exchange) are only defined
; when FPLibWanted is set.
[ FPLibWanted
__fp_div_common
]
MOV Rtmp,OP1sue ;Exchange sign/uncommon/exponent words
MOV OP1sue,OP2sue
MOV OP2sue,Rtmp
MOV Rtmp,OP1mhi ;Exchange mantissa high words
MOV OP1mhi,OP2mhi
MOV OP2mhi,Rtmp
MOV Rtmp,OP1mlo ;Exchange mantissa low words
MOV OP1mlo,OP2mlo
MOV OP2mlo,Rtmp
[ FPLibWanted
KEEP |$F__fp_rdv_common|
|$F__fp_rdv_common|
__fp_rdv_common
]
Div_Common_Swapped
; Produce the result sign and the prospective result exponent.
; At this point operand 1 is the divisor and operand 2 is the dividend, so
; Rtmp receives the divisor's exponent and Rtmp2 the dividend's.
AND Rtmp,OP1sue,#ToExp_mask
AND Rtmp2,OP2sue,#ToExp_mask
EOR OP1sue,OP1sue,OP2sue ;Produce result sign
AND OP1sue,OP1sue,#Sign_bit ;Keep the sign bit only; all else cleared
; RNDexp is the same register as OP2sue (see the ASSERT at the top of this
; routine), so it must only be written once OP2sue has been read for the
; last time, which is the EOR above.
SUB RNDexp,Rtmp2,Rtmp ;Dividend exponent - divisor exponent
; Add EIExp_bias in two byte-sized pieces; the ASSERT below checks that the
; constant fits in the 16 bits these two pieces cover.
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF00
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF
ASSERT EIExp_bias < &10000 ;Result exponent if no mantissa
; underflow is exp1-exp2+bias
; This subsidiary entry point deals with dividing a normalised mantissa by
; another and adjusting the exponent if necessary.
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the
; remaining bits are zero;
; OP1mhi = Divisor mantissa, high word;
; OP1mlo = Divisor mantissa, low word;
; RNDexp = Prospective result exponent, which may be negative; this
; needs to be decremented if mantissa underflow occurs;
; OP2mhi = Dividend mantissa, high word;
; OP2mlo = Dividend mantissa, low word;
; Rins = instruction (needed to determine precision; may be needed
; to discriminate between normal and fast divisions);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: OP1sue = the result's sign, with an uncommon bit of 0; the
; remaining bits are zero;
; OP1mhi, OP1mlo = the result's mantissa;
; RNDexp = the result exponent, which may be negative;
; Rarith holds the round bit (in bit 31) and the sticky bit (in bits
; 30:0) if the destination precision is extended; if the
; destination precision is single or double, it holds part of the
; sticky bit (the remainder of which is held in bits below the
; round bit in OP1mhi and OP1mlo);
; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved.
Div_Mantissas
STMFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
CDebug2 4,"Div_Mantissas: dividend =",OP2mhi,OP2mlo
CDebug2 4," divisor =",OP1mhi,OP1mlo
CDebug1 4," exponent =",RNDexp
; We will do the mantissa division by an algorithm which is a hybrid between
; Newton-Raphson approximation and ordinary long division: this results in
; division being done to IEEE accuracy, yet more than 50% faster than the
; straightforward long division technique. A summary of the algorithm is:
;
; (a) Use table look-up to get an initial approximation to the reciprocal
; of the divisor;
;
; (b) Use two iterations of Newton-Raphson to improve the reciprocal
; approximation to one with about 15 bits accuracy;
;
; (c) Do long division base 2^13, using the reciprocal approximation to
; determine the result "digits" - which are in fact fixed point
; numbers with 13 bits before the binary point and 3 after it;
;
; (d) Resolve the exact values of the last three bits by ordinary long
; division;
;
; (e) Adjust the exponent and shift the mantissa if mantissa underflow
; occurs, and create the sticky bit.
;
; Exact details of the algorithm appear in comments next to the relevant
; parts of the code below.
;
; The long division is performed for 2 steps for single precision, 4 steps
; for double precision and 5 steps for extended precision, producing 2*13+3
; = 29, 4*13+3 = 55 and 5*13+3 = 68 bits respectively, plus a sticky bit in
; each case.
;
; Note that this algorithm has been specifically tailored to the software
; environment - e.g. the availability of 32x32->32 bit multiplication and
; the fact that negative partial remainders during the long division will
; cause problems. This leads to some apparently strange bits of code below -
; e.g. getting less accuracy from a Newton-Raphson iteration than might
; appear to be available, in order to preserve knowledge of the sign of the
; error.
;
; In what follows, we will refer to the true mathematical value of the
; dividend mantissa as P, that of the divisor as D, that of the reciprocal
; of the divisor as R and that of the quotient as Q. So Q = P/D = P*R are
; exact mathematical relationships. Also, we have P = (2^32*OP2mhi +
; OP2mlo)*2^(-63), D = (2^32*OP1mhi + OP1mlo)*2^(-63).
; First step: initialise by breaking the divisor up into 16-bit chunks,
; held in (OP1sue,Rfpsr,Rins,R14).
Split16 OP1sue,Rfpsr,OP1mhi
Split16 Rins,R14,OP1mlo
; Second step: use table look-up to get an approximation to R. Specifically,
; we load Rarith with an 8-bit value such that we know:
;
; R <= Rarith*2^(-7) < R + 2^(-6)
[ CoreDebugging = 0
ADR Rarith,Recip_Table-128 ;-128 to cancel units bit
|
ADRL Rarith,Recip_Table-128 ;-128 to cancel units bit
]
LDRB Rarith,[Rarith,OP1sue,LSR #8]
CDebug1 5,"Table look-up approx'n is",Rarith
; Third step: use a Newton-Raphson iteration to improve this to an 11-bit
; value in Rarith such that:
;
; R < Rarith*2^(-10) < R + 2^(-9)
;
; Details: Let W be the current value of Rarith, so we have:
;
; R <= W*2^(-7) < R + 2^(-6)
;
; Let X be the first 16 bits of D (i.e. OP1sue), incremented by 1. This has
; the property that:
;
; D < X*2^(-15) <= D + 2^(-15)
;
; Suppose further that W*2^(-7) = R+e, with 0 <= e < 2^(-6), and X*2^(-15) =
; D+f, with 0 < f <= 2^(-15).
;
; Now let Y = W * (2^23 - X*W), which is a calculation that can be performed
; without overflowing a word. This is equivalent to:
;
; Y*2^(-29) = W*2^(-7) * (2 - X*2^(-15) * W*2^(-7))
;
; = (R+e) * (2 - (D+f)*(R+e))
;
; = (R+e) * (2 - (1 + D*e + R*f + e*f)), since D*R=1 exactly,
;
; = (R+e) * (1 - D*e - R*f - e*f)
;
; = R + e - e - D*e*e - R*R*f - R*e*f - R*e*f - e*e*f, since D*R=1,
;
; = R - D*e*e - R*R*f - 2*R*e*f - e*e*f
;
; Since R > 0, D > 0, e >= 0 and f > 0, this is clearly less than R. On the
; other hand, we know that R <= 1, D < 2, e < 2^(-6) and f <= 2^(-15). So:
;
; R > Y*2^(-29)
; > R - 2^(-11) - 2^(-15) - 2^(-20) - 2^(-27)
;
; Now let Z be Y shifted right 19 bits. This gives us:
;
; Y*2^(-29) - 2^(-10) < Z*2^(-10) <= Y*2^(-29)
;
; Combining the inequalities, we get:
;
; R - 2^(-9) < R - 2^(-11) - 2^(-15) - 2^(-20) - 2^(-27) - 2^(-10)
; < Y*2^(-29) - 2^(-10)
; < Z*2^(-10)
; <= Y*2^(-29)
; < R
;
; So if we put Rarith = Z+2, we get:
;
; R < Rarith*2^(-10) < R + 2^(-9),
;
; as desired.
MLA Rtmp,OP1sue,Rarith,Rarith ;Rtmp := (X-1)*W + W = X*W
RSB Rtmp,Rtmp,#1:SHL:23 ;Rtmp := 2^23 - X*W
MUL Rarith,Rtmp,Rarith ;Rarith := W*(2^23 - X*W) = Y
MOV Rarith,Rarith,LSR #19 ;Shift right 19 bits and add
ADD Rarith,Rarith,#2 ; 2 to get new approximation
CDebug1 5,"First N-R approx'n is",Rarith
; Fourth step: use a Newton-Raphson iteration to improve this to a 16-bit
; value in Rarith such that:
;
; R - 2^(-15) < Rarith*2^(-16) < R
;
; Details: Let W be the current value of Rarith, so we have:
;
; R < W*2^(-10) < R + 2^(-9)
;
; Let X be the first 19 bits of D (i.e. the top 19 bits of OP1mhi),
; incremented by 1. This has the property that:
;
; D < X*2^(-18) <= D + 2^(-18)
;
; Suppose further that W*2^(-10) = R+e, with 0 < e < 2^(-9), and X*2^(-18)
; = D+f, with 0 < f <= 2^(-18).
;
; Now let Y = W * (2^29 - X*W): part of this calculation will require 2-word
; arithmetic. This is equivalent to:
;
; Y*2^(-38) = W*2^(-10) * (2 - X*2^(-18) * W*2^(-10))
;
; = (R+e) * (2 - (D+f)*(R+e))
;
; = R - D*e*e - R*R*f - 2*R*e*f - e*e*f, as in the third step.
;
; Since R > 0, D > 0, e >= 0 and f > 0, this is clearly less than R. On the
; other hand, we know that R <= 1, D < 2, e < 2^(-9) and f <= 2^(-18). So:
;
; R > Y*2^(-38)
; > R - 2^(-17) - 2^(-18) - 2^(-26) - 2^(-36)
;
; Now let Z be Y shifted right 22 bits. This gives us:
;
; Y*2^(-38) - 2^(-16) < Z*2^(-16) <= Y*2^(-38)
;
; Combining the inequalities, we get:
;
; R - 2^(-15) < R - 2^(-17) - 2^(-18) - 2^(-26) - 2^(-36) - 2^(-16)
; < Y*2^(-38) - 2^(-16)
; < Z*2^(-16)
; <= Y*2^(-38)
; < R
;
; So if we put Rarith = Z, we get the desired inequality.
MOV Rtmp,OP1mhi,LSR #13 ;Rtmp := X-1
MLA Rtmp2,Rtmp,Rarith,Rarith ;Rtmp2 := (X-1)*W + W = X*W
RSB Rtmp2,Rtmp2,#1:SHL:29 ;Rtmp2 := 2^29 - X*W
Split16 Rtmp,Rtmp2,Rtmp2 ;Rtmp/Rtmp2 := top/bottom half
MUL OP1mlo,Rtmp2,Rarith ;OP1mhi, OP1mlo := two
MUL OP1mhi,Rtmp,Rarith ; parts of product with W
ADD Rarith,OP1mhi,OP1mlo,LSR #16 ;Rarith := Y >> 16
MOV Rarith,Rarith,LSR #6 ;Rarith := Y >> 22
CDebug1 5,"Second N-R approx'n is",Rarith
; Fifth step: initialise the partial remainder - its binary point lies to
; the right of bit 30 of its top word to line up well with the results of
; later multiplications.
MOVS OP2mhi,OP2mhi,LSR #1
MOVS OP2mlo,OP2mlo,RRX
MOVCC OP2sue,#0
MOVCS OP2sue,#TopBit
; Sixth step: do the first iteration of the long division process. The
; register allocation during this is:
;
; OP1sue, Rfpsr, Rins, R14: Divisor, in 16-bit chunks; its binary point is
; considered to lie to the right of bit 15 of
; OP1sue;
; OP1mhi, OP1mlo: Quotient so far (Rarith joins into this near the
; end of the calculation); its binary point is
; considered to lie to the right of bit 31 of
; OP1mhi;
; OP2mhi, OP2mlo, OP2sue: Partial remainder; its binary point is
; considered to lie to the right of bit 30 of
; OP2mhi;
; Rarith: 16-bit reciprocal approximation, until near the
; end of the calculation; its binary point lies to
; the *left* of bit 15;
; Rtmp, Rtmp2: Temporaries.
;
; Some of these registers (OP1mhi and OP1mlo) only become set some way into
; the calculation: until they do become set, they should be regarded as
; being 0.
;
; The details of iteration N (for N=0 to 4) of the long division process
; are:
;
; Let D be the divisor represented by (OP1sue,Rfpsr,Rins,R14), and let R =
; 1/D be its reciprocal. Let A be the reciprocal approximation represented
; by Rarith from now until near the end of the calculation - i.e. A =
; Rarith*2^(-16). We know that:
;
; 1 <= D < 2;
; 0.5 < R <= 1;
; R-2^(-15) < A < R
;
; Let Q[N] be the quotient represented by those of OP1mhi, OP1mlo and Rarith
; that have become set at the end of iteration N-1/start of iteration N -
; i.e.:
;
; Q[0] = 0;
; Q[1],Q[2] = (OP1mhi at appropriate time) * 2^(-31);
; Q[3],Q[4] = (OP1mhi at appropriate time) * 2^(-31)
; + (OP1mlo at appropriate time) * 2^(-63);
; Q[5] = (OP1mhi at appropriate time) * 2^(-31)
; + (OP1mlo at appropriate time) * 2^(-63)
; + (Rarith at appropriate time) * 2^(-95);
;
; Let P[N] be the partial remainder represented by those of OP2mhi, OP2mlo
; and OP2sue that have become set at the end of iteration N-1/start of
; iteration N - i.e.:
;
; P[i] = (OP2mhi at appropriate time) * 2^(-30)
; + (OP2mlo at appropriate time) * 2^(-62)
; + (OP2sue at appropriate time) * 2^(-94);
;
; Finally, let P be the original dividend - i.e. P is the current value of
; OP2mhi*2^(-31) + OP2mlo*2^(-63).
;
; For i=0, we can clearly make the following four statements:
;
; (a) Q[i] is a multiple of 2^(-13*i-2);
;
; (b) P[i] is a multiple of 2^(-65);
;
; (c) P = Q[i]*D + P[i]*2^(-13*i);
;
; (d) 0 < P[i] < 2;
;
; since Q[0] = 0 and P[0] = P. The algorithm will result in the same
; statements being true for i = 1, 2, 3, 4 and 5 as well.
;
; Iteration i of the algorithm is:
;
; Papprox = P[i], rounded down to a multiple of 2^(-15);
; digit = Papprox * A, rounded down to a multiple of 2^(-15);
; P[i+1] = (P[i] - digit*D) * 2^13
; Q[i+1] = Q[i] + digit*2^(-13*i)
;
; Proof that the four statements above are true for all i: we will do this
; by induction. We already know that they are true for i=0. So suppose they
; are true for i=N. Then:
;
; (a) Q[i+1] = Q[i] + digit*2^(-13*i)
; = (multiple of 2^(-13*i-2)) + (multiple of 2^(-15))*2^(-13*i)
; = multiple of 2^(-13*i-15)
; = multiple of 2^(-13*(i+1)-2).
;
; (b) P[i+1] = (P[i] - digit*D) * 2^13
; = 2^13 * (multiple of 2^(-65)
; - (multiple of 2^(-15)) * (multiple of 2^(-63)))
; = multiple of 2^(-65).
;
; (c) P = Q[i]*D + P[i]*2^(-13*i)
; = (Q[i+1] - digit*2^(-13*i)) * D
; + (P[i+1]*2^(-13) + digit*D) * 2^(-13*i)
; = Q[i+1]*D + P[i+1]*2^(-13*i-13)
; = Q[i+1]*D + P[i+1]*2^(-13*(i+1)).
;
; (d) First, since Papprox = P[i] rounded down to a multiple of 2^(-15) and
; R-2^(-15) < A < R, we have Papprox = P[i]-e and A = R-f, where 0 <= e
; < 2^(-15) and 0 < f < 2^(-15). Then, since digit = Papprox * A rounded
; down to a multiple of 2^(-15), we have digit = Papprox * A - g, where
; 0 <= g < 2^(-15). Putting these together, we have:
;
; digit = (P[i]-e)*(R-f) - g
; = P[i]*R - P[i]*f - e*R + e*f - g
;
; Since everything is non-negative, 'digit' is clearly at most P[i]*R.
; Conversely, since P[i] < 2, R <= 1, e < 2^(-15), f < 2^(-15) and g <
; 2^(-15), we have:
;
; P[i]*R > digit
; > P[i]*R - 2*2^(-15) - 2^(-15)*1 - 2^(-15)
; = P[i]*R - 2^(-13)
;
; Or:
;
; 0 < P[i]*R - digit < 2^(-13)
;
; Multiplying by D, which is known to satisfy 1 <= D < 2:
;
; 0 < P[i] - digit*D < 2^(-12)
;
; Multiplying by 2^(13):
;
; 0 < P[i+1] < 2
;
; Notes:
;
; (1) The subtraction to create P[i+1] is done by subtracting the four 16x16
; products formed from the digit and the 16-bit chunks of the divisor
; from the partial remainder. Two of these 32-bit products are aligned
; with the partial remainder and thus don't cause any problems. The
; other two are both mis-aligned by 16 bits. One way to subtract them
; would be to do a double word shift on them and subtract the results
; from the partial remainder: this takes 2 instructions to form the
; central shifted word and 3 for the subtraction (two of which are
; "shift and subtracts"). However, this makes use of one register more
; than we have. So the code below makes use of a trick, based on the
; fact that if we subtract the top 16 bits and the bottom 16 bits of the
; central shifted word separately, only one of the subtractions can
; cause a borrow. So if we've got a borrow after the first one, we do
; the second one without setting the condition codes, knowing that it
; won't cause a borrow; if we don't, we set the condition codes on the
; result of the second subtraction.
;
; (2) The multiplication operands are generally ordered to maximise the
; chance of early termination. This means that all but the top chunk of
; the divisor are good second operands to the multiplication, the digit
; is next best, and the top chunk of the divisor is the least good.
;
; (3) The above is in fact not exactly true, due to the fact that it saves
; some cycles not to shift P[1] and P[3] left by 13 bits, but to wait
; until P[2] and P[4] are generated, then shift them left 26 bits.
MOV Rtmp,OP2mhi,LSR #15 ;Rtmp := Papprox
MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A
MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit
MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[0] to
SUBS OP2mlo,OP2mlo,Rtmp ; form P[1]*2^(-13) - this requires
MUL Rtmp,OP1sue,Rtmp2 ; 4 multiplications and subtractions
SBC OP2mhi,OP2mhi,Rtmp ; at various alignments
MUL Rtmp,Rtmp2,R14
SUBS OP2sue,OP2sue,Rtmp,LSL #16
SBCS OP2mlo,OP2mlo,Rtmp,LSR #16
MUL Rtmp,Rtmp2,Rfpsr
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #16 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #16 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #16
MOV OP1mhi,Rtmp2,LSL #16 ;OP1mhi := Q[1]
CDebug1 5,"1st iter'n: quotient so far =",OP1mhi
CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue
; Seventh step: second iteration. At the end of this step, we check whether
; the division is single precision and branch out to termination code
; if so.
MOV Rtmp,OP2mhi,LSR #2 ;Rtmp := Papprox
MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A
MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit
MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[1]*2^(-13)
SUBS OP2sue,OP2sue,Rtmp,LSL #19 ; to form P[2]*2^(-26) - this
SBCS OP2mlo,OP2mlo,Rtmp,LSR #13 ; requires 4 multiplications and
MUL Rtmp,OP1sue,Rtmp2 ; subtractions at various alignments
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #19 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #19 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #13
MUL Rtmp,Rtmp2,R14
SUBS OP2sue,OP2sue,Rtmp,LSL #3
SBCS OP2mlo,OP2mlo,Rtmp,LSR #29
MUL Rtmp,Rtmp2,Rfpsr
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #3 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #3 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #29
MOV OP2mhi,OP2mhi,LSL #26 ;Shift by 26 bits to form P[2]
ORR OP2mhi,OP2mhi,OP2mlo,LSR #6
MOV OP2mlo,OP2mlo,LSL #26
ORR OP2mlo,OP2mlo,OP2sue,LSR #6
MOV OP2sue,OP2sue,LSL #26
ADD OP1mhi,OP1mhi,Rtmp2,LSL #3 ;OP1mhi := Q[2]
CDebug1 5,"2nd iter'n: quotient so far =",OP1mhi
CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue
LDR Rtmp,[Rsp,#12] ;Recover instruction
[ FPEWanted :LOR: FPASCWanted
TST Rtmp,#Pr1_mask ;Check for single precision
TSTEQ Rtmp,#Pr2_mask
BEQ Div_Single
|
TST Rtmp,#Single_mask ;Use a simpler encoding
BNE Div_Single
]
; Eighth step: third iteration.
MOV Rtmp,OP2mhi,LSR #15 ;Rtmp := Papprox
MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A
MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit
MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[2] to
SUBS OP2mlo,OP2mlo,Rtmp ; form P[3]*2^(-13) - this requires
MUL Rtmp,OP1sue,Rtmp2 ; 4 multiplications and subtractions
SBC OP2mhi,OP2mhi,Rtmp ; at various alignments
MUL Rtmp,Rtmp2,R14
SUBS OP2sue,OP2sue,Rtmp,LSL #16
SBCS OP2mlo,OP2mlo,Rtmp,LSR #16
MUL Rtmp,Rtmp2,Rfpsr
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #16 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #16 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #16
MOV OP1mlo,Rtmp2,LSL #22 ;(OP1mhi,OP1mlo) := Q[3]
ADD OP1mhi,OP1mhi,Rtmp2,LSR #10
CDebug2 5,"3rd iter'n: quotient so far =",OP1mhi,OP1mlo
CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue
; Ninth step: fourth iteration. At the end of this step, we check whether
; the division is double precision and branch out to termination code
; if so.
MOV Rtmp,OP2mhi,LSR #2 ;Rtmp := Papprox
MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A
MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit
MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[3]*2^(-13)
SUBS OP2sue,OP2sue,Rtmp,LSL #19 ; to form P[4]*2^(-26) - this
SBCS OP2mlo,OP2mlo,Rtmp,LSR #13 ; requires 4 multiplications and
MUL Rtmp,OP1sue,Rtmp2 ; subtractions at various alignments
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #19 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #19 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #13
MUL Rtmp,Rtmp2,R14
SUBS OP2sue,OP2sue,Rtmp,LSL #3
SBCS OP2mlo,OP2mlo,Rtmp,LSR #29
MUL Rtmp,Rtmp2,Rfpsr
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #3 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #3 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #29
MOV OP2mhi,OP2mhi,LSL #26 ;Shift by 26 bits to form P[4]
ORR OP2mhi,OP2mhi,OP2mlo,LSR #6
MOV OP2mlo,OP2mlo,LSL #26
ORR OP2mlo,OP2mlo,OP2sue,LSR #6
MOV OP2sue,OP2sue,LSL #26
ADDS OP1mlo,OP1mlo,Rtmp2,LSL #9 ;(OP1mhi,OP1mlo) := Q[4]
ADC OP1mhi,OP1mhi,#0
CDebug2 5,"4th iter'n: quotient so far =",OP1mhi,OP1mlo
CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue
LDR Rtmp,[Rsp,#12] ;Recover instruction
[ FPEWanted :LOR: FPASCWanted
TST Rtmp,#Pr1_mask ;Check for double precision
BEQ Div_Double
|
TST Rtmp,#Double_mask
BNE Div_Double
]
; Tenth step: fifth iteration. We can enter the extended precision
; termination code at the end of this iteration, since we know it must be an
; extended precision division.
MOV Rtmp,OP2mhi,LSR #15 ;Rtmp := Papprox
MUL Rtmp2,Rarith,Rtmp ;Rtmp2 := Papprox * A
MOV Rtmp2,Rtmp2,LSR #16 ;Rtmp2 := digit
MUL Rtmp,Rtmp2,Rins ;Subtract digit*D from P[4] to
SUBS OP2mlo,OP2mlo,Rtmp ; form P[5]*2^(-13) - this requires
MUL Rtmp,OP1sue,Rtmp2 ; 4 multiplications and subtractions
SBC OP2mhi,OP2mhi,Rtmp ; at various alignments
MUL Rtmp,Rtmp2,R14
SUBS OP2sue,OP2sue,Rtmp,LSL #16
SBCS OP2mlo,OP2mlo,Rtmp,LSR #16
MUL Rtmp,Rtmp2,Rfpsr
SUBCC OP2mlo,OP2mlo,Rtmp,LSL #16 ;Already got a borrow
SUBCSS OP2mlo,OP2mlo,Rtmp,LSL #16 ;No borrow yet - try for one
SBC OP2mhi,OP2mhi,Rtmp,LSR #16
MOV OP2mhi,OP2mhi,LSL #14 ;Shift by 14 bits to form 2*P[5]
ORR OP2mhi,OP2mhi,OP2mlo,LSR #18
MOV OP2mlo,OP2mlo,LSL #14
ORR OP2mlo,OP2mlo,OP2sue,LSR #18
MOV OP2sue,OP2sue,LSL #14
MOV Rarith,Rtmp2,LSL #28 ;(OP1mhi,OP1mlo,Rarith) := Q[5]
ADDS OP1mlo,OP1mlo,Rtmp2,LSR #4
ADC OP1mhi,OP1mhi,#0
CDebug3 5,"5th iter'n: quotient so far =",OP1mhi,OP1mlo,Rarith
CDebug3 5," partial remainder =",OP2mhi,OP2mlo,OP2sue
Div_Extended
; Extended precision termination code for the division. Entered by falling
; through from the fifth iteration above. It performs three further bits of
; restoring long division, folds the final partial remainder into the sticky
; bit, renormalises the quotient if necessary and returns.
;
; We've completed the main work for an extended precision division. We've
; now got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[5] in
; (OP1mhi,OP1mlo,Rarith) and twice the partial remainder P[5] in
; (OP2mhi,OP2mlo,OP2sue) such that:
;
; (a) Q[5] is a multiple of 2^(-67);
;
; (b) P[5] is a multiple of 2^(-65);
;
; (c) P = Q[5]*D + P[5]*2^(-65);
;
; (d) 0 < P[5] < 2;
;
; The main problem with this is that P[5]*2^(-65) may be almost 2^(-64),
; while Q[5] is a multiple of 2^(-67). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-67) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division. To
; save on multi-word additions and problems about carry flag use, we put the
; bits calculated into R14 and only add them into the quotient once at the
; end.
;
; Note that generating twice P[5] above with the binary point to the right
; of bit 30 of OP2mhi is equivalent to generating P[5] with the binary point
; to the right of bit 31 - i.e. to generating it in the position we want it
; to be for the code that follows. This is a trick we only use for extended
; precision, since for the other precisions, we need to be ready for another
; iteration of the algorithm above as well as for the termination code.
;
; The divisor arrives split into four 16-bit pieces; the two ORRs below pack
; it back into two words, (OP1sue,Rfpsr). This frees R14 and Rins, which are
; then reused as the extra-bit accumulator and the overflow word.
ORR OP1sue,Rfpsr,OP1sue,LSL #16 ;Reform divisor
ORR Rfpsr,R14,Rins,LSL #16
MOV R14,#0 ;Initialise extra bits
; First extra bit. No shift is needed first (see the note above about the
; position of the binary point).
SUBS Rtmp2,OP2mlo,Rfpsr ;First extra bit: trial subtraction
SBCS Rtmp,OP2mhi,OP1sue ; of divisor from partial remainder
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
; C = 1 means the trial subtraction did not borrow, i.e. the quotient bit
; is 1. ADC R14,R14,R14 computes R14 := 2*R14 + C, shifting the new bit in.
ADC R14,R14,R14 ;Accumulate bit
; Second extra bit. The left shift can carry out of OP2mhi, so Rins catches
; that bit and takes part in the trial subtraction as a third word.
MOV Rins,#0 ;Initialise overflow word
ADDS OP2sue,OP2sue,OP2sue ;Second extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADCS OP2mhi,OP2mhi,OP2mhi
ADC Rins,Rins,Rins
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor
SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder
SBCS Rins,Rins,#0
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADC R14,R14,R14 ;Accumulate bit
; Third extra bit: identical to the second.
MOV Rins,#0 ;Initialise overflow word
ADDS OP2sue,OP2sue,OP2sue ;Third extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADCS OP2mhi,OP2mhi,OP2mhi
ADC Rins,Rins,Rins
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor
SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder
SBCS Rins,Rins,#0
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADC R14,R14,R14 ;Accumulate bit
CDebug1 5,"Extra bits to add in are",R14
; (OP1mhi,OP1mlo,Rarith) now contains 68 bits of quotient, R14 three extra
; bits that need to be added into its low end and (OP2mhi,OP2mlo) the final
; partial remainder. (We've shifted all the extra bits out of OP2sue, and the
; overflow word Rins must be zero at this point.)
; This is enough bits to provide guard and round bits, plus 2 bits
; contributing to the sticky bit and enough information to complete
; generating it. We will finish generating it by setting bit 0 of Rarith if
; the partial remainder is non-zero.
ORRS Rtmp,OP2mhi,OP2mlo
ORRNE Rarith,Rarith,#1
; Now add the three extra bits into the quotient and test for mantissa
; underflow.
; The final ADCS leaves N equal to bit 31 of OP1mhi; the conditional (MI)
; return below is taken when that bit is set.
ADDS Rarith,Rarith,R14,LSL #28 ;Add extra bits into quotient
ADCS OP1mlo,OP1mlo,#0
ADCS OP1mhi,OP1mhi,#0
; If no mantissa underflow, we're ready to return. Otherwise, we must
; recover the spilled registers (to get hold of the result exponent), shift
; the mantissa left one bit, decrement the exponent and return.
; (LDM does not alter the flags, so in the interworking variant the BXMI
; still sees the N flag produced by the ADCS above.)
IF Interworking :LOR: Thumbing
LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
BXMI LR
ELSE
LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
ENDIF
; Mantissa underflow path: shift the 96-bit value (OP1mhi,OP1mlo,Rarith)
; left by one bit and decrement the exponent just reloaded into OP2sue.
LDMFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
ADDS Rarith,Rarith,Rarith
ADCS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
SUB OP2sue,OP2sue,#1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Div_Double
; Double precision termination code for the division. Branched to after the
; fourth iteration when the instruction specifies double precision. It
; performs three further bits of restoring long division, builds the sticky
; information in Rarith, renormalises the quotient if necessary and returns.
;
; We've completed the main work for a double precision division. We've now
; got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[4] in
; (OP1mhi,OP1mlo) and the partial remainder P[4] in
; (OP2mhi,OP2mlo,OP2sue) such that:
;
; (a) Q[4] is a multiple of 2^(-54);
;
; (b) P[4] is a multiple of 2^(-65);
;
; (c) P = Q[4]*D + P[4]*2^(-52);
;
; (d) 0 < P[4] < 2;
;
; (Rarith does not hold any quotient bits on this path: it is overwritten
; with the sticky information below.)
;
; The main problem with this is that P[4]*2^(-52) may be almost 2^(-51),
; while Q[4] is a multiple of 2^(-54). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-54) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division. To
; save on multi-word additions and problems about carry flag use, we put the
; bits calculated into R14 and only add them into the quotient once at the
; end.
;
; The divisor arrives split into four 16-bit pieces; the two ORRs below pack
; it back into two words, (OP1sue,Rfpsr). This frees R14 and Rins, which are
; then reused as the extra-bit accumulator and the overflow word.
ORR OP1sue,Rfpsr,OP1sue,LSL #16 ;Reform divisor
ORR Rfpsr,R14,Rins,LSL #16
MOV R14,#0 ;Initialise extra bits
; First extra bit. This first shift uses no overflow word: the top word is
; shifted with a plain ADC and any carry out of it is not captured.
ADDS OP2sue,OP2sue,OP2sue ;First extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADC OP2mhi,OP2mhi,OP2mhi
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor from
SBCS Rtmp,OP2mhi,OP1sue ; partial remainder
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
; C = 1 means the trial subtraction did not borrow, i.e. the quotient bit
; is 1. ADC R14,R14,R14 computes R14 := 2*R14 + C, shifting the new bit in.
ADC R14,R14,R14 ;Accumulate bit
; Second extra bit. The left shift can now carry out of OP2mhi, so Rins
; catches that bit and takes part in the trial subtraction as a third word.
MOV Rins,#0 ;Initialise overflow word
ADDS OP2sue,OP2sue,OP2sue ;Second extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADCS OP2mhi,OP2mhi,OP2mhi
ADC Rins,Rins,Rins
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor
SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder
SBCS Rins,Rins,#0
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADC R14,R14,R14 ;Accumulate bit
; Third extra bit: identical to the second.
MOV Rins,#0 ;Initialise overflow word
ADDS OP2sue,OP2sue,OP2sue ;Third extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADCS OP2mhi,OP2mhi,OP2mhi
ADC Rins,Rins,Rins
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor
SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder
SBCS Rins,Rins,#0
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADC R14,R14,R14 ;Accumulate bit
CDebug1 5,"Extra bits to add in are",R14
; (OP1mhi,OP1mlo) now contains 55 bits of quotient, R14 three extra bits
; that need to be added into its low end and (OP2mhi,OP2mlo) the final
; partial remainder. (We've shifted all the extra bits out of OP2sue, and
; the overflow word Rins must be zero at this point.)
; This is enough bits to provide guard and round bits, plus enough
; information to generate the sticky bit. We do this by setting Rarith to
; zero if the partial remainder is zero, non-zero if the partial remainder
; is non-zero. Note that since we know rounding will take place to double
; precision, we don't mind having the sticky bit overflow into the extended
; precision round bit.
ORR Rarith,OP2mhi,OP2mlo
; Now add the three extra bits into the quotient and test for mantissa
; underflow.
; The final ADCS leaves N equal to bit 31 of OP1mhi; the conditional (MI)
; return below is taken when that bit is set.
ADDS OP1mlo,OP1mlo,R14,LSL #9 ;Add extra bits into quotient
ADCS OP1mhi,OP1mhi,#0
; If no mantissa underflow, we're ready to return. Otherwise, we must
; recover the spilled registers (to get hold of the result exponent), shift
; the mantissa left one bit, decrement the exponent and return.
; (LDM does not alter the flags, so in the interworking variant the BXMI
; still sees the N flag produced by the ADCS above.)
IF Interworking :LOR: Thumbing
LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
BXMI LR
ELSE
LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
ENDIF
; Mantissa underflow path: shift (OP1mhi,OP1mlo) left by one bit and
; decrement the exponent just reloaded into OP2sue. Rarith is left alone:
; it only carries zero/non-zero sticky information here.
LDMFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
ADDS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
SUB OP2sue,OP2sue,#1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Div_Single
; Single precision termination code for the division. It performs three
; further bits of restoring long division, adding each bit directly into
; OP1mhi, builds the sticky information in Rarith, clears the low mantissa
; word, renormalises the quotient if necessary and returns.
;
; We've completed the main work for a single precision division. We've now
; got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[2] in
; (OP1mhi,OP1mlo,Rarith) and the partial remainder P[2] in
; (OP2mhi,OP2mlo,OP2sue) such that:
;
; (a) Q[2] is a multiple of 2^(-28);
;
; (b) P[2] is a multiple of 2^(-65);
;
; (c) P = Q[2]*D + P[2]*2^(-26);
;
; (d) 0 < P[2] < 2;
;
; The main problem with this is that P[2]*2^(-26) may be almost 2^(-25),
; while Q[2] is a multiple of 2^(-28). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-28) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division.
;
; Unlike the double and extended cases, the extra bits are not collected in
; R14: each one is added straight into OP1mhi at bit 5, 4 and 3 in turn.
; R14 is only read here (as the low 16 bits of the divisor) and is never
; written.
ORR OP1sue,Rfpsr,OP1sue,LSL #16 ;Reform divisor
ORR Rfpsr,R14,Rins,LSL #16
; First extra bit. This first shift uses no overflow word: the top word is
; shifted with a plain ADC and any carry out of it is not captured.
ADDS OP2sue,OP2sue,OP2sue ;First extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADC OP2mhi,OP2mhi,OP2mhi
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor from
SBCS Rtmp,OP2mhi,OP1sue ; partial remainder
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADDCS OP1mhi,OP1mhi,#1:SHL:5 ;Add bit to quotient
; Second extra bit. The left shift can now carry out of OP2mhi, so Rins
; catches that bit and takes part in the trial subtraction as a third word.
MOV Rins,#0 ;Initialise overflow word
ADDS OP2sue,OP2sue,OP2sue ;Second extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADCS OP2mhi,OP2mhi,OP2mhi
ADC Rins,Rins,Rins
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor
SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder
SBCS Rins,Rins,#0
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADDCS OP1mhi,OP1mhi,#1:SHL:4 ;Add bit to quotient
; Third extra bit: identical to the second.
MOV Rins,#0 ;Initialise overflow word
ADDS OP2sue,OP2sue,OP2sue ;Third extra bit: shift partial
ADCS OP2mlo,OP2mlo,OP2mlo ; remainder
ADCS OP2mhi,OP2mhi,OP2mhi
ADC Rins,Rins,Rins
SUBS Rtmp2,OP2mlo,Rfpsr ;Trial subtraction of divisor
SBCS Rtmp,OP2mhi,OP1sue ; from partial remainder
SBCS Rins,Rins,#0
MOVCS OP2mlo,Rtmp2 ;If bit is 1, really do subtraction
MOVCS OP2mhi,Rtmp
ADDCS OP1mhi,OP1mhi,#1:SHL:3 ;Add bit to quotient
; Trace the quotient word that the extra bits were just added into.
CDebug1 5,"Quotient after adding in extra bits is",OP1mhi
; (OP1mhi,OP1mlo,Rarith) now contains 29 bits of quotient and (OP2mhi,OP2mlo)
; the final partial remainder. (We've shifted all the extra bits out of
; OP2sue, and the overflow word Rins must be zero at this point.)
; This is enough bits to provide guard and round bits, plus 3 bits
; contributing to the sticky bit and enough information to complete
; generating it. We will finish generating it by setting Rarith to zero if
; the partial remainder is zero, non-zero if the partial remainder is
; non-zero. We must also set the low word of the result mantissa to 0.
ORR Rarith,OP2mhi,OP2mlo
MOV OP1mlo,#0
; Now test for mantissa underflow. If no mantissa underflow, we're ready to
; return. Otherwise, we must recover the spilled registers (to get hold of
; the result exponent), shift the mantissa left one bit, decrement the
; exponent and return.
; TEQ against zero sets N to bit 31 of OP1mhi; the conditional (MI) return
; below is taken when that bit is set. LDM does not alter the flags, so in
; the interworking variant the BXMI still sees that N flag.
TEQ OP1mhi,#0
IF Interworking :LOR: Thumbing
LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
BXMI LR
ELSE
LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
ENDIF
; Mantissa underflow path: only OP1mhi holds quotient bits (OP1mlo was
; zeroed above), so a single-word shift suffices.
LDMFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
MOV OP1mhi,OP1mhi,LSL #1
SUB OP2sue,OP2sue,#1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
;===========================================================================
; Reciprocal approximation table
; ------------------------------
;
; This table contains 128 entries, indexed by the first 7 fractional bits of
; a normalised divisor mantissa D. The value Rapprox obtained has the
; property that:
;
; 1/D <= Rapprox*2^(-7) < 1/D + 2^(-6)
;
; In fact, entry N in the table is calculated by the formula:
;
; Entry(N) = 2^14 divided by (128+N), rounded up to an integer.
;
; Proof that this is correct: if the first 7 fractional bits of D are N, we
; know that:
;
; (128+N)*2^(-7) <= D < (129+N)*2^(-7)
;
; This gives us:
; (2^7)/(129+N) < 1/D <= (2^7)/(128+N)
;
; Next, we have:
; 1/(128+N) - 1/(129+N) = 1/((128+N)*(129+N))
; < 1/(128*128)
; = 2^(-14)
;
; Multiplying by 2^7 and rearranging:
; (2^7)/(128+N) - 2^(-7) < (2^7)/(129+N)
;
; So:
; (2^7)/(128+N) - 2^(-7) < 1/D <= (2^7)/(128+N)
;
; Or:
; 1/D <= (2^7)/(128+N) < 1/D + 2^(-7)
;
; If we round (2^7)/(128+N) up to a multiple of 2^(-7), we increase it by
; less than 2^(-7), giving us:
;
; 1/D <= (2^7)/(128+N) rounded up to a multiple of 2^(-7) < 1/D + 2^(-6)
;
; But (2^7)/(128+N) rounded up to a multiple of 2^(-7) is Entry(N)*2^(-7),
; giving us the desired property.
; The table is generated at assembly time, one byte per entry, for N = 0 to
; 127. Rec_tmp is the assembler variable holding N. The expression
; (16384+127+N)/(128+N) is 2^14/(128+N) rounded up to an integer, written in
; the usual "add divisor-1 before dividing" form.
Recip_Table BytesStart
GBLA Rec_tmp
Rec_tmp SETA 0 ;N := 0
WHILE Rec_tmp < 128
DCB (16384+127+Rec_tmp)/(128+Rec_tmp) ;Entry(N), largest is Entry(0) = 128
Rec_tmp SETA Rec_tmp+1 ;N := N+1
WEND
BytesEnd
] ; Conditional assembly of Div
;===========================================================================
[ :DEF: fmod_s :LOR: FPEWanted :LOR: FPASCWanted
; Routine to perform the IEEE remainder function. It has the usual two
; labels on its entry point.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
;
; Stack discipline: Rem_Common pushes LR, so every exit reached after
; Rem_Common/Rem_ExponentsDone must pop that word again. This applies to
; the early "result is the first operand" exit as well as to the normal
; exit at the end of the routine.
ASSERT RNDexp = OP2sue ;We swap over from the use of OP2sue to that
; of RNDexp partway through this routine.
[ FPEWanted
RemFPE
]
[ FPASCWanted
RemFPASC
]
CDebug3 3,"RemFPASC/FPE: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
[ FPEWanted :LOR: FPASCWanted
; Start by detecting the "fast track" case of both operands being common.
TST OP1sue,#Uncommon_bit
TSTEQ OP2sue,#Uncommon_bit
BNE Rem_Uncommon
; If the second operand is a zero, we've got an invalid operation.
; Otherwise, if the first operand is a zero, the result is equal to the
; first operand.
ORRS Rarith,OP2mhi,OP2mlo
MOVEQ Rtmp,#InvReas_XRem0
BEQ InvalidOperation2ForSDE
ORRS Rarith,OP1mhi,OP1mlo
BEQ Rem_FirstOperand_Zero
]
; Both operands may now be assumed to be normalised numbers - now to deal
; with signs and exponents.
;
; We're going to generate the remainder by a long-division-like algorithm,
; which can be summarised as follows:
;
; partial remainder = ABS(op1); sign = SIGN(op1);
; FOR I = (op1 exponent) TO ((op2 exponent)-1) STEP -1
; Trial subtract (partial remainder) from (op2 mantissa)*2^I;
; IF strictly negative THEN
; partial remainder := 2*(op2 mantissa)*2^I - (partial remainder);
; sign := NOT(sign);
; NEXT
; IF (partial remainder) = 0
; THEN result = 0, with sign SIGN(op1);
; ELSE result = (-1)^(sign) * (partial remainder);
;
; We're clearly going to keep both the current sign and the original sign
; around: we'll do this in the top two bits of OP1sue. We'll also need to
; know the prospective result exponent (in OP2sue = RNDexp) and the number
; of iterations of the loop (in Rarith). However, note that if the
; calculated number of iterations is 0 or less, this means that the result
; is equal to the first operand. So we'll take care to calculate this number
; before disturbing the first operand in any way.
;
; Note also that the sign of the second operand is totally irrelevant, now
; that we've got past the stage of there being any potential invalid operation
; or divide-by-zero exceptions.
Rem_Common
STMFD Rsp!,{LR} ;Because we'll need the register, we
; may well call NormaliseOp1, and to
; match the Rem_Uncommon path.
AND RNDexp,OP2sue,#ToExp_mask ;Second operand exponent
SUB RNDexp,RNDexp,#1 ;Prospective result exponent
AND Rarith,OP1sue,#ToExp_mask ;First operand exponent
SUBS Rarith,Rarith,RNDexp ;Number of iterations - 1
Rem_ExponentsDone
; On entry here the flags reflect (number of iterations - 1) and the return
; address is on the stack. The AND below does not set the flags, so the LT
; conditions that follow still test that subtraction.
AND OP1sue,OP1sue,#Sign_bit ;All cases want this
ADDLT RNDexp,Rarith,RNDexp ;Recover first operand exp.
MOVLT Rarith,#0 ;And return first operand
; The return address was pushed at Rem_Common, so the early exit must pop it
; rather than return through LR directly: this keeps Rsp balanced and does
; not rely on LR still being intact. LDM leaves the flags alone, so the
; BXLT in the interworking variant still sees the LT condition.
IF Interworking :LOR: Thumbing
LDMLTFD Rsp!,{LR} ; exactly
BXLT LR
ELSE
LDMLTFD Rsp!,{PC} ; exactly
ENDIF
; Prepare for the main loop and branch into it.
MOV OP1sue,OP1sue,ASR #1 ;Make a copy of the sign, in
; case the result is zero
MOV LR,#0 ;Top word of the partial
; remainder
CDebug2 4,"Entering RMF loop: Rarith, LR",Rarith,LR
CDebug3 4," op1",OP1sue,OP1mhi,OP1mlo
CDebug3 4," op2",RNDexp,OP2mhi,OP2mlo
B Rem_Loop_Entry
Rem_Loop_Shift
; Shift the partial remainder left by 1 bit, using a bit of trickery to do
; each word in 1 cycle. LR receives the bit shifted out of the top of
; OP1mhi, making (LR,OP1mhi,OP1mlo) a three-word partial remainder.
MOV LR,OP1mhi,LSR #31
ADDS OP1mlo,OP1mlo,OP1mlo
ADC OP1mhi,OP1mhi,OP1mhi
Rem_Loop_Entry
; Do the trial subtraction of divisor - partial remainder; if it comes out
; non-negative, keep the previous partial remainder.
RSBS Rtmp,OP1mlo,OP2mlo
RSCS Rtmp2,OP1mhi,OP2mhi
RSCS LR,LR,#0
BCS Rem_Loop_End
; Otherwise, use the trial subtraction result to form a new partial
; remainder equal to 2*divisor minus old partial remainder, and note that
; the sign of the partial remainder has changed.
ADDS OP1mlo,Rtmp,OP2mlo
ADC OP1mhi,Rtmp2,OP2mhi
EOR OP1sue,OP1sue,#Sign_bit
Rem_Loop_End
; Loop until finished. Note the partial remainder is completely contained in
; OP1mhi and OP1mlo at this point.
SUBS Rarith,Rarith,#1
BGE Rem_Loop_Shift
; The result will always be exact.
MOV Rarith,#0
; If we've now got a partial remainder of exactly zero, the result is zero,
; with sign equal to that of the original first operand. Otherwise, we've
; got to normalise the result.
ORRS Rtmp,OP1mhi,OP1mlo
MOVEQ OP1sue,OP1sue,LSL #1 ;Recover copy of original sign
MOVEQ RNDexp,#0
ANDNE OP1sue,OP1sue,#Sign_bit
BLNE $NormaliseOp1_str
; And return.
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
] ; Conditional assembly of Rem/mod
;===========================================================================
[ :DEF: sqrt_s :LOR: FPEWanted :LOR: FPASCWanted
; Routine to take the square root of an internal format floating point
; number. Unlike the dyadic arithmetic instructions, only one entry point is
; required: we do however give it two labels for the sake of consistent
; naming.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
[ FPEWanted
SqrtFPE
]
[ FPASCWanted
SqrtFPASC
]
[ :LNOT: :DEF: sqrt_s
CDebug3 3,"SqrtFPE/FPASC: operand =",OP1sue,OP1mhi,OP1mlo
; Start by splitting according to whether the operand is common or uncommon.
; The code to deal with uncommon operands lies a long way down in the
; source, to avoid addressability problems.
TST OP1sue,#Uncommon_bit
BNE Sqrt_Uncommon
; If the operand is a zero, the result is the same zero. Because the
; operand is common and assumed not to be an unnormalised URD result, we can
; check for zeros by means of the units bit.
TST OP1mhi,#EIUnits_bit
BEQ Sqrt_Zero
; The operand may now be assumed to be a normalised number. If it is
; negative, we have an invalid operation exception. Otherwise, the result
; sign is positive (equal to the operand sign) and we need to produce the
; result exponent.
; We produce the result exponent by adding the exponent bias to the
; already biased exponent, producing (unbiased exponent) + 2*bias, then
; shifting right by one bit, producing ((unbiased exponent) DIV 2) + bias.
; We set the condition codes on this last instruction in order to transfer
; the least significant bit of the unbiased exponent into C.
]
[ FPLibWanted
__fp_sqrt_common
]
Sqrt_Common
AND RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent
[ FPEWanted :LOR: FPASCWanted
ANDS OP1sue,OP1sue,#Sign_bit ;Isolate sign bit & check positive
MOVNE Rtmp,#InvReas_SqrtNeg
BNE InvalidOperation1ForSDE
|
ANDS OP1sue,OP1sue,#Sign_bit ;Isolate sign bit
ORRNE OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BXNE LR
ELSE
MOVNE PC,LR
ENDIF
]
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF00
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF
ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa
; overflow is (exp+bias) DIV 2
MOVS RNDexp,RNDexp,LSR #1
; This subsidiary entry point deals with taking the square root of a
; normalised mantissa.
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the
; remaining bits are zero;
; OP1mhi = Operand mantissa, high word;
; OP1mlo = Operand mantissa, low word;
; RNDexp = Prospective result exponent;
; Rins = instruction (needed to determine the precision);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link;
; C = least significant bit of operand's unbiased exponent.
; Exit: OP1sue = the result's sign (always positive), with an uncommon bit
; of 0; the remaining bits are zero;
; OP1mhi, OP1mlo = the result's mantissa;
; RNDexp = the result exponent;
; Rarith holds the round bit (in bit 31) and the sticky bit (in bits
; 30:0) if the destination precision is extended; if the
; destination precision is single or double, it holds part of the
; sticky bit (the remainder of which is held in bits below the
; round bit in OP1mhi and OP1mlo);
; OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
; All other registers preserved.
;
; Note that the result exponent is in fact always equal to the prospective
; result exponent: the process of taking the square root always results in a
; normalised mantissa. (Subsequent rounding may of course lead to mantissa
; overflow, but the raw unrounded result mantissa is always normalised.)
Sqrt_Mantissa
CDebug2 4,"SqrtFPE/FPASC: mantissa =",OP1mhi,OP1mlo
CDebug1 4," sign =",OP1sue
CDebug1 4," exponent =",RNDexp
; We do the square root by the standard "long square root" algorithm. (There
; is an optimisation possibility here, of doing square roots by
; Newton-Raphson followed by a final correction. This only applies to the
; FPASC, since the FPE's division is too slow for there to be any
; possibility of this making a profit - even the FPA's division will have to
; be used very carefully for it to have a hope of working.)
;
; A description of the long square root algorithm follows:
;
; The problem is to take the square root of a mantissa M in the range 1 <= M
; < 4. An initial approximation R[0]=1 to the root has the property that it
; is the rounded-down root to 0 places after the binary point - i.e. that
; R[0] is a multiple of 2^(-0) and R[0] <= Sqrt(M) < R[0] + 2^(-0). We will
; evaluate successive approximations R[i] to the root such that R[i] is the
; correct rounded-down root to i places after the binary point - i.e. that
; R[i] is a multiple of 2^(-i) and R[i] <= Sqrt(M) < R[i] + 2^(-i). If we
; know R[24], R[53] or R[64] respectively for single, double or extended
; precision, and in addition know whether the result is exact (i.e. whether
; R[i] = Sqrt(M) exactly), we have enough information to provide all the
; required fractional bits and the round and sticky bits, and so to
; calculate the correct IEEE square root. (Note that a guard bit is not
; required: the infinite precision square root of M will not suffer mantissa
; overflow or underflow, and so its finite precision approximations can only
; suffer mantissa overflow during rounding, not prior to rounding.)
;
; So we will use a partial remainder P[i] = M - R[i]^2; initially, P[0] =
; M-1. Next, we know that R[i+1] is either equal to R[i] or to R[i] +
; 2^(-i-1), depending on whether the next bit of the root is 0 or 1. To
; determine which, we need to know whether R[i] + 2^(-i-1) <= Sqrt(M): if it
; is, the next bit of the root is 1; if it isn't, the next bit of the root
; is 0.
;
; This is equivalent to asking whether (R[i] + 2^(-i-1))^2 <= M, i.e. to
; whether:
;
; R[i]^2 + R[i]*2^(-i) + 2^(-2*i-2) <= M
;
; or to whether:
;
; R[i]*2^(-i) + 2^(-2*i-2) <= P[i]
;
; If it is, then R[i+1] = R[i] + 2^(-i-1) and:
;
; P[i+1] = M - R[i+1]^2
; = M - (R[i] + 2^(-i-1))^2
; = M - R[i]^2 - R[i]*2^(-i) - 2^(-2*i-2)
; = P[i] - R[i]*2^(-i) - 2^(-2*i-2)
;
; If it isn't, then R[i+1] = R[i] and P[i+1] = M - R[i+1]^2 = M - R[i]^2 =
; P[i].
;
; So the long square root algorithm can be stated as follows, where N=24, 53
; or 64 respectively for single, double or extended precision:
;
; (1) Initialise: R[0] = 1, P[0] = M-1;
;
; (2) For i=0 to N-1:
; Do a trial subtraction of R[i]*2^(-i) + 2^(-2*i-2) from P[i];
; If result >= 0, put R[i+1] = R[i] + 2^(-i-1), P[i+1] = result of
; trial subtraction;
; Else put R[i+1] = R[i], P[i+1] = P[i];
;
; (3) The units, fractional and round bits of the result are in R[N], while
; the sticky bit is 0 if P[N] = 0, 1 if P[N] > 0.
;
; Note that P[i] = M - R[i]^2
; < M - (Sqrt(M) - 2^(-i))^2
; = M - M + Sqrt(M)*2^(-i+1) - 2^(-2*i)
; = Sqrt(M)*2^(-i+1) - 2^(-2*i)
; < 2^(-i+2)
;
; So P[i] decreases greatly in magnitude during the long square root
; process. If we use it straightforwardly, this will result in a lot of
; spurious subtractions of bits known to be zero from other bits known to be
; zero during the algorithm. So instead, let us define Q[i] = P[i]*2^(i-1)
; and recast the algorithm in terms of Q[i]:
;
; (1) Initialise: R[0] = 1, Q[0] = (M-1)/2;
;
; (2) For i=0 to N-1:
; Do a trial subtraction of R[i] + 2^(-i-2) from 2*Q[i];
; If result >= 0, put R[i+1] = R[i] + 2^(-i-1), Q[i+1] = result of
; trial subtraction;
; Else put R[i+1] = R[i], Q[i+1] = 2*Q[i];
;
; (3) The units, fractional and round bits of the result are in R[N], while
; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; Introducing a travelling bit variable T[i] to represent 2^(-i-2) and
; rephrasing in terms of shifts:
;
; (1) Initialise: R[0] = 1, Q[0] = (M-1)/2, T[0] = 2^(-2);
;
; (2) For i=0 to N-1:
; Do a trial subtraction of R[i] + T[i] from Q[i] << 1;
; If result >= 0, put R[i+1] = R[i] + (T[i] << 1),
; Q[i+1] = (Q[i] << 1) - (R[i]+T[i]);
; Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1;
;
; (3) The units, fractional and round bits of the result are in R[N], while
; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; This is more-or-less the algorithm we use, though we split into different
; sections depending on how far the travelling bit has been shifted down so
; far, to avoid doing multi-word arithmetic until we have to.
;
; One thing we do have to look at is the precision required for Q[i]. We
; know that 0 < Q[i] = P[i]*2^(i-1) < 2^(-i+2)*2^(i-1) = 2, so one place
; before the binary point is enough. Initially, Q[0] = (M-1)/2 is a multiple
; of 2^(-64), requiring 64 places after the binary point, or 65 bits in
; total - one bit more than 2 words. This is highly inconvenient, but we can
; get around it by noticing that if M < 2, then the first two bits of the
; result are definitely 1.0, and we have R[1] = 1.0, Q[1] = M-1 and T[1] =
; 2^(-3). So Q[1] is a multiple of 2^(-63) and can be represented in two
; words. On the other hand, if M >= 2, then Q[0] = (M-1)/2 is a multiple of
; 2^(-63) and can also be represented by two words. This transforms the
; algorithm to:
;
; IF M < 2 THEN
;
; (1) Initialise: R[1] = 1.0, Q[1] = M-1, T[1] = 2^(-3);
;
; (2) For i=1 to N-1:
; Do a trial subtraction of R[i] + T[i] from Q[i] << 1;
; If result >= 0, put R[i+1] = R[i] + (T[i] << 1),
; Q[i+1] = (Q[i] << 1) - (R[i]+T[i]);
; Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1;
;
; (3) The units, fractional and round bits of the result are in R[N], while
; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; ELSE
;
; (1') Initialise: R[0] = 1, Q[0] = (M-1)/2, T[0] = 2^(-2);
;
; (2') For i=0 to N-1:
; Do a trial subtraction of R[i] + T[i] from Q[i] << 1;
; If result >= 0, put R[i+1] = R[i] + (T[i] << 1),
; Q[i+1] = (Q[i] << 1) - (R[i]+T[i]);
; Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1;
;
; (3') The units, fractional and round bits of the result are in R[N], while
; the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; ENDIF
;
; Now Q[i] can be represented in two words up to the point where the trial
; subtraction produces results that overflow two words. We have the
; following situation at various iterations, remembering that T[i] = 2^(-i-2):
;
; For i < 30: R[i] and T[i] can be represented by 1 word, with the binary
; point to the right of bit 31; Q[i+1] requires two words, with the trial
; subtraction being performed on the top word only.
;
; For 30 <= i < 62: R[i] can be represented by 2 words, with the binary point
; to the right of bit 31 of the top word (strictly, the low word isn't
; required for R[30]); T[i] can be represented by 1 word, now with an
; implicit word of zeros above it and the binary point to the right of bit
; 31 of this implicit word; Q[i+1] still requires two words, with the trial
; subtraction occurring on both words;
;
; For i=62: R[i] can be represented by 2 words, with the binary point to the
; right of bit 31 of the top word; T[i] can be represented by 1 word, now
; with two implicit words of zeros above it and the binary point to the
; right of bit 31 of the more significant of the two words; Q[i+1] still
; contains two words, but a third word is required for the trial
; subtraction.
;
; For i=63: R[i] now requires 3 words, with the binary point to the right of
; bit 31 of the most significant word; T[i] can be represented by 1 word,
; now with two implicit words of zeros above it and the binary point to
; the right of bit 31 of the more significant of the two words; Q[i+1] will
; require 3 words to represent it, with the trial subtraction occurring on
; all three words.
;
; So we will actually perform the square root in 5 stages:
;
; (A) Initialisation and iterations with 0 <= i < 30. Terminated after i=23
; for single precision.
; (B) Iterations with 30 <= i < 62. Terminated after i=52 for double
; precision, not done at all for single precision.
; (C) Iteration with i=62. Only done for extended precision.
; (D) Iteration with i=63. Only done for extended precision.
; (E) Sticky bit construction. Done separately for single/double and
; extended precisions.
;
; Register usage:
; OP1mhi, OP1mlo: R[i] (the root so far); Rarith is also involved in this
; at the end of the i=63 iteration.
; OP2mhi, OP2mlo: Q[i] (the shifted partial remainder).
; Rarith: temporary register.
; Rtmp: T[i] (the travelling bit);
; Rtmp2: loop counter.
; Initialise remainder (Q[0] for odd exponent, Q[1] for even exponent)
SUBCC OP2mhi,OP1mhi,#TopBit ;Subtract 1 for even exponent
SUBCS OP2mhi,OP1mhi,#TopBit:SHR:1 ;Shift left, subtract 1 and shift
; right for odd exponent
MOV OP2mlo,OP1mlo ;Bottom word is unaffected either way
; Initialise travelling bit. Due to the loop unwinding below, we actually
; want T[0] for an odd exponent, T[1] << 1 for an even exponent: both of
; these are 2^(-2).
MOV Rtmp,#TopBit:SHR:2
; Initialise result - both R[1] = 1.0 for even exponents and R[0] = 1 for
; odd exponents require the same bit pattern.
MOV OP1mhi,#TopBit
MOV OP1mlo,#0
; Initialise the loop counter. This is a bit esoteric: it contains minus the
; number of times the first loop below is executed in its top four bits,
; plus the number of times the second loop is executed in its bottom 4 bits.
; The idea is that the first loop adds 1 << 28 to it until it becomes
; positive, then the second subtracts one from it until it becomes zero.
; This is the only time we actually need to look at the precision bits in
; the instruction!
; Note that we must take great care not to change the C flag in this code.
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp2,#((-5):SHL:28) + 8 ;Correct value for extended
[ Pr1_mask < &100 ;I.e. if immediate won't set C
TST Rins,#Pr1_mask ;Z := 1 if single/double
|
MOV Rarith,Rins,LSR #Pr1_pos
TST Rarith,#(Pr1_mask:SHR:Pr1_pos)
]
MOVEQ Rtmp2,#((-5):SHL:28) + 6 ;Correct value for double
[ Pr2_mask < &100 ;I.e. if immediate won't set C
TSTEQ Rins,#Pr2_mask ;Z := 1 if single
|
MOVEQ Rarith,Rins,LSR #Pr2_pos
TSTEQ Rarith,#(Pr2_mask:SHR:Pr2_pos)
]
MOVEQ Rtmp2,#((-4):SHL:28) + 0 ;Correct value for single
|
; Single precision square root is not allowed. Extended is though.
[ Double_mask < &100
TST Rins,#Double_mask
|
MOV Rarith,Rins,LSR #Double_pos
TST Rarith,#(Double_mask:SHR:Double_pos)
]
MOVEQ Rtmp2,#((-5):SHL:28) + 8
MOVNE Rtmp2,#((-5):SHL:28) + 6
]
; We now require the iterations with 0 <= i < 30 to be done - i.e.:
;
; 23 iterations for single precision, even exponent (1<=i<=23);
; 24 iterations for single precision, odd exponent (0<=i<=23);
; 29 iterations for double/extended precision, even exponent (1<=i<=29);
; 30 iterations for double/extended precision, odd exponent (0<=i<=29).
;
; We unwind this loop to produce 6 copies of the code, and branch in after
; the first one for even exponents.
BCC Sqrt_Loop1A
Sqrt_Loop1
; First copy of code
ADDS OP2mlo,OP2mlo,OP2mlo ;Get Q[i] << 1 - note top bit goes
ADCS OP2mhi,OP2mhi,OP2mhi ; into C
ORR Rarith,OP1mhi,Rtmp ;And R[i] + T[i] - note no overlap
CMPCC OP2mhi,Rarith ;Trial subtraction - always works
; if (Q[i] << 1) >= 2.
SUBCS OP2mhi,OP2mhi,Rarith ;Do real subtraction if trial works
ORRCS OP1mhi,OP1mhi,Rtmp,LSL #1 ;Put 1 in result if trial works
Sqrt_Loop1A
; Second copy of code - similar to first copy except we use Rtmp >> 1
; instead of Rtmp.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ORR Rarith,OP1mhi,Rtmp,LSR #1
CMPCC OP2mhi,Rarith
SUBCS OP2mhi,OP2mhi,Rarith
ORRCS OP1mhi,OP1mhi,Rtmp
; Third copy of code - similar to first copy except we use Rtmp >> 2
; instead of Rtmp.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ORR Rarith,OP1mhi,Rtmp,LSR #2
CMPCC OP2mhi,Rarith
SUBCS OP2mhi,OP2mhi,Rarith
ORRCS OP1mhi,OP1mhi,Rtmp,LSR #1
; Fourth copy of code - similar to first copy except we use Rtmp >> 3
; instead of Rtmp.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ORR Rarith,OP1mhi,Rtmp,LSR #3
CMPCC OP2mhi,Rarith
SUBCS OP2mhi,OP2mhi,Rarith
ORRCS OP1mhi,OP1mhi,Rtmp,LSR #2
; Fifth copy of code - similar to first copy except we use Rtmp >> 4
; instead of Rtmp.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ORR Rarith,OP1mhi,Rtmp,LSR #4
CMPCC OP2mhi,Rarith
SUBCS OP2mhi,OP2mhi,Rarith
ORRCS OP1mhi,OP1mhi,Rtmp,LSR #3
; Sixth copy of code - similar to first copy except we use Rtmp >> 5
; instead of Rtmp.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ORR Rarith,OP1mhi,Rtmp,LSR #5
CMPCC OP2mhi,Rarith
SUBCS OP2mhi,OP2mhi,Rarith
ORRCS OP1mhi,OP1mhi,Rtmp,LSR #4
; Now update the travelling bit and loop counter, then loop if required.
ADDS Rtmp2,Rtmp2,#1:SHL:28 ;Increment loop counter
MOV Rtmp,Rtmp,ROR #6 ;ROR rather than LSR to set up
BLT Sqrt_Loop1 ; for next loop.
; If the result is exact at this point, we can obviously return with all the
; remaining fractional bits, the round bit and the sticky bit equal to 0. If
; the result is not exact but the precision is single, we can return with a
; sticky bit of 1. We only continue if the result is inexact and the
; precision is double or extended.
ORRS Rarith,OP2mhi,OP2mlo
CMPNE Rtmp,#TopBit:SHR:26 ;Will be EQ for single, NE for
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR ; double or extended
ENDIF
; Next, we need to do the iterations with 30 <= i < 62 - i.e.:
;
; 32 iterations for extended precision (30<=i<=61);
; 23 iterations for double precision (30<=i<=52).
;
; This is a bit awkward from the point of view of unwinding the loop, so we
; will instead do 24 iterations for double precision and unwind the loop to
; produce 4 copies of the code. The extra iteration for double precision is
; wasted work but does no harm.
STMFD Rsp!,{Rfpsr,Rins,LR} ;We need a few more registers
Sqrt_Loop2
ADDS OP2mlo,OP2mlo,OP2mlo ;Get Q[i] << 1,
ADCS OP2mhi,OP2mhi,OP2mhi
ADC LR,LR,LR ; putting overflow bit into LR[0]
ORR Rarith,OP1mlo,Rtmp ;(OP1mhi,Rarith) := R[i] + T[i]
SUBS Rins,OP2mlo,Rarith ;Do trial subtraction, which
SBCS Rfpsr,OP2mhi,OP1mhi
MOVCCS LR,LR,LSR #1 ; always works if (Q[i] << 1) >= 2.
MOVCS OP2mlo,Rins ;Use subtraction result if
MOVCS OP2mhi,Rfpsr ; successful
ORRCS OP1mlo,OP1mlo,Rtmp,LSL #1 ;And put a 1 in the result
ORRCS OP1mhi,OP1mhi,Rtmp,LSR #31 ;(NB Rtmp may be &80000000)
; Second copy of code - similar to first copy except we use Rtmp >> 1 in
; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ADC LR,LR,LR
ORR Rarith,OP1mlo,Rtmp,LSR #1
SUBS Rins,OP2mlo,Rarith
SBCS Rfpsr,OP2mhi,OP1mhi
MOVCCS LR,LR,LSR #1
MOVCS OP2mlo,Rins
MOVCS OP2mhi,Rfpsr
ORRCS OP1mlo,OP1mlo,Rtmp
; Third copy of code - similar to first copy except we use Rtmp >> 2 in
; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ADC LR,LR,LR
ORR Rarith,OP1mlo,Rtmp,LSR #2
SUBS Rins,OP2mlo,Rarith
SBCS Rfpsr,OP2mhi,OP1mhi
MOVCCS LR,LR,LSR #1
MOVCS OP2mlo,Rins
MOVCS OP2mhi,Rfpsr
ORRCS OP1mlo,OP1mlo,Rtmp,LSR #1
; Fourth copy of code - similar to first copy except we use Rtmp >> 3 in
; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi.
ADDS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
ADC LR,LR,LR
ORR Rarith,OP1mlo,Rtmp,LSR #3
SUBS Rins,OP2mlo,Rarith
SBCS Rfpsr,OP2mhi,OP1mhi
MOVCCS LR,LR,LSR #1
MOVCS OP2mlo,Rins
MOVCS OP2mhi,Rfpsr
ORRCS OP1mlo,OP1mlo,Rtmp,LSR #2
; Now update the travelling bit and loop counter, then loop if required.
SUBS Rtmp2,Rtmp2,#1 ;Decrement loop counter
MOV Rtmp,Rtmp,ROR #4 ;ROR rather than LSR to set up
BNE Sqrt_Loop2 ; for last couple of iterations.
; If the remainder is zero at this point, we've got an exact result: the
; last fractional bit, the round bit and the sticky bit must all be zero.
; Otherwise, we know that the result will *not* be exact, since each of
; the last two iterations either doesn't change the partial remainder (thus
; leaving it non-zero) or subtracts a value with a 1 in a less significant
; bit than the lowest bit currently in the partial remainder, which must
; leave it non-zero.
; So we can now return if either the result is currently exact or if it is
; inexact and the precision is double, taking care to make Rarith zero in
; the first case and non-zero in the second. We only need to perform the
; rest of the division if the precision is extended and the result is
; currently inexact - which implies that it will also ultimately be inexact
; and thus that the sticky bit is 1.
ORRS Rarith,OP2mhi,OP2mlo
CMPNE Rtmp,#TopBit:SHR:24 ;Will be EQ for double, NE for
IF Interworking :LOR: Thumbing
LDMEQFD Rsp!,{Rfpsr,Rins,LR} ; extended
BXEQ LR
ELSE
LDMEQFD Rsp!,{Rfpsr,Rins,PC} ; extended
ENDIF
; Now we need to get the last fractional bit.
ADDS OP2mlo,OP2mlo,OP2mlo ;Get Q[i] << 1,
ADCS OP2mhi,OP2mhi,OP2mhi
ADC LR,LR,LR ; putting overflow bit into LR[0]
RSBS Rtmp,Rtmp,#0 ;Do trial subtraction, which
RSCS Rins,OP1mlo,OP2mlo
RSCS Rfpsr,OP1mhi,OP2mhi
MOVCCS LR,LR,LSR #1 ; always works if (Q[i] << 1) >= 2.
MOVCS OP2mlo,Rins ;Use subtraction result if
MOVCS OP2mhi,Rfpsr ; successful
MOVCC Rtmp,#0 ;And forget it if not
ORRCS OP1mlo,OP1mlo,#1 ;And put a 1 in the result
; And the round bit.
MOV Rarith,#TopBit+1 ;We know sticky bit is 1 - assume
; round bit is also 1
ADDS Rtmp,Rtmp,Rtmp ;Get Q[i] << 1.
ADCS OP2mlo,OP2mlo,OP2mlo
ADCS OP2mhi,OP2mhi,OP2mhi
IF Interworking :LOR: Thumbing
LDMCSFD Rsp!,{Rfpsr,Rins,LR} ;If >= 2, round bit must be 1
BXCS LR
ELSE
LDMCSFD Rsp!,{Rfpsr,Rins,PC} ;If >= 2, round bit must be 1
ENDIF
;Omit low word of trial subtraction
; - we know it will borrow and thus
; leave C=0. But C=0 here anyway!
SBCS Rins,OP2mlo,OP1mlo ;Do rest of trial subtraction
SBCS Rins,OP2mhi,OP1mhi
MOVCC Rarith,#1 ;If it fails, round=0, sticky=1
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{Rfpsr,Rins,LR}
BX LR
ELSE
LDMFD Rsp!,{Rfpsr,Rins,PC}
ENDIF
] ; Conditional compilation of sqrt
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; Routine to do a move/move negated/absolute value of an internal format
; floating point number. It has the usual pair of entry points, one
; optimised for the FPASC, the other for the FPE.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; Note that these operations are usually very simple:
; * Numeric values need their sign bits modified, then to be set up for
; rounding; note that in the process, uncommon numeric values need to be
; converted to zeros or normalised numbers to ensure that the rounding
; works;
; * Infinities and quiet NaNs need their sign bits modified;
; * Signalling NaNs just need their sign bits modified if no change of
; format is involved (what this means depends on the state of the FPSR
; NE bit); if a change of format is required, they should generate the
; usual invalid operation exception.
;
; Summary of the code visible in this section:
;   MoveFPE       - FPE entry: branches to Move_Uncommon if the uncommon bit
;                   of OP1sue is set, otherwise extracts the exponent into
;                   RNDexp and falls into Move_Numeric.
;   Move_Numeric  - reduces OP1sue to its sign bit and zeroes Rarith.
;   Move_DoSigns  - applies the MNF and ABS modifiers from Rins and returns.
; Move_Uncommon is not defined in this section.
; NOTE(review): no FPASC entry label appears in this section; presumably it
; is defined elsewhere in the file - confirm.
[ FPEWanted
MoveFPE
CDebug3 3,"MoveFPE: operand =",OP1sue,OP1mhi,OP1mlo
; If the value is common, it's a numeric value and there's no problem.
TST OP1sue,#Uncommon_bit
BNE Move_Uncommon
; Split out the exponent.
AND RNDexp,OP1sue,#ToExp_mask
]
Move_Numeric
; Isolate sign bit and clear uncommon bit. Also set Rarith to 0, since all
; rounding information is completely contained in OP1mhi and OP1mlo.
AND OP1sue,OP1sue,#Sign_bit
MOV Rarith,#0
Move_DoSigns
; Do the sign manipulations and return.
; The negation (MNF) is applied before the absolute value (ABS), so if both
; bits are set in Rins the sign bit ends up clear.
TST Rins,#MNF_bit
EORNE OP1sue,OP1sue,#Sign_bit
TST Rins,#ABS_bit
BICNE OP1sue,OP1sue,#Sign_bit
; Return: BX is used when interworking/Thumb support is configured, otherwise
; a plain MOV to PC.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional assembly of Move
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; Routine to do a NRM instruction on an internal format floating point
; number. It has the usual pair of entry points, one optimised for the
; FPASC, the other for the FPE.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; This operation is very similar to MVF, except that we have to cater for
; unnormalised values with the uncommon bit equal to zero - i.e. an URD
; result.
;
; Summary of the code visible in this section:
;   NormFPE                 - FPE entry: dispatches uncommon operands to
;                             Norm_Uncommon (not defined in this section),
;                             extracts the exponent into RNDexp, and goes
;                             straight to Norm_Numeric if the units bit is set.
;   Norm_ZeroUnnormOrDenorm - turns zeros into a true zero (RNDexp = 0) and
;                             normalises everything else via the routine
;                             named by $NormaliseOp1_str.
;   Norm_Numeric            - reduces OP1sue to its sign bit, zeroes Rarith
;                             and returns.
[ FPEWanted
NormFPE
CDebug3 3,"NormFPE: operand =",OP1sue,OP1mhi,OP1mlo
; Split according to whether the value is common or uncommon.
TST OP1sue,#Uncommon_bit
BNE Norm_Uncommon
; Split out the exponent.
AND RNDexp,OP1sue,#ToExp_mask
; If the units bit is clear, it's either a URD result or a zero. URD results
; can be treated just like extended unnormalised numbers and zeros.
TST OP1mhi,#EIUnits_bit
BNE Norm_Numeric
]
Norm_ZeroUnnormOrDenorm
; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero - or a
; proper zero or a URD result, which may be treated like an extended
; unnormalised number or zero. If it's any sort of zero, change it to a real
; zero and treat it as a numeric.
; (Rtmp is only a scratch destination here; the ORRS is done for its Z flag.)
ORRS Rtmp,OP1mhi,OP1mlo
MOVEQ RNDexp,#0
BEQ Norm_Numeric
; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
STMFD Rsp!,{LR} ;We will have subroutine calls below
; Shift the uncommon bit of OP1sue up to the units-bit position (bit 31, per
; the ASSERT) and AND it with OP1mhi: N is set exactly when uncommon = units
; = 1. Rarith is just a scratch destination for the flag-setting AND.
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI RNDexp,RNDexp,#1
BL $NormaliseOp1_str ;NB must be necessary, so no
; point in checking whether
; normalised
LDMFD Rsp!,{LR}
Norm_Numeric
; Isolate sign bit and clear uncommon bit. Also set Rarith to 0, since all
; rounding information is completely contained in OP1mhi and OP1mlo.
AND OP1sue,OP1sue,#Sign_bit
MOV Rarith,#0
; Return: BX when interworking/Thumb support is configured, else MOV PC,LR.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional assembly of Norm
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; Routine to do a URD instruction on an internal format floating point
; number. There are the usual two entry points.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; Summary of the code visible in this section:
;   UrdFPE      - FPE entry: dispatches uncommon operands to Urd_Uncommon
;                 (not defined in this section).
;   Urd_Common  - splits OP1sue into biased exponent (Rarith) and sign.
;   Urd_Numeric - picks the target exponent for the instruction's precision
;                 and denormalises the mantissa to it.
;   Urd_Big     - operand exponent is already at or above the target; the
;                 number is returned unchanged with Rarith = 0.
[ FPEWanted
UrdFPE
CDebug3 3,"UrdFPE: operand =",OP1sue,OP1mhi,OP1mlo
; Start by splitting between common and uncommon operands.
TST OP1sue,#Uncommon_bit
BNE Urd_Uncommon
]
Urd_Common
; The operand is common. Split OP1sue into sign and biased exponent.
AND Rarith,OP1sue,#ToExp_mask
AND OP1sue,OP1sue,#Sign_bit
Urd_Numeric
; Calculate shift amount to denormalise the number to put the true binary
; point at the rounding boundary - i.e. to give it an effective unbiased
; exponent of 23, 52 or 63 depending on whether the precision of the
; instruction is single, double or extended.
; The low byte of the target exponent is selected first (23 by default, 52
; if the Pr2 bit of Rins is set, 63 if the Pr1 bit is set - the later test
; wins); the high byte, which the ASSERTs check is the same in all three
; cases, is then ORed in.
MOV RNDexp,#((EIExp_bias+23):AND:&FF)
TST Rins,#Pr2_mask
MOVNE RNDexp,#((EIExp_bias+52):AND:&FF)
TST Rins,#Pr1_mask
MOVNE RNDexp,#((EIExp_bias+63):AND:&FF)
ORR RNDexp,RNDexp,#((EIExp_bias+63):AND:&FF00)
ASSERT ((EIExp_bias+63):AND:&FF00) = ((EIExp_bias+52):AND:&FF00)
ASSERT ((EIExp_bias+63):AND:&FF00) = ((EIExp_bias+23):AND:&FF00)
; Rtmp := target exponent - operand exponent. LS (unsigned lower or same)
; means the operand exponent is >= the target, so no shift is needed.
SUBS Rtmp,RNDexp,Rarith
BLS Urd_Big
; Denormalise the number to have this unbiased exponent and return.
; (Denorm is a macro defined elsewhere in the file; its operand conventions
; are not visible in this section.)
Denorm OP1mhi,OP1mlo,Rarith,Rtmp,Rtmp2,Rtmp
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Urd_Big
; We just need to return the number itself, with rounding bits equal to
; zero.
; The operand's own exponent (held in Rarith) becomes the result exponent.
MOV RNDexp,Rarith
MOV Rarith,#0
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional assembly of Urd
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; Routine to do a RND instruction on an internal format floating point
; number. There are the usual two entry points.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; Summary of the code in this section:
;   RndFPE/RndFPASC   - shared entry: dispatches uncommon operands to
;                       Rnd_Uncommon (not defined in this section).
;   Rnd_Common        - splits OP1sue into exponent (RNDexp) and sign; zeros
;                       (units bit clear) are returned as exact.
;   Rnd_Numeric       - works out how many mantissa bits lie below the real
;                       binary point (Rtmp) and splits three ways:
;     Rnd_LowWord       1..32 bits   - boundary lies in OP1mlo;
;     Rnd_HighWord      33..64 bits  - boundary lies in OP1mhi;
;     Rnd_AboveMantissa more than 64 - the whole mantissa is fraction.
;   Rnd_Exact         - returns the operand unchanged with Rarith = 0.
;
; In the inexact cases the value itself is not rounded here. A round-down
; result has every bit below the boundary cleared and Rarith = &40000000
; (round=0, sticky=1); a round-up result has every bit below the boundary
; set and Rarith = &C0000000 (round=1, sticky=1).
; NOTE(review): presumably the caller's normal rounding step then completes
; the rounding from these round/sticky bits - confirm against the caller.
[ FPEWanted
RndFPE
]
[ FPASCWanted
RndFPASC
]
CDebug3 3,"RndFPASC/FPE: operand =",OP1sue,OP1mhi,OP1mlo
; Start by splitting between common and uncommon operands.
TST OP1sue,#Uncommon_bit
BNE Rnd_Uncommon
Rnd_Common
; The operand is common. Split OP1sue into sign and biased exponent.
AND RNDexp,OP1sue,#ToExp_mask
AND OP1sue,OP1sue,#Sign_bit
; If the number is a zero, we're done.
TST OP1mhi,#EIUnits_bit
BEQ Rnd_Exact
Rnd_Numeric
; Find the position of the real binary point.
; The load is unconditional so that the constant is set up correctly
; whatever the flag state on arrival at this label (on the fall-through path
; from Rnd_Common the flags are always NE, so nothing changes there).
MOV Rarith,#((EIExp_bias+63):AND:&FF)
ORR Rarith,Rarith,#((EIExp_bias+63):AND:&FF00)
ASSERT (EIExp_bias + 63) < &10000
; Rtmp := number of mantissa bits below the binary point; if it is zero or
; negative the value is already an integer.
SUBS Rtmp,Rarith,RNDexp
BLE Rnd_Exact
; The rounding position for an integer - i.e. the real binary point - is now
; Rtmp bits above the bottom of the mantissa. Split according to whether
; this puts the round bit in the low word of the mantissa, the high word of
; the mantissa or above the high word of the mantissa.
RSBS Rtmp2,Rtmp,#32
BLT Rnd_AboveLowWord
Rnd_LowWord
; Here Rtmp2 = 32 - Rtmp, in the range 0..31.
; Branch out if rounding is exact.
MOVS Rtmp,OP1mlo,LSL Rtmp2
BEQ Rnd_Exact
; We now know we want to round down if we're rounding to zero, or if we're
; rounding to minus infinity and the number is positive, or if we're
; rounding to plus infinity and the number is negative.
; The MOVS puts the sign into C; the sign then selects which rounding-mode
; bit of Rins is tested, and NE means "round down (towards zero)".
MOVS Rtmp,OP1sue,LSL #32-Sign_pos
TSTCS Rins,#1:SHL:RM_pos
TSTCC Rins,#1:SHL:(RM_pos+1)
ASSERT RM_pos < 7 ;So that constants don't disturb C
BNE Rnd_LowWord_RoundDown
; If we're not rounding to nearest, we must now be rounding up.
TST Rins,#RM_mask
BNE Rnd_LowWord_RoundUp
ASSERT RM_Nearest = 0
; We're rounding to nearest. Produce the round and sticky bits, then work
; out which way we're rounding.
ADD Rtmp,Rtmp2,#1
MOVS Rtmp,OP1mlo,LSL Rtmp ;C<-round, Z<-NOT(sticky)
BNE Rnd_LowWord_GotDir ;Branch if not halfway case
; Halfway case (round=1, sticky=0): round to even, i.e. round up only if the
; least significant retained bit is 1.
MOVS Rtmp,OP1mhi,LSR #1 ;C<-least significant bit, from
MOVS Rtmp,OP1mlo,LSL Rtmp2 ; low word unless Rtmp2 is 0.
Rnd_LowWord_GotDir
BCS Rnd_LowWord_RoundUp
Rnd_LowWord_RoundDown
RSB Rtmp2,Rtmp2,#32 ;Clear all bits below rounding
MOV OP1mlo,OP1mlo,LSR Rtmp2 ; boundary
MOV OP1mlo,OP1mlo,LSL Rtmp2
MOV Rarith,#&40000000 ;And set round=0, sticky=1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Rnd_LowWord_RoundUp
RSB Rtmp2,Rtmp2,#32 ;Set all bits below rounding
MVN OP1mlo,OP1mlo,LSR Rtmp2 ; boundary
MVN OP1mlo,OP1mlo,LSL Rtmp2
MOV Rarith,#&C0000000 ;And set round=1, sticky=1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Rnd_AboveLowWord
RSBS Rtmp2,Rtmp,#64
BLT Rnd_AboveMantissa
Rnd_HighWord
; Here Rtmp2 = 64 - Rtmp, in the range 0..31; the whole of OP1mlo lies below
; the rounding boundary.
; Branch out if rounding is exact.
ORRS Rtmp,OP1mlo,OP1mhi,LSL Rtmp2
BEQ Rnd_Exact
; We now know we want to round down if we're rounding to zero, or if we're
; rounding to minus infinity and the number is positive, or if we're
; rounding to plus infinity and the number is negative.
MOVS Rtmp,OP1sue,LSL #32-Sign_pos
TSTCS Rins,#1:SHL:RM_pos
TSTCC Rins,#1:SHL:(RM_pos+1)
ASSERT RM_pos < 7 ;So that constants don't disturb C
BNE Rnd_HighWord_RoundDown
; If we're not rounding to nearest, we must now be rounding up.
TST Rins,#RM_mask
BNE Rnd_HighWord_RoundUp
ASSERT RM_Nearest = 0
; We're rounding to nearest. Produce the round and sticky bits, then work
; out which way we're rounding.
ADD Rtmp,Rtmp2,#1
ORRS Rtmp,OP1mlo,OP1mhi,LSL Rtmp ;C<-round, Z<-NOT(sticky)
BNE Rnd_HighWord_GotDir ;Branch if not halfway case
; Halfway case: round to even. If Rtmp2 is 0 the CMP leaves C clear and the
; MOVCSS is skipped, so the result is rounded down.
CMP Rtmp2,#1 ;C<-least significant bit, from
MOVCSS Rtmp,OP1mhi,LSL Rtmp2 ; high word unless Rtmp2 is 0.
Rnd_HighWord_GotDir
BCS Rnd_HighWord_RoundUp
Rnd_HighWord_RoundDown
RSB Rtmp2,Rtmp2,#32 ;Clear all bits below rounding
MOV OP1mhi,OP1mhi,LSR Rtmp2 ; boundary
MOVS OP1mhi,OP1mhi,LSL Rtmp2
MOV OP1mlo,#0
MOVEQ RNDexp,#0 ;Exponent must change for 0 result
MOV Rarith,#&40000000 ;And set round=0, sticky=1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Rnd_HighWord_RoundUp
RSB Rtmp2,Rtmp2,#32 ;Set all bits below rounding
MVN OP1mhi,OP1mhi,LSR Rtmp2 ; boundary
MVN OP1mhi,OP1mhi,LSL Rtmp2
MOV OP1mlo,#&FFFFFFFF
MOV Rarith,#&C0000000 ;And set round=1, sticky=1
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Rnd_AboveMantissa
; The rounding cannot possibly be exact - we must either be rounding down to
; zero or up to one. Furthermore, we know that the round bit is 0 and the
; sticky bit is 1. So we can only be rounding up if we're rounding to plus
; or minus infinity, and the result must be of the correct sign as well.
; Bit 31 of Rtmp  := sign EOR (rounding mode bit 0);
; bit 31 of Rtmp2 := sign EOR (rounding mode bit 1).
; The BICS leaves bit 31 set only when the first is 1 and the second is 0,
; and the BMI then takes the round-up path.
EOR Rtmp,OP1sue,Rins,LSL #31-RM_pos ;Somewhat tricky code to
EOR Rtmp2,OP1sue,Rins,LSL #30-RM_pos ; establish the above
BICS Rtmp,Rtmp,Rtmp2
BMI Rnd_UpToOne
Rnd_DownToZero
MOV OP1mhi,#0
MOV OP1mlo,#0
MOV RNDexp,#0
MOV Rarith,#&40000000
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Rnd_UpToOne
; Return an all-ones mantissa with biased exponent EIExp_bias-1, together
; with round=1 and sticky=1.
MOV OP1mhi,#&FFFFFFFF
MOV OP1mlo,#&FFFFFFFF
MOV RNDexp,#(EIExp_bias-1):AND:&FF00
ORR RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
ASSERT (EIExp_bias-1) < &10000
MOV Rarith,#&C0000000
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Rnd_Exact
; We just need to return the number itself, with rounding bits equal to
; zero.
MOV Rarith,#0
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional assembly of Rnd
;===========================================================================
[ :DEF: compare_s :LOR: FPEWanted :LOR: FPASCWanted
; Routine to compare two internal format floating point numbers. It has two
; entry points: "CompareFPE", which has an optimised fast track for common
; vs. common comparisons, and "CompareFPASC", which avoids the test for this
; optimised fast track - since it should never happen. The second entry
; point lies a long way down in the source to avoid addressing constraints.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
; Entry: OP1sue = First operand sign, uncommon, exponent;
; OP1mhi = First operand mantissa, high word;
; OP1mlo = First operand mantissa, low word;
; OP2sue = Second operand sign, uncommon, exponent;
; OP2mhi = Second operand mantissa, high word;
; OP2mlo = Second operand mantissa, low word;
; Rfpsr = FPSR;
; Rins = instruction (needed to discriminate between
; CMF/CMFE/CNF/CNFE and for traps);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: Rarith = result NZCV in bits 31:28; other bits zero;
; OP1sue, OP1mhi, OP1mlo, OP2sue, OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14
; may be corrupt.
; Rfpsr may be updated.
; All other registers preserved.
;
; Only the common-vs-common path (Compare_Common, Compare_EqualMag) is in
; this section; Compare_Uncommon is defined elsewhere. In the library build
; the FPE-style entry is also exported under the name __fp_compare.
[ FPEWanted :LOR: FPLibWanted
CompareFPE
[ FPLibWanted
__fp_compare
]
CDebug3 3,"CompareFPE: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
; Start by detecting the "fast track" case of both operands being common.
; (The TSTEQ only runs if operand 1 is common, so NE afterwards means at
; least one operand is uncommon.)
TST OP1sue,#Uncommon_bit
TSTEQ OP2sue,#Uncommon_bit
BNE Compare_Uncommon
]
Compare_Common
; Start by changing the sign of the second operand if the operation is
; CMF(E). (CNF(E) is easier than CMF(E), basically because addition is
; commutative and subtraction isn't.)
; In the emulator builds Rins says which instruction this is; in the
; remaining (library) build the sign is always flipped.
[ FPEWanted :LOR: FPASCWanted
TST Rins,#CompNeg_bit
EOREQ OP2sue,OP2sue,#Sign_bit
|
EOR OP2sue,OP2sue,#Sign_bit
]
; Both operands are common. We start with a magnitude comparison - life is
; fairly easy if (as is likely) it comes out not equal. In this case, the
; results are:
;
; Magnitude Operand 1 Operand 2 | Result for
; comparison sign sign | CNF(E)
; ------------------------------------+------------
; > + X | >
; > - X | <
; < X + | >
; < X - | <
; ExpComp is a macro defined elsewhere in the file.
; NOTE(review): the conditional CMPs that follow rely on it leaving the
; flags of an exponent comparison (EQ if the exponents match) - confirm
; against the macro definition.
ExpComp Rtmp,OP1sue,OP2sue,Rtmp2 ;Rtmp := left-aligned op1 exp.
CMPEQ OP1mhi,OP2mhi
CMPEQ OP1mlo,OP2mlo
BEQ Compare_EqualMag
; Magnitudes differ. C from the comparison picks whose sign decides the
; result: operand 1's if its magnitude is the larger (CS), otherwise operand
; 2's (CC). The TEQ copies that sign into N.
TEQCS OP1sue,#0 ;NB does not affect C
TEQCC OP2sue,#0
ASSERT Sign_pos = 31
MOVPL Rarith,#Comp_GT
MOVMI Rarith,#Comp_LT
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Compare_EqualMag
; If the operands are equal magnitude, then if they're both zero, the
; results is equality. Otherwise, the result is given by the following
; table:
;
; Operand 1 Operand 2 | Result for
; sign sign | CNF(E)
; -----------------------+------------
; + + | >
; + - | =
; - + | =
; - - | <
;
; Of course, since they're equal magnitude, they're both zero if the first
; one is. Note Rtmp still contains a left-aligned operand 1 exponent.
EORS Rtmp2,OP1sue,OP2sue ;Are signs opposite or the same?
ASSERT Sign_pos = 31
MOV Rarith,#Comp_EQ ;Result if signs opposite
; MOV without S leaves the flags from the EORS intact, so MI here still
; means "signs differ".
IF Interworking :LOR: Thumbing
BXMI LR
ELSE
MOVMI PC,LR
ENDIF
ORR Rtmp,Rtmp,OP1mhi ;Otherwise, are they both zero?
ORRS Rtmp,Rtmp,OP1mlo
; Exponent and both mantissa words zero: return with Rarith still Comp_EQ.
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR
ENDIF
; Same sign, non-zero, equal magnitude: the shared sign gives the result.
TST OP1sue,#Sign_bit
MOVEQ Rarith,#Comp_GT
MOVNE Rarith,#Comp_LT
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional assembly of Compare
;===========================================================================
[ FPEWanted :LOR: FPASCWanted :LOR: :DEF: fix_s :LOR: :DEF: fixu_s
; Routine to FIX an internal format floating point number. There are the
; usual two entry points.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
; Entry: OP1sue = Operand sign, uncommon, exponent;
; OP1mhi = Operand mantissa, high word;
; OP1mlo = Operand mantissa, low word;
; Rfpsr = FPSR;
; Rins = instruction (needed for rounding information and traps);
; Rwp, Rfp, Rsp hold their usual values;
; R14 = return link.
; Exit: Rarith = result value;
; OP1sue, OP1mhi, OP1mlo, OP2sue, OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14
; may be corrupt.
; Rfpsr may be updated.
; All other registers preserved.
;
; Build variants visible in this section:
;   * Emulator builds (FPEWanted or FPASCWanted) round through
;     RoundNum_Extended using the rounding mode in Rins, return the result in
;     Rarith and report inexact/out-of-range results through
;     InexactTrapForI / InvalidOperation1ForI (all defined elsewhere).
;   * Library builds (fix_s / fixu_s) round with the inline round-to-nearest
;     code below, leave the integer in (OP1mhi,OP1mlo), and signal the outcome
;     in OP1sue: 0 for success, IVO_bits ORed in for an out-of-range value.
;     The fixu_s variant keeps the whole of OP1sue and skips the sign
;     application.
[ FPEWanted
FixFPE
CDebug3 3,"FixFPE: operand =",OP1sue,OP1mhi,OP1mlo
; Start by splitting between common and uncommon operands.
TST OP1sue,#Uncommon_bit
BNE Fix_Uncommon
]
[ :DEF: fix_s
__fp_fix_common
]
[ :DEF: fixu_s
__fp_fixu_common
]
Fix_Common
; The operand is common. Split OP1sue into sign and biased exponent.
AND Rarith,OP1sue,#ToExp_mask
[ :LNOT: :DEF: fixu_s
AND OP1sue,OP1sue,#Sign_bit
]
Fix_Numeric
; Calculate shift amount to denormalise the number to have effective
; unbiased exponent 63 - i.e. to put the true binary point at the rounding
; boundary.
STMFD Rsp!,{LR} ;There may be a subroutine call below
MOV RNDexp,#((EIExp_bias+63):AND:&FF00)
ORR RNDexp,RNDexp,#((EIExp_bias+63):AND:&FF)
ASSERT (EIExp_bias+63) <= &FFFF
; Rtmp := (EIExp_bias+63) - operand exponent. LS (unsigned lower or same)
; means the operand exponent is at least EIExp_bias+63.
SUBS Rtmp,RNDexp,Rarith
BLS Fix_OutOfRange ;Deal with massively out of range values
; Now denormalise the number to have this unbiased exponent.
; (Denorm is a macro defined elsewhere in the file.)
Denorm OP1mhi,OP1mlo,Rarith,Rtmp,Rtmp2,Rtmp
; Next, we need to round the result to extended precision.
[ FPEWanted :LOR: FPASCWanted
AND RNDprm,Rins,#RM_mask
ORR RNDprm,RNDprm,#2:SHL:(RM_pos+2)
MOV RNDdir,#0 ;Result has not been rounded so far
BL RoundNum_Extended
|
; Expanded out rounding code
; Round to nearest, ties to even: the top bit of Rarith is the round bit and
; the remaining bits are sticky. On a round-up the increment is a 64-bit
; add, so a carry out of the low word must go into the high word.
MOVS Rtmp,Rarith,LSL #1 ;C<-round, Z<-"tied case"
BCC Fix_NoRounding ;Skip all rounding code...
MOVEQS Rtmp,OP1mlo,LSR #1 ; If "tied" C<-round
ADDCSS OP1mlo,OP1mlo,#1 ;Increment low word
ADDCSS OP1mhi,OP1mhi,#1 ;If carry out, increment high word
MOVCS OP1mhi,#EIUnits_bit ;If mantissa overflow, adjust
ADDCS RNDexp,RNDexp,#1 ; mantissa and exponent
Fix_NoRounding
]
[ :LNOT: :DEF: fixu_s
; Produce the potential result, checking for an out-of-range value.
; We know at this point that (OP1mhi,OP1mlo) contains the unsigned integer
; result, which is in the range 0 to 2^63, *both ends included*, and that
; OP1sue contains the sign of the result. We first need to apply the sign to
; this value - this is done by some slightly tricky code to avoid branches.
; Note we cannot tell the difference between a result of +2^63 and -2^63
; after this. This doesn't matter, though - they're both well out of range!
; 64-bit negate: the RSBS leaves C set only when the low word was zero, which
; is exactly when the complemented high word needs 1 added to it.
MOVS Rtmp,OP1sue,LSL #32-Sign_pos ;CS if -ve, CC if +ve
MVNCS OP1mhi,OP1mhi ;If -ve, 1's compl't high
RSBCSS OP1mlo,OP1mlo,#0 ; word, 2's compl't low word
ADDCS OP1mhi,OP1mhi,#1 ; and do carry if needed
]
; The result is now in (OP1mhi,OP1mlo). Check for it being out of range -
; i.e. for its top 33 bits not being all identical.
TEQ OP1mhi,OP1mlo,ASR #31
BNE Fix_OutOfRange
[ FPEWanted :LOR: FPASCWanted
MOV Rarith,OP1mlo
; The only remaining exception that could occur at this point is an inexact
; result.
; If the result is exact, we don't want to do anything about the inexact
; exception. If it's inexact and the inexact trap is disabled, we want to
; set the inexact cumulative bit in the FPSR. If it's inexact and the
; inexact trap is enabled, we want to call the trap. We use some tricky
; code to distinguish the three cases in-line.
CMP RNDdir,#0 ;Leaves CS/EQ if exact, NE if inexact
MOVNES Rtmp,Rfpsr,LSR #IXE_pos+1
;Now CS/EQ if exact, CS/NE if inexact &
; trap enabled, CC/NE if inexact & trap
ASSERT SysID_FPA <> 0 ; disabled (since SysID non-zero & not
ASSERT SysID_FPE <> 0 ; shifted out)
ASSERT SysID_pos > IXE_pos
ORRCC Rfpsr,Rfpsr,#IXC_bit
BLHI InexactTrapForI ;Works because HI = CS/NE
|
MOV OP1sue,#0 ;Signal no error
]
; Return, popping the link register saved at Fix_Numeric.
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
Fix_OutOfRange
; An out of range FIX produces an invalid operation, with a potential result
; of &7FFFFFFF or &80000000, depending on the sign of the operand.
; Both branches to this label are taken after the STMFD at Fix_Numeric, so
; the saved LR is popped on every path below.
[ FPEWanted :LOR: FPASCWanted
LDMFD Rsp!,{LR}
MOV Rarith,#:NOT:TopBit ;Make &7FFFFFFF
EOR Rarith,Rarith,OP1sue,ASR #31 ;Convert to &80000000 if -ve
MOV Rtmp,#InvReas_FixRange
B InvalidOperation1ForI
|
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
]
] ; Conditional assembly of Fix
;===========================================================================
[ :DEF: addsub_s :LOR: FPEWanted :LOR: FPASCWanted
; The second entry point to the addition/subtraction routine, meant for use
; by the FPASC and without a fast track for common operands.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
; Entry labels: AddSubFPASC in FPASC builds and __fp_addsub_uncommon in
; library builds both fall straight into AddSub_Uncommon.
[ FPASCWanted
AddSubFPASC
CDebug3 3,"AddSubFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
]
[ FPLibWanted
__fp_addsub_uncommon
]
AddSub_Uncommon
; We have to do a full addition/subtraction, since either or both of the
; operands may be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
; suitable NaN result.
;
; (b) Check for infinities. If found, the infinity effectively becomes the
; result, unless both operands are infinities and (after taking
; account of whether an addition or subtraction is involved) they are
; effectively of opposite signs.
;
; (c) If no NaNs or infinities, adjust the operands by replacing all
; effectively unnormalised numbers by the corresponding normalised or
; extended denormalised number. Then call AddSub_Common, which will
; work correctly on zeros, normalised numbers and extended
; denormalised numbers.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
; TNaNInf is a macro defined elsewhere in the file.
; NOTE(review): the BMI below relies on the second TNaNInf leaving N equal to
; bit 31 of Rtmp - confirm against the macro definition.
TNaNInf Rtmp2,OP2sue,OP2mhi ;Rtmp2[31] := (op2 is NaN/inf)
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
BMI AddSub_NaNInf1
TST Rtmp2,#TopBit ;Operand 2 NaN or infinity?
BNE AddSub_NaNInf2Only
; Now we know there are no NaNs or infinities and therefore no Invalid
; Operation or Divide-By-Zero exceptions - which means we no longer need to
; keep track of exactly what the operands are. Next, we will convert the
; remaining types of numbers to zeros, normalised numbers and extended
; denormalised numbers, which can be dealt with by a call to AddSub_Common
; and one to NormaliseOp1.
; The types of numbers that require converting are extended unnormalised
; numbers and zeros, and single and double denormalised numbers. In the case
; of the extended unnormalised numbers and zeros, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
STMFD Rsp!,{LR} ;We will have subroutine calls below
; For each operand: shift its uncommon bit up to the units-bit position (bit
; 31, per the ASSERT) and AND with the high mantissa word, so that N is set
; exactly when uncommon = units = 1. Rarith is only a scratch destination.
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI OP1sue,OP1sue,#1:SHL:EIExp_pos
ANDS Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP2mhi,OP2mhi,#EIUnits_bit
ADDMI OP2sue,OP2sue,#1:SHL:EIExp_pos
; Now we need to normalise all these types of numbers, which now means all
; uncommon numbers except those with exponent 0 (which are extended
; precision denormalised numbers and should be left alone).
; Exp2Top is a macro defined elsewhere; it is invoked conditionally (NE) and
; flag-setting (S), and the BLNE that follows uses the flags it leaves.
TST OP1sue,#Uncommon_bit
Exp2Top Rarith,OP1sue,NE,S ;Complete test & set up for call
BLNE $NormDenormOp1_str
TST OP2sue,#Uncommon_bit
Exp2Top Rarith,OP2sue,NE,S ;Complete test & set up for call
BLNE $NormDenormOp2_str
; Call AddSub_Common to do the addition, then normalise the result if it
; isn't already normalised and isn't zero. (This is necessary because e.g. a
; magnitude sum of two denormalised numbers will only have been shifted 1
; bit by AddSub_Common.)
BL AddSub_Common
TST OP1mhi,#EIUnits_bit
; Units bit set: the result is already normalised, so return immediately.
IF Interworking :LOR: Thumbing
LDMNEFD Rsp!,{LR}
BXNE LR
ELSE
LDMNEFD Rsp!,{PC}
ENDIF
; LR is free to use as a scratch destination here: the return address is
; still on the stack and the BL below overwrites LR anyway.
ORRS LR,OP1mhi,OP1mlo
BLNE $NormaliseOp1_str
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
AddSub_NaNInf1
; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).
TST Rtmp2,#TopBit
BEQ AddSub_NaNInf1Only
; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an infinity with their shared sign if they have the same effective
; sign, or an invalid operation if they have opposite effective signs
; ("effective" means after taking ADF/SUF/RSF distinctions into account).
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
; The three ORRs below combine every mantissa bit of both operands except
; bit 31 of each high word (shifted out by the LSL #1; this file asserts
; EIUnits_pos = 31, so that is the units bit). A zero result therefore means
; neither operand has any mantissa bit set below the units bit.
ORR Rtmp,OP1mlo,OP1mhi,LSL #1 ;Test if both are infinities
ORR Rtmp,Rtmp,OP2mlo
ORRS Rtmp,Rtmp,OP2mhi,LSL #1
BNE $ConvertNaNs_str ;If not, use shared code
; NOTE(review): BiShift is a macro defined elsewhere. From its arguments it
; appears to EOR OP2sue with the SubNotAdd bit of Rins moved to the sign
; position - confirm against the macro definition.
BiShift EOR,Rtmp,OP2sue,Rins,LSR #SubNotAdd_pos,LSL #Sign_pos
EORS Rtmp,Rtmp,OP1sue ;Check whether signs are
ASSERT Sign_pos = 31 ; effectively same.
; Because the sign is in bit 31, the N flag from the EORS is the comparison
; result: PL means the effective signs agree.
ANDPL Rtmp,OP1sue,#Sign_bit ;If so, result is infinity
BPL AddSub_InfShared ; (with op1 sign unless RSF)
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp,#InvReas_MagSubInf ;If not, it's an invalid
B InvalidOperation2ForSDE ; operation
|
; Library build: report the invalid operation by setting IVO_bits in OP1sue
; and return to the caller.
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
AddSub_NaNInf1Only
; The first operand is a NaN or infinity, the second isn't. The result is:
; * an invalid operation exception if the first operand is a signalling
; NaN;
; * the first operand unchanged if it is a quiet NaN;
; * the standard infinity if the first operand is an infinity, with its
; sign determined by that of the first operand and whether the
; instruction is RSF.
; A non-zero result from the ORRS means some mantissa bit other than bit 31
; of the high word is set, which this code treats as "operand is a NaN".
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is operand a NaN?
BNE $ConvertNaN1Of2_str ;Use standard exception/quiet NaN
; propagation code if so
; Infinity: pass the first operand's sign in Rtmp to AddSub_InfShared, which
; applies the RSF sign flip and builds the result.
AND Rtmp,OP1sue,#Sign_bit ;Make standard infinity with right
B AddSub_InfShared ; sign
AddSub_NaNInf2Only
; The first operand is not a NaN or infinity, the second is. The result is:
; * an invalid operation exception if the second operand is a signalling
; NaN;
; * the second operand unchanged if it is a quiet NaN;
; * the standard infinity if the second operand is an infinity, with its
; sign determined by that of the second operand and whether the
; instruction is SUF.
ORRS Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is operand a NaN?
BNE $ConvertNaN2Of2_str ;Use standard exception/quiet NaN
; propagation code if so
; Infinity: start from the second operand's sign and invert it when the
; SubNotAdd bit of the instruction is set, then fall through into
; AddSub_InfShared, which inverts it again when RSF_bit is set.
AND Rtmp,OP2sue,#Sign_bit ;Make standard infinity with right
TST Rins,#SubNotAdd_bit ; sign
EORNE Rtmp,Rtmp,#Sign_bit
AddSub_InfShared
; Shared tail for infinite results. On entry Rtmp holds the prospective
; result sign in the Sign_bit position. The sign is inverted if RSF_bit is
; set in Rins, the prototype infinity is loaded into OP1regs, and the sign is
; ORed into OP1sue before returning to LR.
TST Rins,#RSF_bit
EORNE Rtmp,Rtmp,#Sign_bit
; NOTE(review): ADRL is selected when CoreDebugging is non-zero, presumably
; because debug code can push Prototype_Infinity out of ADR range - confirm.
[ CoreDebugging = 0
ADR OP1sue,Prototype_Infinity
|
ADRL OP1sue,Prototype_Infinity
]
LDMIA OP1sue,OP1regs
ORR OP1sue,OP1sue,Rtmp
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
] ; Conditional assembly of AddSub
;===========================================================================
[ :DEF: mul_s :LOR: FPEWanted :LOR: FPASCWanted
; The second entry point to the normal/fast multiplication routine, meant
; for use by the FPASC and without a fast track for common operands.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
[ FPASCWanted
MultFPASC
CDebug3 3,"MultFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
]
[ FPLibWanted
__fp_mult_uncommon
]
Mult_Uncommon
; We have to do a full multiplication, since either or both of the operands
; may be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
; suitable NaN result.
;
; (b) Check for infinities. If found, the result is an infinity with sign
; equal to the exclusive-OR of the two operand signs, unless the other
; operand is a zero, in which case we have an invalid operation.
;
; (c) Check for zeros. If found, the result is a zero with sign equal to
; the exclusive-OR of the two operand signs.
;
; (d) If no NaNs, infinities or zeros, we can transform the problem into
; that of multiplying together two normalised numbers, though the
; normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
; The op2 test is done first so that the flags left for the BMI below come
; from the op1 test; the op2 result is kept in the top bit of Rtmp2.
TNaNInf Rtmp2,OP2sue,OP2mhi ;Rtmp2[31] := (op2 is NaN/inf)
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
BMI Mult_NaNInf1
TST Rtmp2,#TopBit ;Operand 2 NaN or infinity?
BNE Mult_NaNInf2Only
; Now if either operand is a zero, the result is zero. We can detect zeros
; by the mantissa being all zero, since only zeros, some unnormalised URD
; results, extended unnormalised zeros and extended infinities have this
; property, we're assuming the operands are not URD results and we've
; already dealt with extended infinities.
; The second ORRS only executes when op1's mantissa is non-zero, so Z is set
; at the BEQ if either mantissa is all zero.
ORRS Rtmp,OP1mhi,OP1mlo
ORRNES Rtmp,OP2mhi,OP2mlo
BEQ Mult_Zero
; Both operands are now normalised numbers, denormalised numbers or extended
; unnormalised non-zero numbers. The first step is to convert all of these
; to normalised numbers, possibly with a negative biased exponent. After
; doing the exponent and sign calculations, we then call Mult_Mantissas to
; complete the calculation.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; The ANDS shifts the uncommon bit up to bit 31 and ANDs it with the units
; bit, so MI means "uncommon and units both set".
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI OP1sue,OP1sue,#1:SHL:EIExp_pos
ANDS Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP2mhi,OP2mhi,#EIUnits_bit
ADDMI OP2sue,OP2sue,#1:SHL:EIExp_pos
; Extract both exponents, reduce OP1sue to the result sign, and form
; RNDexp = exp1 + exp2 - (EIExp_bias-1). The constant is subtracted in two
; byte-sized pieces, which the ASSERT below guarantees is sufficient.
AND Rtmp,OP1sue,#ToExp_mask
AND Rtmp2,OP2sue,#ToExp_mask
EOR OP1sue,OP1sue,OP2sue ;Produce result sign
AND OP1sue,OP1sue,#Sign_bit
ADD RNDexp,Rtmp,Rtmp2
SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
SUB RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa
; overflow is exp1+exp2-bias+1
; Normalise whichever mantissas do not already have their units bit set,
; then restore LR so that Mult_Mantissas is entered by a plain branch.
STMFD Rsp!,{LR} ;We will have subroutine calls below
TST OP1mhi,#EIUnits_bit
BLEQ $NormaliseOp1_str
TST OP2mhi,#EIUnits_bit
BLEQ $NormaliseOp2_str
LDMFD Rsp!,{LR}
B Mult_Mantissas
Mult_Zero
; Zero product: OP1sue is reduced to just the result sign (exclusive-OR of
; the two operand signs) and every other returned field is cleared before
; returning to LR.
EOR OP1sue,OP1sue,OP2sue ;Combine the operand signs
AND OP1sue,OP1sue,#Sign_bit ; and drop everything else
MOV Rarith,#0 ;No round/sticky bits
MOV RNDexp,#0 ;Zero exponent
MOV OP1mlo,#0 ;Zero mantissa, low word
MOV OP1mhi,#0 ; and high word
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Mult_NaNInf1
; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).
TST Rtmp2,#TopBit
BEQ Mult_NaNInf1Only
; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an infinity with sign determined by those of the two operands.
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
; The ORRs combine all mantissa bits of both operands except bit 31 of each
; high word (discarded by the LSL #1); zero means neither operand is a NaN.
ORR Rtmp,OP1mlo,OP1mhi,LSL #1 ;Test if both are infinities
ORR Rtmp,Rtmp,OP2mlo
ORRS Rtmp,Rtmp,OP2mhi,LSL #1
BNE $ConvertNaNs_str ;If not, use shared code
Mult_InfShared
; Shared tail for infinite products (also branched to from Mult_NaNInf1Only
; and Mult_NaNInf2Only). The result sign is computed from the operand signs
; before OP1sue is overwritten, the prototype infinity is loaded into
; OP1regs, and the sign is then ORed into OP1sue.
EOR Rtmp,OP1sue,OP2sue ;If so, result is infinity
AND Rtmp,Rtmp,#Sign_bit ; with correct sign
ADR OP1sue,Prototype_Infinity
LDMIA OP1sue,OP1regs
ORR OP1sue,OP1sue,Rtmp
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Mult_NaNInf1Only
; The first operand is a NaN or infinity, the second isn't. The result is:
; * an invalid operation exception if the first operand is a signalling
; NaN;
; * the first operand unchanged if it is a quiet NaN;
; * an invalid operation exception if the first operand is an infinity and
; the second is a zero;
; * the standard infinity if the first operand is an infinity and the
; second operand is not a zero, with its sign determined by those of the
; two operands.
; Note that we can detect the second operand being zero by its mantissa
; being all zero, since only zeros, some unnormalised URD results, extended
; unnormalised zeros and extended infinities have this property, we're
; assuming the operands are not URD results and we know the second operand
; isn't an extended infinity.
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is first operand a NaN?
BNE $ConvertNaN1Of2_str ;Use standard exception/quiet NaN
; propagation code if so
ORRS Rtmp,OP2mhi,OP2mlo ;Is second operand a zero?
BNE Mult_InfShared ;If not, result is an infinity
; Infinity times zero. The emulator builds pass a reason code in Rtmp to the
; shared invalid-operation code; the library build sets IVO_bits in OP1sue
; and returns to LR.
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp,#InvReas_InfTimes0 ;Otherwise, an invalid operation
B InvalidOperation2ForSDE
|
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
Mult_NaNInf2Only
; The first operand is not a NaN or infinity, the second is. The result is:
; * an invalid operation exception if the second operand is a signalling
; NaN;
; * the second operand unchanged if it is a quiet NaN;
; * an invalid operation exception if the first operand is a zero and the
; second is an infinity;
; * the standard infinity if the first operand is not a zero and the second
; operand is an infinity, with its sign determined by those of the two
; operands.
; Note that we can detect the first operand being zero by its mantissa being
; all zero, since only zeros, some unnormalised URD results, extended
; unnormalised zeros and extended infinities have this property, we're
; assuming the operands are not URD results and we know it isn't an extended
; infinity.
ORRS Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is second operand a NaN?
BNE $ConvertNaN2Of2_str ;Use standard exception/quiet NaN
; propagation code if so
ORRS Rtmp,OP1mhi,OP1mlo ;Is first operand a zero?
BNE Mult_InfShared ;If not, result is an infinity
; Zero times infinity. The emulator builds pass a reason code in Rtmp to the
; shared invalid-operation code; the library build sets IVO_bits in OP1sue
; and returns to LR.
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp,#InvReas_0TimesInf ;Otherwise, an invalid operation
B InvalidOperation2ForSDE
|
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
]
;===========================================================================
[ :DEF: div_s :LOR: FPEWanted :LOR: FPASCWanted
; The second entry point to the normal/fast division/reverse division
; routine, meant for use by the FPASC and without a fast track for common
; operands.
; The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
[ FPASCWanted
DivFPASC
CDebug3 3,"DivFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
]
[ FPLibWanted
__fp_div_uncommon
__fp_rdv_uncommon
]
Div_Uncommon
; We have to do a full division, since either or both of the operands may be
; uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
; suitable NaN result.
;
; (b) Check for infinities. If found, the result is:
; * An invalid operation exception if both operands are infinities;
; * An infinite result if the dividend is an infinity and the
; divisor is numeric;
; * A zero result if the dividend is numeric and the divisor is an
; infinity;
;
; (c) Check for zeros. If found, the result is:
; * An invalid operation exception if both operands are zeros;
; * A divide-by-zero exception if the dividend is non-zero and the
; divisor is zero;
; * A zero if the dividend is zero and the divisor is non-zero.
;
; (d) If no NaNs, infinities or zeros, we can transform the problem into
; that of dividing a normalised number by another, though the
; normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
; The op2 test is done first so that the flags left for the BMI below come
; from the op1 test; the op2 result is kept in the top bit of Rtmp2.
TNaNInf Rtmp2,OP2sue,OP2mhi ;Rtmp2[31] := (op2 is NaN/inf)
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
BMI Div_NaNInf1
TST Rtmp2,#TopBit ;Operand 2 NaN or infinity?
BNE Div_NaNInf2Only
; Now if either operand is a zero, we need to take special action. We can
; detect zeros by the mantissa being all zero, since only zeros, some
; unnormalised URD results, extended unnormalised zeros and extended
; infinities have this property, we're assuming the operands are not URD
; results and we've already dealt with extended infinities.
; NOTE(review): the zero check is only assembled for the FPE/FPASC builds;
; the library build presumably handles zeros before reaching this entry
; point - confirm against the callers.
[ FPEWanted :LOR: FPASCWanted
ORRS Rtmp,OP1mhi,OP1mlo
ORRNES Rtmp,OP2mhi,OP2mlo
BEQ Div_Zero
; Both operands are now going to be converted to normalised numbers. We now
; know that we are not going to need to know the operands for trap purposes,
; so we can swap them if this is a normal (rather than reverse) division.
TST Rins,#RevDiv_bit
|
TST Rins,#Reverse
]
BNE Div_Uncommon_Swapped
; Normal division: exchange the two operands word by word through Rtmp.
MOV Rtmp,OP1sue
MOV OP1sue,OP2sue
MOV OP2sue,Rtmp
MOV Rtmp,OP1mhi
MOV OP1mhi,OP2mhi
MOV OP2mhi,Rtmp
MOV Rtmp,OP1mlo
MOV OP1mlo,OP2mlo
MOV OP2mlo,Rtmp
Div_Uncommon_Swapped
; Both operands are now normalised numbers, denormalised numbers or extended
; unnormalised non-zero numbers. The first step is to convert all of these
; to normalised numbers, possibly with a negative biased exponent. After
; doing the exponent and sign calculations, we then call Div_Mantissas to
; complete the calculation.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; The ANDS shifts the uncommon bit up to bit 31 and ANDs it with the units
; bit, so MI means "uncommon and units both set".
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI OP1sue,OP1sue,#1:SHL:EIExp_pos
ANDS Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP2mhi,OP2mhi,#EIUnits_bit
ADDMI OP2sue,OP2sue,#1:SHL:EIExp_pos
; Extract both exponents, reduce OP1sue to the result sign, and form
; RNDexp = (OP2 exponent) - (OP1 exponent) + EIExp_bias. The bias is added in
; two byte-sized pieces, which the ASSERT below guarantees is sufficient.
AND Rtmp,OP1sue,#ToExp_mask
AND Rtmp2,OP2sue,#ToExp_mask
EOR OP1sue,OP1sue,OP2sue ;Produce result sign
AND OP1sue,OP1sue,#Sign_bit
SUB RNDexp,Rtmp2,Rtmp
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF00
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF
ASSERT EIExp_bias < &10000 ;Result exponent if no mantissa
; underflow is exp1-exp2+bias
; Normalise whichever mantissas do not already have their units bit set,
; then restore LR so that Div_Mantissas is entered by a plain branch.
STMFD Rsp!,{LR} ;We will have subroutine calls below
TST OP1mhi,#EIUnits_bit
BLEQ $NormaliseOp1Neg_str
TST OP2mhi,#EIUnits_bit
BLEQ $NormaliseOp2_str
LDMFD Rsp!,{LR}
B Div_Mantissas
[ FPEWanted :LOR: FPASCWanted
Div_Zero
; One or both operands are zeros, and both are numeric values (i.e. not NaNs
; or infinities). The result is:
; * An invalid operation exception if both operands are zeros;
; * A divide-by-zero exception if the dividend is non-zero and the divisor
; is zero;
; * A zero if the dividend is zero and the divisor is non-zero.
;
; Split according to whether this is a normal or reverse division.
; This code runs before any operand swap, so for a normal division the first
; operand is the dividend and for a reverse division it is the divisor.
MOV Rtmp,#InvReas_0Div0 ;The only type of invalid operation
; that occurs below
TST Rins,#RevDiv_bit
BNE Div_Zero_Reversed
; It's a normal division - check the three cases above.
; A non-zero dividend means the divisor must be the zero operand.
ORRS Rtmp2,OP1mhi,OP1mlo ;Check dividend
BNE DivideByZero2
ORRS Rtmp2,OP2mhi,OP2mlo ;Check divisor
BEQ InvalidOperation2ForSDE
Div_ZeroByX
; The result is zero.
EOR OP1sue,OP1sue,OP2sue ;Get sign right
AND OP1sue,OP1sue,#Sign_bit ;Uncommon bit is zero
MOV OP1mhi,#0 ;So is mantissa
MOV OP1mlo,#0
MOV RNDexp,#0 ;And exponent
MOV Rarith,#0 ;And round/sticky bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Div_Zero_Reversed
; It's a reverse division - check the three cases above.
; A non-zero divisor means the dividend must be the zero operand.
ORRS Rtmp2,OP1mhi,OP1mlo ;Check divisor
BNE Div_ZeroByX
ORRS Rtmp2,OP2mhi,OP2mlo ;Check dividend
BNE DivideByZero2
B InvalidOperation2ForSDE
]
Div_NaNInf1
; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).
TST Rtmp2,#TopBit
BEQ Div_NaNInf1Only
; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an invalid operation.
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
; The ORRs combine all mantissa bits of both operands except bit 31 of each
; high word (discarded by the LSL #1); zero means neither operand is a NaN.
ORR Rtmp,OP1mlo,OP1mhi,LSL #1 ;Test if both are infinities
ORR Rtmp,Rtmp,OP2mlo
ORRS Rtmp,Rtmp,OP2mhi,LSL #1
BNE $ConvertNaNs_str ;If not, use shared code
; Infinity divided by infinity. The emulator builds pass a reason code in
; Rtmp to the shared invalid-operation code; the library build sets IVO_bits
; in OP1sue and returns to LR.
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp,#InvReas_InfDivInf
B InvalidOperation2ForSDE
|
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
Div_NaNInf1Only
; The first operand is a NaN or infinity, the second isn't. The result is:
; * an invalid operation exception if the first operand is a signalling
; NaN;
; * the first operand unchanged if it is a quiet NaN;
; * a standard infinity with sign equal to the exclusive-OR of the two
; operand signs if the first operand is an infinity and the instruction
; is a normal division;
; * a zero with sign equal to the exclusive-OR of the two operand signs if
; the first operand is an infinity and the instruction is a reverse
; division.
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is first operand a NaN?
BNE $ConvertNaN1Of2_str ;Use standard exception/quiet NaN
; propagation code if so
; Compute the result sign before OP1sue is reused as the prototype address.
EOR Rtmp,OP1sue,OP2sue
AND Rtmp,Rtmp,#Sign_bit
[ FPASCWanted :LOR: FPEWanted
TST Rins,#RevDiv_bit
|
TST Rins,#Reverse
]
; EQ = normal division (infinity / x), NE = reverse division (x / infinity).
ADREQ OP1sue,Prototype_Infinity
ADRNE OP1sue,Prototype_Zero
LDMIA OP1sue,OP1regs
ORR OP1sue,OP1sue,Rtmp
MOV RNDexp,#0 ;These two are only needed when
MOV Rarith,#0 ; result is zero
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Div_NaNInf2Only
; The first operand is not a NaN or infinity, the second is. The result is:
; * an invalid operation exception if the second operand is a signalling
; NaN;
; * the second operand unchanged if it is a quiet NaN;
; * a standard infinity with sign equal to the exclusive-OR of the two
; operand signs if the second operand is an infinity and the instruction
; is a reverse division;
; * a zero with sign equal to the exclusive-OR of the two operand signs if
; the second operand is an infinity and the instruction is a normal
; division.
ORRS Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is second operand a NaN?
BNE $ConvertNaN2Of2_str ;Use standard exception/quiet NaN
; propagation code if so
; Compute the result sign before OP1sue is reused as the prototype address.
EOR Rtmp,OP1sue,OP2sue
AND Rtmp,Rtmp,#Sign_bit
[ FPEWanted :LOR: FPASCWanted
TST Rins,#RevDiv_bit
|
TST Rins,#Reverse
]
; NE = reverse division (infinity / x), EQ = normal division (x / infinity).
ADRNE OP1sue,Prototype_Infinity
ADREQ OP1sue,Prototype_Zero
LDMIA OP1sue,OP1regs
ORR OP1sue,OP1sue,Rtmp
MOV RNDexp,#0 ;These two are only needed when
MOV Rarith,#0 ; result is zero
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
;===========================================================================
[ :DEF: fmod_s :LOR: FPEWanted :LOR: FPASCWanted
; The second part of the IEEE remainder function.
Rem_Uncommon
; One or both of the operands may be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce an invalid operation exception and
; suitable NaN result.
;
; (b) Check for infinities. If found, the result is:
; * An invalid operation exception if the first operand is an
; infinity.
; * Equal to the first operand if the second operand is an infinity
; and the first isn't.
;
; (c) Check for zeros. If found, the result is:
; * An invalid operation exception if the second operand is a zero;
; * Equal to the first operand if the first operand is a zero and
; the second isn't;
;
; (d) If no NaNs, infinities or zeros, we can transform the problem into
; that of doing the remainder of one normalised number by another,
; though the normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
; The op2 test is done first so that the flags left for the BMI below come
; from the op1 test; the op2 result is kept in the top bit of Rtmp2.
TNaNInf Rtmp2,OP2sue,OP2mhi ;Rtmp2[31] := (op2 is NaN/inf)
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
BMI Rem_NaNInf1
TST Rtmp2,#TopBit ;Operand 2 NaN or infinity?
BNE Rem_NaNInf2Only
; Now if the second operand is a zero, we've got an invalid operation, and
; if it isn't but the first operand is, we've got a result equal to the
; first operand. We can detect zeros by the mantissa being all zero, since
; only zeros, some unnormalised URD results, extended unnormalised zeros and
; extended infinities have this property, we're assuming the operands are
; not URD results and we've already dealt with extended infinities.
ORRS Rtmp,OP2mhi,OP2mlo
[ FPEWanted :LOR: FPASCWanted
MOVEQ Rtmp,#InvReas_XRem0
BEQ InvalidOperation2ForSDE
|
; Library build: set IVO_bits in OP1sue and return, but only when the second
; operand is zero (all three instructions are conditional on EQ).
ORREQ OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR
ENDIF
]
; This ORRS leaves Rarith zero on the branch, which Rem_FirstOperand_Zero
; relies on.
ORRS Rarith,OP1mhi,OP1mlo
BEQ Rem_FirstOperand_Zero
; Both operands may now be forced to be normalised numbers - after we've
; dealt with signs and exponents, we can rejoin the main code.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; The ANDS shifts the uncommon bit up to bit 31 and ANDs it with the units
; bit, so MI means "uncommon and units both set".
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI OP1sue,OP1sue,#1:SHL:EIExp_pos
ANDS Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP2mhi,OP2mhi,#EIUnits_bit
ADDMI OP2sue,OP2sue,#1:SHL:EIExp_pos
; NOTE(review): LR is stacked here and is not popped before the branch to
; Rem_ExponentsDone below; presumably the main remainder code expects LR on
; the stack at that label - confirm.
STMFD Rsp!,{LR} ;We will have subroutine calls below
AND RNDexp,OP2sue,#ToExp_mask ;Raw second operand exponent
TST OP2mhi,#EIUnits_bit ;Normalise second operand,
BLEQ $NormaliseOp2_str ; then adjust to get
SUB Rtmp2,RNDexp,#1 ; prospective result exp.
AND RNDexp,OP1sue,#ToExp_mask ;Raw first operand exponent
; The test must be on the first operand's own units bit: the second operand
; has just been normalised, so testing OP2mhi here would never request
; normalisation of the first operand.
TST OP1mhi,#EIUnits_bit ;Normalise first operand
BLEQ $NormaliseOp1_str ; then determine the number
SUBS Rarith,RNDexp,Rtmp2 ; of iterations - 1
MOV RNDexp,Rtmp2 ;Get prospective result exp.
; back where it's wanted
; All the special exponent handling is done, so we might as well rejoin the
; main code.
B Rem_ExponentsDone
Rem_NaNInf1
; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).
TST Rtmp2,#TopBit
BEQ Rem_NaNInf1Only
; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an invalid operation.
; If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
; The ORRs combine all mantissa bits of both operands except bit 31 of each
; high word (discarded by the LSL #1); zero means neither operand is a NaN.
ORR Rtmp,OP1mlo,OP1mhi,LSL #1 ;Test if both are infinities
ORR Rtmp,Rtmp,OP2mlo
ORRS Rtmp,Rtmp,OP2mhi,LSL #1
BNE $ConvertNaNs_str ;If not, use shared code
; Infinity REM infinity. The emulator builds pass a reason code in Rtmp to
; the shared invalid-operation code; the library build sets IVO_bits in
; OP1sue and returns to LR.
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp,#InvReas_InfRemX
B InvalidOperation2ForSDE
|
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
Rem_NaNInf1Only
; The first operand is a NaN or infinity, the second isn't. The result is:
; * an invalid operation exception if the first operand is a signalling
; NaN;
; * the first operand unchanged if it is a quiet NaN;
; * an invalid operation if it is an infinity.
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is first operand a NaN?
BNE $ConvertNaN1Of2_str ;Use standard exception/quiet NaN
; propagation code if so
; Infinity REM x. The emulator builds pass a reason code in Rtmp to the
; shared invalid-operation code; the library build sets IVO_bits in OP1sue
; and returns to LR.
[ FPEWanted :LOR: FPASCWanted
MOV Rtmp,#InvReas_InfRemX
B InvalidOperation2ForSDE
|
ORR OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
Rem_NaNInf2Only
; The first operand is not a NaN or infinity, the second is. The result is:
; * an invalid operation exception if the second operand is a signalling
; NaN;
; * the second operand unchanged if it is a quiet NaN;
; * equal to the first operand if the second operand is an infinity.
ORRS Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is second operand a NaN?
BNE $ConvertNaN2Of2_str ;Use standard exception/quiet NaN
; propagation code if so
; Second operand is an infinity: fall through to return the first operand.
Rem_FirstOperand
; If the first operand is common, life is easy.
; Split it into exponent (RNDexp) and sign (OP1sue), clear Rarith and return;
; every instruction down to the return is conditional on EQ.
TST OP1sue,#Uncommon_bit
ANDEQ RNDexp,OP1sue,#ToExp_mask
ANDEQ OP1sue,OP1sue,#Sign_bit
MOVEQ Rarith,#0
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR
ENDIF
; If it's uncommon, life is trickier. First check for zeros.
; This ORRS leaves Rarith zero on the branch, which Rem_FirstOperand_Zero
; relies on.
ORRS Rarith,OP1mhi,OP1mlo
BEQ Rem_FirstOperand_Zero
; The operand is now a denormalised number or extended unnormalised non-zero
; number; it needs conversion to an internal precision number. In the case
; of the extended denormalised and unnormalised numbers, this just requires
; us to normalise them; in the case of the single and double denormalised
; numbers, we need to clear their units bits and add 1 to their exponents
; before we normalise them.
;
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have a units bit of 1:
; all other uncommon numbers with this property are NaNs or infinities and
; have been dealt with already.
AND RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent
AND OP1sue,OP1sue,#Sign_bit ; and its sign
TST OP1mhi,#EIUnits_bit
BICNE OP1mhi,OP1mhi,#EIUnits_bit
ADDNE RNDexp,RNDexp,#1
MOV Rarith,#0 ;Result is exact.
; Plain branch rather than BL: LR is untouched, so the normalisation routine
; returns directly to this routine's caller.
B $NormaliseOp1_str ;NB must be necessary, so no
; point in checking whether
; normalised
Rem_FirstOperand_Zero
; The result is a zero with the first operand's sign. Both branches to this
; label follow an "ORRS Rarith,OP1mhi,OP1mlo" that produced zero, so OP1mhi,
; OP1mlo and Rarith are already known to be zero; only the exponent and the
; sign word need setting up.
MOV RNDexp,#0 ;Zero exponent
AND OP1sue,OP1sue,#Sign_bit ;Keep only the sign
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
;===========================================================================
; Three-word constants that the special-case code in this file loads into
; OP1regs with LDMIA when a result is a zero or an infinity. The result sign
; is ORed into OP1sue by the code that loads them.
Prototype_Zero
DCD &00000000,&00000000,&00000000
; NOTE(review): the first word presumably holds the uncommon bit together
; with the maximum exponent (&7FFF), followed by an all-zero mantissa -
; confirm against the internal format definitions.
Prototype_Infinity
DCD &40007FFF,&00000000,&00000000
;===========================================================================
[ :DEF: sqrt_s :LOR: FPEWanted :LOR: FPASCWanted
; The second part of the square root routine, which deals with uncommon
; operands.
[ FPLibWanted
__fp_sqrt_uncommon
]
Sqrt_Uncommon
; We have to deal with the square root of an uncommon value. The cases are:
;
; * The square root of a signalling NaN is an invalid operation;
;
; * The square root of a quiet NaN is the NaN itself;
;
; * The square root of plus infinity is plus infinity;
;
; * The square root of minus infinity is an invalid operation;
;
; * The square root of an extended unnormalised zero is a zero of the same
; sign;
;
; * The square roots of denormalised numbers and extended unnormalised
; numbers can be determined by transforming them into normalised numbers
; (possibly with an out-of-range exponent), then using the standard
; square root code above.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op is NaN/inf)
BMI Sqrt_NaNInf
; Now if the operand is a zero, the result is a zero of the same sign. We
; can detect zeros by the mantissa being all zero, since only zeros, some
; unnormalised URD results, extended unnormalised zeros and extended
; infinities have this property, we're assuming the operand is not a URD
; result and we've already dealt with extended infinities.
ORRS Rtmp,OP1mhi,OP1mlo
ANDEQ OP1sue,OP1sue,#Sign_bit
BEQ Sqrt_Zero
; The operand is now a denormalised number or extended unnormalised non-zero
; number. If it is negative, we've got an invalid operation. Otherwise, we
; know that no invalid operation or divide-by-zero exception is going to
; occur, so we can convert it to a normalised number, possibly with a
; negative biased exponent. After doing the exponent and sign calculations,
; we then call Sqrt_Mantissa to complete the calculation.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have a units bit of 1:
; all other numbers with this property are NaNs or infinities and have
; been dealt with already.
AND RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent
ANDS OP1sue,OP1sue,#Sign_bit
[ FPEWanted :LOR: FPASCWanted
MOVNE Rtmp,#InvReas_SqrtNeg
BNE InvalidOperation1ForSDE
|
; Library build: a negative operand is an invalid operation, reported by
; setting IVO_bits in OP1sue and returning. The return must be conditional
; on NE (negative) so that positive operands continue to the calculation
; below.
ORRNE OP1sue,OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BXNE LR
ELSE
MOVNE PC,LR
ENDIF
]
STMFD Rsp!,{LR} ;We will have subroutine calls below
TST OP1mhi,#EIUnits_bit
BICNE OP1mhi,OP1mhi,#EIUnits_bit
ADDNE RNDexp,RNDexp,#1
BL $NormaliseOp1_str ;NB must be necessary, so no
; point in checking whether
; normalised
; The bias is added in two byte-sized pieces and the sum is halved; the MOVS
; also leaves the bit shifted out in the carry flag (the LDMFD below does not
; alter flags).
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF00
ADD RNDexp,RNDexp,#EIExp_bias:AND:&FF
ASSERT (EIExp_bias-1) < &10000 ;Result exponent if mantissa
; overflow is (exp+bias) DIV 2
MOVS RNDexp,RNDexp,LSR #1
LDMFD Rsp!,{LR}
B Sqrt_Mantissa
Sqrt_Zero
; Zero operand: the result is the operand itself. The branch to this label is
; preceded by an ANDEQ that has already reduced OP1sue to its sign bit and by
; an ORRS showing the mantissa words to be zero, so only the rounding
; information and the exponent remain to be cleared.
MOV Rarith,#0 ;No round/sticky bits
MOV RNDexp,#0 ;Zero exponent
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Sqrt_NaNInf
; The operand is a NaN or infinity. If it's a NaN, we use the standard
; rules for propagating NaNs. If an infinity, we've got an invalid operation
; if it is negative and a result equal to the standard plus infinity if it
; is positive.
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is operand a NaN?
BNE $ConvertNaN1_str ;Use standard exception/quiet NaN
; propagation code if so
TST OP1sue,#Sign_bit
; NE = minus infinity (invalid operation), EQ = plus infinity. The emulator
; builds branch away on NE and load the prototype unconditionally; the
; library build makes each instruction conditional and shares the return
; below.
[ FPEWanted :LOR: FPASCWanted
MOVNE Rtmp,#InvReas_SqrtNeg
BNE InvalidOperation1ForSDE
ADR OP1sue,Prototype_Infinity
LDMIA OP1sue,OP1regs
|
ORRNE OP1sue,OP1sue,#IVO_bits
ADREQ OP1sue,Prototype_Infinity
LDMEQIA OP1sue,OP1regs
]
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; The second entry point to the move/move negated/absolute value routine,
; meant for use by the FPASC.
; This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
[ FPASCWanted
MoveFPASC
CDebug3 3,"MoveFPASC: operand =",OP1sue,OP1mhi,OP1mlo
; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.
]
Move_Uncommon
; Only uncommon values will get here. First split out NaNs and infinities.
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op is NaN/inf)
BMI Move_NaNInf
; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero. If it's the
; last of these, change it to a real zero and treat it as a numeric.
; Only the exponent needs clearing here: the ORRS has just shown both
; mantissa words to be zero.
ORRS Rtmp,OP1mhi,OP1mlo
MOVEQ RNDexp,#0
BEQ Move_Numeric
; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; With the exponent field at bit 0 (asserted below), the AND leaves the
; exponent itself in RNDexp, so the later adjustment is a plain ADD of 1.
AND RNDexp,OP1sue,#ToExp_mask
ASSERT EIExp_pos = 0
STMFD Rsp!,{LR} ;We will have subroutine calls below
; The ANDS shifts the uncommon bit up to bit 31 and ANDs it with the units
; bit, so MI means "uncommon and units both set".
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI RNDexp,RNDexp,#1
BL $NormaliseOp1_str ;NB must be necessary, so no
; point in checking whether
; normalised
; Restore LR so that Move_Numeric is entered by a plain branch.
LDMFD Rsp!,{LR}
B Move_Numeric
Move_NaNInf
; The operand is a NaN or infinity. If it's an infinity, we just want to
; perform the standard sign manipulations on it and return a standard
; infinity. If it's a NaN, we need to pay attention to the implicit IEEE
; format conversion.
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is operand a NaN?
BNE Move_NaN
AND Rtmp,OP1sue,#Sign_bit ;Isolate sign
TST Rins,#MNF_bit ;Do sign manipulations
EORNE Rtmp,Rtmp,#Sign_bit
TST Rins,#ABS_bit
BICNE Rtmp,Rtmp,#Sign_bit
ADR OP1sue,Prototype_Infinity
LDMIA OP1sue,OP1regs
ORR OP1sue,OP1sue,Rtmp
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
Move_NaN
STMFD Rsp!,{LR}
BL NaNConversionNeeded
TEQ Rarith,#0 ;Conversion needed?
BMI Move_NaN_DoSigns ;Just alter signs if not
BL ConvertNaN1_Special ;Do correct NaN conversion
IF Interworking :LOR: Thumbing
LDMNEFD Rsp!,{LR} ;We're done and must *not* alter
; signs if an invalid operation trap
; occurred
BXNE LR
ELSE
LDMNEFD Rsp!,{PC} ;We're done and must *not* alter
; signs if an invalid operation trap
; occurred
ENDIF
Move_NaN_DoSigns
; Do the sign manipulations and return.
TST Rins,#MNF_bit
EORNE OP1sue,OP1sue,#Sign_bit
TST Rins,#ABS_bit
BICNE OP1sue,OP1sue,#Sign_bit
IF Interworking :LOR: Thumbing
LDMFD Rsp!,{LR}
BX LR
ELSE
LDMFD Rsp!,{PC}
ENDIF
]
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; The second entry point to the NRM routine, intended for use by the FPASC.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; NaNs and infinities are dealt with here; all other uncommon operands are
; passed on to Norm_ZeroUnnormOrDenorm (defined elsewhere in this file) with
; the operand's exponent field extracted into RNDexp.
[ FPASCWanted
NormFPASC
CDebug3 3,"NormFPASC: operand =",OP1sue,OP1mhi,OP1mlo
; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.
]
Norm_Uncommon
; Only uncommon values will get here. First split out all but NaNs and
; infinities.
; PL after the macro means the operand is neither a NaN nor an infinity.
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op is NaN/inf)
ANDPL RNDexp,OP1sue,#ToExp_mask
BPL Norm_ZeroUnnormOrDenorm
NormUrd_NaNInf
; The operand is a NaN or infinity. If it's an infinity, we just want to
; return a standard infinity. If it's a NaN, we use the standard NaN
; propagation code.
; This label is also the NaN/infinity exit used by the URD and RND
; uncommon-operand code.
; The shift by one drops the units bit of OP1mhi, so NE below means that
; some fraction bit is set, i.e. the operand is a NaN.
ORRS Rtmp,OP1mlo,OP1mhi,LSL #1 ;Check for NaNs
BNE $ConvertNaN1_str
; Infinity: keep the operand's sign in Rtmp, load the prototype infinity
; and put the original sign back on it.
AND Rtmp,OP1sue,#Sign_bit ;Isolate sign
ADR OP1sue,Prototype_Infinity
LDMIA OP1sue,OP1regs
ORR OP1sue,OP1sue,Rtmp
; Return to the caller, using BX when interworking or Thumb is configured.
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; URD, second entry point: used by the FPASC and tuned for uncommon
; operands.
; Inputs that are unnormalised URD results, or that are not valid internal
; format numbers, are not handled correctly by this routine.
;
; Entry and exit follow the standard monadic operation conventions that are
; described at the top of this file.
    [ FPASCWanted
UrdFPASC
        CDebug3 3,"UrdFPASC: operand =",OP1sue,OP1mhi,OP1mlo
; No uncommon-bit check is required here: the FPA does not bounce common
; values in the Prepare stage for these instructions.
    ]
Urd_Uncommon
; NaNs and infinities get exactly the treatment that the NRM instruction
; gives them, so hand them to the shared code.
        TNaNInf Rtmp,OP1sue,OP1mhi      ;Rtmp[31] := (op1 is NaN/inf)
        BMI     NormUrd_NaNInf
; What remains is a denormalised number, or an extended precision
; unnormalised number or zero. Of these, only single and double precision
; denormalised numbers still have a units bit of 1, and they are the ones
; whose exponent and mantissa need correcting. After that, and once sign and
; exponent have been separated, the standard Urd_Numeric routine applies.
; The units bit is tested first; the two ANDs that follow do not set the
; flags, so the NE condition is still valid for the corrections after them.
        TST     OP1mhi,#EIUnits_bit     ;NE := single/double denormalised
        AND     Rarith,OP1sue,#ToExp_mask ;Rarith := operand exponent
        AND     OP1sue,OP1sue,#Sign_bit ;OP1sue := operand sign only
        ADDNE   Rarith,Rarith,#1        ;Correct the exponent
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit ; and remove the units bit
        B       Urd_Numeric
]
;===========================================================================
[ FPEWanted :LOR: FPASCWanted
; The second part of the RND routine, which deals with uncommon operands.
;
; Exits: NaNs and infinities go to NormUrd_NaNInf, an all-zero mantissa goes
; to Rnd_Exact, and everything else is normalised and passed to Rnd_Numeric.
; Rnd_Exact and Rnd_Numeric are defined elsewhere in this file.
Rnd_Uncommon
; Split out NaNs and infinities, which are dealt with in exactly the same
; way as by the NRM instruction.
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
BMI NormUrd_NaNInf
; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero. If it's the
; last of these, change it to a real zero and treat it as a numeric.
; RNDexp receives the OR of the two mantissa words, so on the EQ path it is
; left holding zero; on that path OP1sue is also reduced to just its sign.
ORRS RNDexp,OP1mhi,OP1mlo
ANDEQ OP1sue,OP1sue,#Sign_bit
BEQ Rnd_Exact
; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
; The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; Split the sign/exponent word: exponent into RNDexp, sign alone left in
; OP1sue.
AND RNDexp,OP1sue,#ToExp_mask
AND OP1sue,OP1sue,#Sign_bit
ASSERT EIExp_pos = 0
STMFD Rsp!,{LR} ;We will have subroutine calls below
; Only uncommon operands reach this code, so testing the units bit alone is
; enough to pick out the single and double denormalised numbers.
TST OP1mhi,#EIUnits_bit
BICNE OP1mhi,OP1mhi,#EIUnits_bit
ADDNE RNDexp,RNDexp,#1
BL $NormaliseOp1_str ;NB must be necessary, so no
; point in checking whether
; normalised
LDMFD Rsp!,{LR} ;Restore the return address stacked above
B Rnd_Numeric
]
;===========================================================================
[ :DEF: compare_s :LOR: FPEWanted :LOR: FPASCWanted
; The second entry point to the comparison routine, meant for use by the
; FPASC and without a fast track for common operands.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Has the same entry and exit conventions as "CompareFPE" above.
[ FPASCWanted
CompareFPASC
CDebug3 3,"CompareFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
CDebug3 3," op2 =",OP2sue,OP2mhi,OP2mlo
]
Compare_Uncommon
; We have to do a full comparison, since either or both of the operands may
; be uncommon. What we will do is:
;
; (a) Check for NaNs. If found, produce a trap if appropriate, or a result
; of "unordered" otherwise.
;
; (b) If no NaNs, adjust the operands by replacing all infinities by the
; standard extended infinity, and all effectively unnormalised numbers
; by the corresponding normalised or denormalised number. Then call
; Compare_Common, which will work correctly on zeros, denormalised
; numbers, normalised numbers and extended infinities.
;
; So the first thing we do is check for NaNs. This is done by first testing
; for a NaN or infinity (they have similar bit patterns) by a standard
; technique, then checking whether the fraction is non-zero.
; In each pair below the ORRNES is only executed if the operand is a NaN or
; infinity; the shift by one drops the units bit of the high mantissa word,
; so NE after the pair means that the operand is a NaN. Rarith is only a
; scratch destination here.
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
TNaNInf Rtmp2,OP2sue,OP2mhi ;Rtmp2[31] := (op2 is NaN/inf)
TST Rtmp,#TopBit ;Operand 1 NaN or infinity?
ORRNES Rarith,OP1mlo,OP1mhi,LSL #1 ;If so, is it a NaN?
BNE Compare_Unordered
TST Rtmp2,#TopBit ;Operand 2 NaN or infinity?
ORRNES Rarith,OP2mlo,OP2mhi,LSL #1 ;If so, is it a NaN?
BNE Compare_Unordered
; Now we know there are no NaNs and therefore no exceptions - which means we
; no longer need to keep track of exactly what the operands are. We are
; going to massage the operands into a form where we can use the
; Compare_Common routine on them - note that it already works for zeros,
; normalised numbers, extended denormalised numbers and normal extended
; precision infinities. The remaining numbers are the other infinities, the
; extended unnormalised numbers and zeros, and the single and double
; precision denormalised numbers.
; We will first convert all the infinities to a standard extended
; precision infinity, to ensure that they compare equal with each other. Or
; rather, an almost standard one - we will mark the result as common to
; avoid mistaking it for an unnormalised or denormalised number later on.
STMFD Rsp!,{LR} ;We're likely to make subroutine calls
; For each operand that is an infinity: keep only the sign in the
; sign/exponent word (which also clears the uncommon bit), set the low 15
; bits of that word to all ones (&7FFF, built in two steps), and clear the
; units bit of the mantissa.
TST Rtmp,#TopBit
ANDNE OP1sue,OP1sue,#Sign_bit
ORRNE OP1sue,OP1sue,#&FF
ORRNE OP1sue,OP1sue,#&7F00
BICNE OP1mhi,OP1mhi,#EIUnits_bit
TST Rtmp2,#TopBit
ANDNE OP2sue,OP2sue,#Sign_bit
ORRNE OP2sue,OP2sue,#&FF
ORRNE OP2sue,OP2sue,#&7F00
BICNE OP2mhi,OP2mhi,#EIUnits_bit
; Now we need to deal with the extended unnormalised numbers and zeros, and
; the single and double denormalised numbers. These basically need
; converting to extended precision normalised or denormalised numbers. In
; the case of the extended unnormalised numbers and zeros, this just
; requires us to normalise them; in the case of the single and double
; denormalised numbers, we need to clear their units bits and add 1 to their
; exponents before we normalise them.
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
; In each ANDS below the shift lines the uncommon bit of the sign/exponent
; word up with the units bit (bit 31) of the high mantissa word, so MI means
; uncommon = units = 1. The exponent is incremented in place in the
; sign/exponent word.
ANDS Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP1mhi,OP1mhi,#EIUnits_bit
ADDMI OP1sue,OP1sue,#1:SHL:EIExp_pos
ANDS Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
ASSERT EIUnits_pos = 31
BICMI OP2mhi,OP2mhi,#EIUnits_bit
ADDMI OP2sue,OP2sue,#1:SHL:EIExp_pos
; Now we need to normalise all these types of numbers, which now means all
; uncommon numbers except those with exponent 0 (which are extended
; precision denormalised numbers and should be left alone).
; The Exp2Top macro is only invoked with the NE condition from the preceding
; TST, and its S option updates the flags that decide whether the
; normalisation routine is called (presumably NE afterwards means a
; non-zero exponent - confirm against the macro definition).
TST OP1sue,#Uncommon_bit
Exp2Top Rarith,OP1sue,NE,S ;Complete test & set up for call
BLNE $NormDenormOp1_str
TST OP2sue,#Uncommon_bit
Exp2Top Rarith,OP2sue,NE,S ;Complete test & set up for call
BLNE $NormDenormOp2_str
; And now we can compare the results as though they were common numbers.
LDMFD Rsp!,{LR} ;Restore the return address stacked above
B Compare_Common
Compare_Unordered
; The result is definitely unordered. We need to choose the correct result.
; Note that this label is only reached from the two NaN checks at the top of
; Compare_Uncommon, before LR is stacked, so the returns below use LR
; directly.
; Which "unordered" result value goes into Rarith depends on AC_bit in
; Rfpsr.
TST Rfpsr,#AC_bit
MOVEQ Rarith,#Comp_Un_Orig
MOVNE Rarith,#Comp_Un_Alt
; Now we need to know whether there's an IEEE exception - there is one if
; either operand is a signalling NaN, or if the instruction is CMFE or CNFE.
; Note that the top bits of Rtmp and Rtmp2 are still NaN/infinity flags for
; the two operands.
TST Rtmp,#TopBit ;Is operand 1 a NaN?
ORRNES Rtmp,OP1mlo,OP1mhi,LSL #1
BEQ Compare_Unordered_Op1NotNaN ;If not, operand 2 must be
; Operand 1 is a NaN. A clear top fraction bit (EQ below) marks it as a
; signalling NaN.
ANDS Rtmp,OP1mhi,#EIFracTop_bit ;If so, is it signalling?
[ FPLibWanted
; Library build: a signalling NaN is reported by returning IVO_bits in
; Rarith in place of the unordered result.
MOVEQ Rarith,#IVO_bits
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR
ENDIF
|
; On the EQ path Rtmp is zero as a result of the ANDS above, which is the
; value of InvReas_SigNaN (see the ASSERT), so the reason code is already in
; place.
BEQ InvalidOperation2ForI ; (invalid operation if so)
ASSERT InvReas_SigNaN = 0
]
; Operand 1 is a quiet NaN. Operand 2 still has to be checked in case it is
; a signalling NaN.
TST Rtmp2,#TopBit ;Is operand 2 a NaN?
ORRNES Rtmp,OP2mlo,OP2mhi,LSL #1
[ FPEWanted :LOR: FPASCWanted
BEQ Compare_Unordered_Op2NotNaN ;Branch if not
|
; Other builds: operand 2 is not a NaN, so return with the unordered result
; already in Rarith.
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR
ENDIF
]
Compare_Unordered_Op1NotNaN
; Operand 2 is known to be a NaN on both routes to this point.
ANDS Rtmp,OP2mhi,#EIFracTop_bit ;Operand 2 is a NaN: is it signalling?
[ FPLibWanted
; Library build: this is the last check, so the return is unconditional.
; Rarith is replaced by IVO_bits only if operand 2 is a signalling NaN;
; otherwise the unordered result chosen above is returned.
MOVEQ Rarith,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
|
; As for operand 1: Rtmp is zero (InvReas_SigNaN) on the EQ path.
BEQ InvalidOperation2ForI ; (invalid operation if so)
ASSERT InvReas_SigNaN = 0
]
[ FPEWanted :LOR: FPASCWanted
Compare_Unordered_Op2NotNaN
; No signalling NaN is involved. There is still an exception if the
; instruction has CompExc_bit set; otherwise return the unordered result.
TST Rins,#CompExc_bit ;Is instruction CMFE/CNFE?
IF Interworking :LOR: Thumbing
BXEQ LR
ELSE
MOVEQ PC,LR ;If not, no exception
ENDIF
MOV Rtmp,#InvReas_CompQNaN ;Otherwise, invalid op
B InvalidOperation2ForI
]
]
;===========================================================================
[ FPEWanted :LOR: FPASCWanted :LOR: :DEF: fix_s :LOR: :DEF: fixu_s
; The second entry point to the FIX routine, meant for use by the FPASC and
; optimised for uncommon operands.
; This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Has the same entry and exit conventions as "FixFPE" above.
;
; Depending on the build, the uncommon-operand code is entered through
; Fix_Uncommon, __fp_fix_uncommon (when fix_s is defined) or
; __fp_fixu_uncommon (when fixu_s is defined); all of these label the same
; instruction.
[ FPASCWanted
FixFPASC
CDebug3 3,"FixFPASC: operand =",OP1sue,OP1mhi,OP1mlo
; Start by splitting between common and uncommon operands.
TST OP1sue,#Uncommon_bit
BEQ Fix_Common
]
[ :DEF: fix_s
__fp_fix_uncommon
]
[ :DEF: fixu_s
__fp_fixu_uncommon
]
Fix_Uncommon
; NaNs and infinities will produce invalid operation exceptions, with the
; precise nature of the exception depending on whether the operand is a
; signalling NaN, a quiet NaN or an infinity.
TNaNInf Rtmp,OP1sue,OP1mhi ;Rtmp[31] := (op1 is NaN/inf)
BMI Fix_NaNInf
; The operand is now known to be a denormalised number or an extended
; precision unnormalised number or zero. We have to take a little care about
; single and double precision denormalised numbers, since their exponents
; and mantissas need correcting. Otherwise, we can just use the standard
; Fix_Numeric routine on them once we have separated the sign and the
; exponent from each other. We can recognise the single and double
; denormalised numbers by the fact that they are the only remaining cases
; with a units bit of 1.
AND Rarith,OP1sue,#ToExp_mask ;Extract operand exponent
; The sign is only separated out when fixu_s is not defined; in a fixu_s
; build OP1sue is passed on to Fix_Numeric unchanged.
[ :LNOT: :DEF: fixu_s
AND OP1sue,OP1sue,#Sign_bit ; and sign
]
; NE after the test below means a single or double denormalised number:
; clear its units bit and add 1 to the exponent held in Rarith.
TST OP1mhi,#EIUnits_bit
BICNE OP1mhi,OP1mhi,#EIUnits_bit
ADDNE Rarith,Rarith,#1
B Fix_Numeric
Fix_NaNInf
; All of these produce an invalid operation exception, with the reason being
; InvReas_SigNaN for signalling NaNs, InvReas_FixQNaN for quiet NaNs and
; InvReas_FixInf for infinities.
[ FPEWanted :LOR: FPASCWanted
; First choose between the two NaN reasons on the top fraction bit, then
; override the choice with the infinity reason if the fraction (the mantissa
; without its units bit) turns out to be zero.
TST OP1mhi,#EIFracTop_bit
MOVEQ Rtmp,#InvReas_SigNaN
MOVNE Rtmp,#InvReas_FixQNaN
ORRS Rarith,OP1mlo,OP1mhi,LSL #1
MOVEQ Rtmp,#InvReas_FixInf
MOV Rarith,#TopBit ;Some sort of integer result
B InvalidOperation1ForI
|
; Library build: no reason code is produced; the invalid operation is
; reported to the caller by returning IVO_bits in OP1sue.
MOV OP1sue,#IVO_bits
IF Interworking :LOR: Thumbing
BX LR
ELSE
MOV PC,LR
ENDIF
]
]
;===========================================================================
END