singrdk/base/Kernel/Native/arm/Crt/arith.asm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Microsoft Research Singularity
;;; 
;;; Copyright (c) Microsoft Corporation.  All rights reserved.
;;;
;;; This file contains ARM-specific assembly code.
;;;
        
; arith.s
;
; Copyright (C) Advanced RISC Machines Limited, 1994. All rights reserved.
;
; RCS Revision: 1 
; Checkin Date: 2007/06/29 02:59:16 
; Revising Author

; > coresrc.s.arith
;
; Assembler source for FPA support code and emulator
; ==================================================
; Routines to do arithmetic.
;

; These routines work on numbers in the standard internal format.

;===========================================================================

        GBLS    NormaliseOp1_str
        GBLS    NormaliseOp1Neg_str
        GBLS    NormaliseOp2_str
        GBLS    NormDenormOp1_str
        GBLS    NormDenormOp2_str

; The names of the normalisation and NaN-conversion routines are held in
; assembly-time string variables (declared with GBLS, assigned with SETS
; below). The routines are then labelled or imported as "$<variable>", so
; one source text yields different symbol names in different builds.

        GBLS    ConvertNaNs_str
        GBLS    ConvertNaN1_str
        GBLS    ConvertNaN1Of2_str
        GBLS    ConvertNaN2Of2_str

; FPLibWanted records which of the two naming schemes below was selected.

        GBLL    FPLibWanted

; FPEWanted and FPASCWanted are not declared in this section; presumably
; they are set by whatever includes this file - TODO confirm.
; If either is true, the short names are used and FPLibWanted is {FALSE}.

 [ FPEWanted :LOR: FPASCWanted

NormaliseOp1_str        SETS    "NormaliseOp1"
NormaliseOp1Neg_str     SETS    "NormaliseOp1Neg"
NormaliseOp2_str        SETS    "NormaliseOp2"
NormDenormOp1_str       SETS    "NormDenormOp1"
NormDenormOp2_str       SETS    "NormDenormOp2"

ConvertNaNs_str         SETS    "ConvertNaNs"
ConvertNaN1_str         SETS    "ConvertNaN1"
ConvertNaN1Of2_str      SETS    "ConvertNaN1Of2"
ConvertNaN2Of2_str      SETS    "ConvertNaN2Of2"

FPLibWanted             SETL    {FALSE}

 |

; Otherwise the "__fp_" prefixed names are used and FPLibWanted is {TRUE}.

NormaliseOp1_str        SETS    "__fp_normalise_op1"
NormaliseOp1Neg_str     SETS    "__fp_normalise_op1neg"
NormaliseOp2_str        SETS    "__fp_normalise_op2"
NormDenormOp1_str       SETS    "__fp_norm_denorm_op1"
NormDenormOp2_str       SETS    "__fp_norm_denorm_op2"

ConvertNaNs_str         SETS    "__fp_convert_NaNs"
ConvertNaN1_str         SETS    "__fp_convert_NaN1"
ConvertNaN1Of2_str      SETS    "__fp_convert_NaN_1Of2"
ConvertNaN2Of2_str      SETS    "__fp_convert_NaN_2Of2"

FPLibWanted             SETL    {TRUE}

; In this configuration the normalisation routines are only assembled
; further down when normalise_s is defined. When it is not defined, the
; routines are declared as external symbols here instead.

   [ :LNOT: :DEF: normalise_s

        IMPORT  $NormaliseOp1_str
        IMPORT  $NormaliseOp1Neg_str
        IMPORT  $NormaliseOp2_str
        IMPORT  $NormDenormOp1_str
        IMPORT  $NormDenormOp2_str

        IMPORT  $ConvertNaNs_str
        IMPORT  $ConvertNaN1_str
        IMPORT  $ConvertNaN1Of2_str
        IMPORT  $ConvertNaN2Of2_str
   ]
        
 ]

        [ :DEF: normalise_s :LOR: FPEWanted :LOR: FPASCWanted

; Many of these routines use some standard entry and exit conventions. There
; are two such sets of conventions:
;
; STANDARD MONADIC OPERATION ENTRY AND EXIT
; -----------------------------------------
;
; Entry: OP1sue = Operand sign, uncommon, exponent;
;        OP1mhi = Operand mantissa, high word;
;        OP1mlo = Operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (may be needed to determine the exact
;          operation and/or for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign and uncommon bit; the remaining bits are
;          zero if the uncommon bit is 0, and set correctly for the final
;          result if the uncommon bit is 1;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp (= OP2sue) = if the uncommon bit is 0, the result exponent,
;          which may be negative; otherwise corrupt;
;        Rarith is corrupt if the uncommon bit is 1; otherwise, if the
;          destination precision is extended, it holds the round bit (in bit
;          31) and the sticky bit (in bits 30:0), and if the destination
;          precision is single or double, it holds part of the sticky bit
;          (the remainder of which is held in bits below the round bit in
;          OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        Rfpsr may be updated;
;        All other registers preserved.
;
; STANDARD DYADIC OPERATION ENTRY AND EXIT
; ----------------------------------------
;
; Entry: OP1sue = First operand sign, uncommon, exponent;
;        OP1mhi = First operand mantissa, high word;
;        OP1mlo = First operand mantissa, low word;
;        OP2sue = Second operand sign, uncommon, exponent;
;        OP2mhi = Second operand mantissa, high word;
;        OP2mlo = Second operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (may be needed to determine the exact
;          operation and/or for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign and uncommon bit; the remaining bits are
;          zero if the uncommon bit is 0, and set correctly for the final
;          result if the uncommon bit is 1;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp (= OP2sue) = if the uncommon bit is 0, the result exponent,
;          which may be negative; otherwise corrupt;
;        Rarith is corrupt if the uncommon bit is 1; otherwise, if the
;          destination precision is extended, it holds the round bit (in bit
;          31) and the sticky bit (in bits 30:0), and if the destination
;          precision is single or double, it holds part of the sticky bit
;          (the remainder of which is held in bits below the round bit in
;          OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        Rfpsr may be updated;
;        All other registers preserved.
;
; In both sets of conventions, the routine called is free to produce an
; incorrect result mantissa and rounding information, as long as it knows
; that it will in fact be rounded to the correct value.

;===========================================================================

; Routine to normalise the first or only operand. The biased exponent won't
; be taken below 0: instead, the number will be denormalised if normalising
; it would cause this to happen. Note that the result will never be marked
; as uncommon: any caller of this routine must deal with this itself if
; necessary.
; Entry: OP1sue = First operand sign, remaining bits junk;
;        OP1mhi, OP1mlo = First operand mantissa;
;        Rarith = First operand exponent, shifted to be left aligned in the
;          word;
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP1sue = First operand sign and exponent (uncommon is always 0);
;        OP1mhi, OP1mlo updated;
;        Rarith, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.

$NormDenormOp1_str

; Overview: shift the mantissa in OP1mhi:OP1mlo left until bit 31 of OP1mhi
; is set, but never by more than the exponent supplied in Rarith. Whatever
; exponent is left over (exponent minus shift, or zero when the shift had
; to be capped) is inserted into OP1sue alongside the sign.

; Clear out the junk bits in OP1sue.

        AND     OP1sue,OP1sue,#Sign_bit

; Do we have to normalise by 32 bits or more?

        TEQ     OP1mhi,#0
        BEQ     NormDenormOp1_LongShift

; If not, find out how much we do have to shift by.
; This is a five-step binary search (16, 8, 4, 2, 1). Each MOVS tests
; whether the top 16/8/4/2/1 bits of OP1mhi are all zero; if they are (EQ),
; the two conditional instructions that follow shift OP1mhi left by that
; amount and add the amount to the running total. Rtmp2 merely receives the
; test value and is not otherwise used here.

        MOV     Rtmp,#0                 ;Accumulate shift amount in Rtmp
        MOVS    Rtmp2,OP1mhi,LSR #16
        MOVEQ   OP1mhi,OP1mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP1mhi,LSR #24
        MOVEQ   OP1mhi,OP1mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP1mhi,LSR #28
        MOVEQ   OP1mhi,OP1mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP1mhi,LSR #30
        MOVEQ   OP1mhi,OP1mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP1mhi,LSR #31
        MOVEQ   OP1mhi,OP1mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Then complete the shift - i.e. convert the single
; word shift into a two word shift - adjust the exponent if the exponent was
; greater than the shift amount (otherwise we leave it zero) and return.
;
; The condition codes set by the SUBS stay live all the way down to the
; SUBLO: none of the instructions in between has the S bit. After the SUBS,
; HI means shift amount > exponent (Rtmp2 = the excess), LO means shift
; amount < exponent (Rtmp2 is negative), and EQ means they are equal.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len     ;Shift amt. - exp.
        MOVHI   OP1mhi,OP1mhi,LSR Rtmp2                 ;Undo the excess shift
        MOVHI   Rtmp,Rarith,LSR #32-EIExp_len           ;Net shift = exponent
        RSB     Rarith,Rtmp,#32                         ;Rarith = 32 - net shift
                                                        ; (exponent is no longer
                                                        ; needed in Rarith)
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rarith         ;Bits crossing from low
                                                        ; word; a net shift of 0
                                                        ; gives LSR by 32, which
                                                        ; contributes nothing
        MOV     OP1mlo,OP1mlo,LSL Rtmp
        SUBLO   OP1sue,OP1sue,Rtmp2,LSL #EIExp_pos      ;ADD exp.-shift amt.

; Return: BX LR is assembled when Interworking or Thumbing is set, otherwise
; a plain MOV PC,LR.

  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC, LR
  ENDIF

NormDenormOp1_LongShift

; The top word is zero, so we need to shift by 32 bits or more. Or do we? -
; if the exponent is less than 32, we simply need to shift by the exponent.
; (Rarith holds the exponent left-aligned, so the constant 32 is shifted up
; to the same alignment for the comparison.)

        CMP     Rarith,#32:SHL:(32-EIExp_len)
        BLO     NormDenormOp1_ByExponent

; Now check the bottom word: if it is also zero, we simply need to
; denormalise to exponent 0.
; The MOVS both performs the 32-bit shift (low word becomes high word) and
; tests the moved value for zero.

        MOVS    OP1mhi,OP1mlo
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR           ;OP1sue/mhi/mlo are all already correct!
  ENDIF

        MOV     OP1mlo,#0

; The bottom word is non-zero, so we have a shift amount in the range 32-63.
; Same binary search as above, but the count starts at 32 to account for
; the word move already done.

        MOV     Rtmp,#32
        MOVS    Rtmp2,OP1mhi,LSR #16
        MOVEQ   OP1mhi,OP1mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP1mhi,LSR #24
        MOVEQ   OP1mhi,OP1mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP1mhi,LSR #28
        MOVEQ   OP1mhi,OP1mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP1mhi,LSR #30
        MOVEQ   OP1mhi,OP1mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP1mhi,LSR #31
        MOVEQ   OP1mhi,OP1mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Note that this cannot require us to undo the shift
; from the bottom word to the top word, since we know the exponent was at
; least 32.
;   So we need to backshift if shift amount > exponent, and create a
; non-zero exponent if shift amount < exponent.
; OP1mlo is already zero on this path, so only OP1mhi needs adjusting. As
; above, MOVHI and SUBLO both use the flags from the SUBS.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len     ;Shift amt. - exp.
        MOVHI   OP1mhi,OP1mhi,LSR Rtmp2
        SUBLO   OP1sue,OP1sue,Rtmp2,LSL #EIExp_pos      ;ADD exp.-shift amt.
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF


NormDenormOp1_ByExponent

; We need to shift the mantissa left by the exponent, which is guaranteed to
; be less than 32, and to return a zero exponent (note that OP1sue is
; already set up for this).
; On the path visible here this label is reached only with OP1mhi = 0, so
; the new top word comes entirely from the bits shifted up out of OP1mlo.

        MOV     Rtmp,Rarith,LSR #32-EIExp_len   ;Rtmp = exponent, right-aligned
        RSB     Rtmp2,Rtmp,#32                  ;Rtmp2 = 32 - exponent
        MOV     OP1mhi,OP1mhi,LSL Rtmp
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2
        MOV     OP1mlo,OP1mlo,LSL Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

;===========================================================================

; Routine to normalise the second operand. The biased exponent won't be
; taken below 0: instead, the number will be denormalised if normalising it
; would cause this to happen. Note that the result will never be marked
; as uncommon: any caller of this routine must deal with this itself if
; necessary.
; Entry: OP2sue = Second operand sign, remaining bits junk;
;        OP2mhi, OP2mlo = Second operand mantissa;
;        Rarith = Second operand exponent, shifted to be left aligned in the
;          word;
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP2sue = Second operand sign and exponent (uncommon is always 0);
;        OP2mhi, OP2mlo updated;
;        Rarith, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.

$NormDenormOp2_str

; Overview: shift the mantissa in OP2mhi:OP2mlo left until bit 31 of OP2mhi
; is set, but never by more than the exponent supplied in Rarith. Whatever
; exponent is left over (exponent minus shift, or zero when the shift had
; to be capped) is inserted into OP2sue alongside the sign.

; Clear out the junk bits in OP2sue.

        AND     OP2sue,OP2sue,#Sign_bit

; Do we have to normalise by 32 bits or more?

        TEQ     OP2mhi,#0
        BEQ     NormDenormOp2_LongShift

; If not, find out how much we do have to shift by.
; This is a five-step binary search (16, 8, 4, 2, 1). Each MOVS tests
; whether the top 16/8/4/2/1 bits of OP2mhi are all zero; if they are (EQ),
; the two conditional instructions that follow shift OP2mhi left by that
; amount and add the amount to the running total. Rtmp2 merely receives the
; test value and is not otherwise used here.

        MOV     Rtmp,#0                 ;Accumulate shift amount in Rtmp
        MOVS    Rtmp2,OP2mhi,LSR #16
        MOVEQ   OP2mhi,OP2mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP2mhi,LSR #24
        MOVEQ   OP2mhi,OP2mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP2mhi,LSR #28
        MOVEQ   OP2mhi,OP2mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP2mhi,LSR #30
        MOVEQ   OP2mhi,OP2mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP2mhi,LSR #31
        MOVEQ   OP2mhi,OP2mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Then complete the shift - i.e. convert the single
; word shift into a two word shift - adjust the exponent if the exponent was
; greater than the shift amount (otherwise we leave it zero) and return.
;
; The condition codes set by the SUBS stay live all the way down to the
; SUBLO: none of the instructions in between has the S bit. After the SUBS,
; HI means shift amount > exponent (Rtmp2 = the excess), LO means shift
; amount < exponent (Rtmp2 is negative), and EQ means they are equal.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len     ;Shift amt. - exp.
        MOVHI   OP2mhi,OP2mhi,LSR Rtmp2                 ;Undo the excess shift
        MOVHI   Rtmp,Rarith,LSR #32-EIExp_len           ;Net shift = exponent
        RSB     Rarith,Rtmp,#32                         ;Rarith = 32 - net shift
                                                        ; (exponent is no longer
                                                        ; needed in Rarith)
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR Rarith         ;Bits crossing from low
                                                        ; word; a net shift of 0
                                                        ; gives LSR by 32, which
                                                        ; contributes nothing
        MOV     OP2mlo,OP2mlo,LSL Rtmp
        SUBLO   OP2sue,OP2sue,Rtmp2,LSL #EIExp_pos      ;ADD exp.-shift amt.

; Return: BX LR is assembled when Interworking or Thumbing is set, otherwise
; a plain MOV PC,LR.

  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

NormDenormOp2_LongShift

; The top word is zero, so we need to shift by 32 bits or more. Or do we? -
; if the exponent is less than 32, we simply need to shift by the exponent.
; (Rarith holds the exponent left-aligned, so the constant 32 is shifted up
; to the same alignment for the comparison.)

        CMP     Rarith,#32:SHL:(32-EIExp_len)
        BLO     NormDenormOp2_ByExponent

; Now check the bottom word: if it is also zero, we simply need to
; denormalise to exponent 0.
; The MOVS both performs the 32-bit shift (low word becomes high word) and
; tests the moved value for zero.

        MOVS    OP2mhi,OP2mlo
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR           ;OP2sue/mhi/mlo are all already correct!
  ENDIF
        MOV     OP2mlo,#0

; The bottom word is non-zero, so we have a shift amount in the range 32-63.
; Same binary search as above, but the count starts at 32 to account for
; the word move already done.

        MOV     Rtmp,#32
        MOVS    Rtmp2,OP2mhi,LSR #16
        MOVEQ   OP2mhi,OP2mhi,LSL #16
        ADDEQ   Rtmp,Rtmp,#16
        MOVS    Rtmp2,OP2mhi,LSR #24
        MOVEQ   OP2mhi,OP2mhi,LSL #8
        ADDEQ   Rtmp,Rtmp,#8
        MOVS    Rtmp2,OP2mhi,LSR #28
        MOVEQ   OP2mhi,OP2mhi,LSL #4
        ADDEQ   Rtmp,Rtmp,#4
        MOVS    Rtmp2,OP2mhi,LSR #30
        MOVEQ   OP2mhi,OP2mhi,LSL #2
        ADDEQ   Rtmp,Rtmp,#2
        MOVS    Rtmp2,OP2mhi,LSR #31
        MOVEQ   OP2mhi,OP2mhi,LSL #1
        ADDEQ   Rtmp,Rtmp,#1

; Have we shifted too far? - i.e. by more than the exponent? If so, go back
; the excess distance. Note that this cannot require us to undo the shift
; from the bottom word to the top word, since we know the exponent was at
; least 32.
;   So we need to backshift if shift amount > exponent, and create a
; non-zero exponent if shift amount < exponent.
; OP2mlo is already zero on this path, so only OP2mhi needs adjusting. As
; above, MOVHI and SUBLO both use the flags from the SUBS.

        SUBS    Rtmp2,Rtmp,Rarith,LSR #32-EIExp_len     ;Shift amt. - exp.
        MOVHI   OP2mhi,OP2mhi,LSR Rtmp2
        SUBLO   OP2sue,OP2sue,Rtmp2,LSL #EIExp_pos      ;ADD exp.-shift amt.
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

NormDenormOp2_ByExponent

; We need to shift the mantissa left by the exponent, which is guaranteed to
; be less than 32, and to return a zero exponent (note that OP2sue is
; already set up for this).
; On the path visible here this label is reached only with OP2mhi = 0, so
; the new top word comes entirely from the bits shifted up out of OP2mlo.

        MOV     Rtmp,Rarith,LSR #32-EIExp_len   ;Rtmp = exponent, right-aligned
        RSB     Rtmp2,Rtmp,#32                  ;Rtmp2 = 32 - exponent
        MOV     OP2mhi,OP2mhi,LSL Rtmp
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR Rtmp2
        MOV     OP2mlo,OP2mlo,LSL Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

;===========================================================================

; Routine to float an integer. To fit in with the usual conventions, the
; entry point is given two labels, namely "FltFPE" and "FltFPASC".
;   The value returned is always a numeric value plus associated rounding
; information, with the uncommon bit clear.
; Entry: Rarith = integer;
;        Rfpsr  = FPSR;
;        Rins   = instruction (needed for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign, with the remaining bits zero;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp (= OP2sue) = the result exponent;
;        Rarith = 0 (i.e. the appropriate round and sticky information for
;          extended precision);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        Rfpsr may be updated;
;        All other registers preserved.

        [ FPEWanted
FltFPE
        ]

        [ FPASCWanted
FltFPASC
        ]

; Whichever of the two labels is assembled, it marks the same address: the
; code that follows.
; CDebug1 is a macro defined outside this section; presumably it emits a
; debug trace of Rarith - TODO confirm it has no effect on the registers
; used below.

        CDebug1 3,"FltFPE/FPASC: operand =",Rarith

; Extract the sign and produce an unnormalised mantissa. In the process,
; detect the special case of a zero operand.
;
; Flag chain: the ANDS leaves NE for a negative integer and EQ for a
; non-negative one. On the NE path only RSBNE executes and the flags stay
; NE, so the zero-case instructions are skipped. On the EQ path MOVEQS
; copies the integer and re-sets the flags from it, so EQ after that point
; means "the operand is zero".

        MOV     OP1mlo,#0               ;Mantissa low word is always zero
        ANDS    OP1sue,Rarith,#Sign_bit ;Extract sign
        ASSERT  Sign_pos = 31           ;Internal sign bit must line up with
                                        ; the integer's own sign bit
        RSBNE   OP1mhi,Rarith,#0        ;If -ve, 2's complement the integer
        MOVEQS  OP1mhi,Rarith           ;If +ve, copy and check for zero
        MOVEQ   RNDexp,#0               ;If zero, result exponent is zero
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR                   ; and return (Rarith is already 0)
  ENDIF

; If non-zero, set the appropriate exponent and rounding information, then
; fall through into NormaliseOp1 to complete the job.
;
; The constant EIExp_bias+31 is built in two halves (bits 15:8 with the MOV,
; bits 7:0 with the ORR); the ASSERT checks that it fits in 16 bits so that
; the two halves cover it completely.

        MOV     RNDexp,#(EIExp_bias+31):AND:&FF00
        ORR     RNDexp,RNDexp,#(EIExp_bias+31):AND:&FF
        ASSERT  (EIExp_bias+31) <= &FFFF
        MOV     Rarith,#0               ;No round/sticky bits

; Fall through to NormaliseOp1

;===========================================================================

; NB it is possible to fall through into this routine.

; Routine to normalise the result or first operand. Unlike the two routines
; above, this routine will normalise the exponent to a value less than zero
; if necessary, and it won't put the exponent back into OP1sue. Note that
; the result will never be marked as uncommon: any caller of this routine
; must deal with this itself if necessary.
; Entry: OP1mhi, OP1mlo = Result/first operand mantissa, which must not be
;          all zero;
;        RNDexp = Result/first operand exponent (in normal position in
;          word);
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP1mhi, OP1mlo and RNDexp updated;
;        Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved;
;        NE condition is true.

$NormaliseOp1_str

; Normalise OP1mhi:OP1mlo so that bit 31 of OP1mhi is set, and take the
; total left shift off RNDexp. The mantissa must be non-zero on entry.

; Start the sub-word shift count at zero. MOV without the S bit leaves the
; flags alone, so this can safely sit ahead of the TEQ.

        MOV     Rtmp,#0

; Empty top word? Then promote the bottom word (a 32-bit shift) and charge
; 32 to the exponent.

        TEQ     OP1mhi,#0
        SUBEQ   RNDexp,RNDexp,#32
        MOVEQ   OP1mhi,OP1mlo
        MOVEQ   OP1mlo,#0

; Binary search for the leading one in the top word. Each MOVS asks whether
; the top 16/8/4/2/1 bits are all clear; if so (EQ) that distance is added
; to the count and the top word is closed up by it.

        MOVS    Rtmp2,OP1mhi,LSR #16            ;Top 16 bits clear?
        ADDEQ   Rtmp,Rtmp,#16
        MOVEQ   OP1mhi,OP1mhi,LSL #16
        MOVS    Rtmp2,OP1mhi,LSR #24            ;Top 8 bits clear?
        ADDEQ   Rtmp,Rtmp,#8
        MOVEQ   OP1mhi,OP1mhi,LSL #8
        MOVS    Rtmp2,OP1mhi,LSR #28            ;Top 4 bits clear?
        ADDEQ   Rtmp,Rtmp,#4
        MOVEQ   OP1mhi,OP1mhi,LSL #4
        MOVS    Rtmp2,OP1mhi,LSR #30            ;Top 2 bits clear?
        ADDEQ   Rtmp,Rtmp,#2
        MOVEQ   OP1mhi,OP1mhi,LSL #2
        MOVS    Rtmp2,OP1mhi,LSR #31            ;Top bit clear?
        ADDEQ   Rtmp,Rtmp,#1
        MOVEQ   OP1mhi,OP1mhi,LSL #1

; Charge the sub-word shift to the exponent (no S bit, flags untouched).

        SUB     RNDexp,RNDexp,Rtmp

; Carry the same shift through the bottom word. Rtmp is at most 31, so the
; RSBS result (32 - Rtmp) is never zero: this is what leaves NE set for the
; caller, as the ORR and MOV after it do not alter the flags.

        RSBS    Rtmp2,Rtmp,#32
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2  ;Bits crossing into top word
        MOV     OP1mlo,OP1mlo,LSL Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR                           ;Return
  ENDIF

;===========================================================================

; Routine to normalise the second operand. Unlike the two routines above,
; this routine will normalise the exponent to a value less than zero if
; necessary, and it won't put the exponent back into OP2sue. Note that the
; result will never be marked as uncommon: any caller of this routine must
; deal with this itself if necessary.
; Entry: OP2mhi, OP2mlo = Second operand mantissa, which must not be all
;          zero;
;        RNDexp = Second operand exponent (in normal position in word);
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP2mhi, OP2mlo and RNDexp updated;
;        Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved;
;        NE condition is true.

$NormaliseOp2_str

; Normalise OP2mhi:OP2mlo so that bit 31 of OP2mhi is set, and take the
; total left shift off RNDexp. The mantissa must be non-zero on entry.

; Start the sub-word shift count at zero. MOV without the S bit leaves the
; flags alone, so this can safely sit ahead of the TEQ.

        MOV     Rtmp,#0

; Empty top word? Then promote the bottom word (a 32-bit shift) and charge
; 32 to the exponent.

        TEQ     OP2mhi,#0
        SUBEQ   RNDexp,RNDexp,#32
        MOVEQ   OP2mhi,OP2mlo
        MOVEQ   OP2mlo,#0

; Binary search for the leading one in the top word. Each MOVS asks whether
; the top 16/8/4/2/1 bits are all clear; if so (EQ) that distance is added
; to the count and the top word is closed up by it.

        MOVS    Rtmp2,OP2mhi,LSR #16            ;Top 16 bits clear?
        ADDEQ   Rtmp,Rtmp,#16
        MOVEQ   OP2mhi,OP2mhi,LSL #16
        MOVS    Rtmp2,OP2mhi,LSR #24            ;Top 8 bits clear?
        ADDEQ   Rtmp,Rtmp,#8
        MOVEQ   OP2mhi,OP2mhi,LSL #8
        MOVS    Rtmp2,OP2mhi,LSR #28            ;Top 4 bits clear?
        ADDEQ   Rtmp,Rtmp,#4
        MOVEQ   OP2mhi,OP2mhi,LSL #4
        MOVS    Rtmp2,OP2mhi,LSR #30            ;Top 2 bits clear?
        ADDEQ   Rtmp,Rtmp,#2
        MOVEQ   OP2mhi,OP2mhi,LSL #2
        MOVS    Rtmp2,OP2mhi,LSR #31            ;Top bit clear?
        ADDEQ   Rtmp,Rtmp,#1
        MOVEQ   OP2mhi,OP2mhi,LSL #1

; Charge the sub-word shift to the exponent (no S bit, flags untouched).

        SUB     RNDexp,RNDexp,Rtmp

; Carry the same shift through the bottom word. Rtmp is at most 31, so the
; RSBS result (32 - Rtmp) is never zero: this is what leaves NE set for the
; caller, as the ORR and MOV after it do not alter the flags.

        RSBS    Rtmp2,Rtmp,#32
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR Rtmp2  ;Bits crossing into top word
        MOV     OP2mlo,OP2mlo,LSL Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR                           ;Return
  ENDIF

;===========================================================================

; Routine to normalise the first operand. Like "NormaliseOp1", except that
; it increments the exponent in RNDexp by the shift amount, rather than
; decrementing it.
; Entry: OP1mhi, OP1mlo = First operand mantissa, which must not be all
;          zero;
;        RNDexp = Exponent (in normal position in word);
;        Rwp, Rfp, Rsp contain their usual values;
;        R14 is the return link.
; Exit:  OP1mhi, OP1mlo and RNDexp updated;
;        Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved;
;        NE condition is true.

$NormaliseOp1Neg_str

; Normalise OP1mhi:OP1mlo so that bit 31 of OP1mhi is set, and ADD the
; total left shift to RNDexp (the mirror image of the routine that
; subtracts it). The mantissa must be non-zero on entry.

; Start the sub-word shift count at zero. MOV without the S bit leaves the
; flags alone, so this can safely sit ahead of the TEQ.

        MOV     Rtmp,#0

; Empty top word? Then promote the bottom word (a 32-bit shift) and add 32
; to the exponent.

        TEQ     OP1mhi,#0
        ADDEQ   RNDexp,RNDexp,#32
        MOVEQ   OP1mhi,OP1mlo
        MOVEQ   OP1mlo,#0

; Binary search for the leading one in the top word. Each MOVS asks whether
; the top 16/8/4/2/1 bits are all clear; if so (EQ) that distance is added
; to the count and the top word is closed up by it.

        MOVS    Rtmp2,OP1mhi,LSR #16            ;Top 16 bits clear?
        ADDEQ   Rtmp,Rtmp,#16
        MOVEQ   OP1mhi,OP1mhi,LSL #16
        MOVS    Rtmp2,OP1mhi,LSR #24            ;Top 8 bits clear?
        ADDEQ   Rtmp,Rtmp,#8
        MOVEQ   OP1mhi,OP1mhi,LSL #8
        MOVS    Rtmp2,OP1mhi,LSR #28            ;Top 4 bits clear?
        ADDEQ   Rtmp,Rtmp,#4
        MOVEQ   OP1mhi,OP1mhi,LSL #4
        MOVS    Rtmp2,OP1mhi,LSR #30            ;Top 2 bits clear?
        ADDEQ   Rtmp,Rtmp,#2
        MOVEQ   OP1mhi,OP1mhi,LSL #2
        MOVS    Rtmp2,OP1mhi,LSR #31            ;Top bit clear?
        ADDEQ   Rtmp,Rtmp,#1
        MOVEQ   OP1mhi,OP1mhi,LSL #1

; Credit the sub-word shift to the exponent (no S bit, flags untouched).

        ADD     RNDexp,RNDexp,Rtmp

; Carry the same shift through the bottom word. Rtmp is at most 31, so the
; RSBS result (32 - Rtmp) is never zero: this is what leaves NE set for the
; caller, as the ORR and MOV after it do not alter the flags.

        RSBS    Rtmp2,Rtmp,#32
        ORR     OP1mhi,OP1mhi,OP1mlo,LSR Rtmp2  ;Bits crossing into top word
        MOV     OP1mlo,OP1mlo,LSL Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR                           ;Return
  ENDIF

        ]

;===========================================================================

        [ :DEF: addsub_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to add, subtract or reverse subtract two internal format floating
; point numbers. It has two entry points: "AddSubFPE", which has an
; optimised fast track for both operands being common, and "AddSubFPASC",
; which avoids the test for this optimised fast track - since it should
; never happen. The second entry point lies a long way down in the source
; to avoid addressing constraints.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        ASSERT  RNDexp = OP2sue ;We swap over from the use of OP2sue to that
                                ; of RNDexp partway through this routine.

        [ FPEWanted

AddSubFPE

        CDebug3 3,"AddSubFPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"           op2 =",OP2sue,OP2mhi,OP2mlo

; Start by detecting the "fast track" case of both operands being common.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit
        BNE     AddSub_Uncommon

        ]

        [ FPLibWanted
__fp_addsub_common
        ]

AddSub_Common

        STMFD   Rsp!,{LR}               ;Register needed, and we may get a
                                        ; subroutine call

        CDebug3 4,"AddSub_Common: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 4,"               op2 =",OP2sue,OP2mhi,OP2mlo

; Both operands are zeros or normalised numbers. We can distinguish between
; them on the basis of the units bit. However, note that the standard
; algorithm for adding/subtracting floating point numbers (i.e. do an
; alignment shift on the one with the smaller exponent, add or subtract the
; mantissas, then do a normalisation shift if necessary) works equally well
; on all of these.

; This entry point is also called from AddSub_Uncommon to add or subtract
; operands which are zeros, normalised numbers or extended denormalised
; numbers. It works perfectly well on such numbers, provided it is
; recognised that the result mantissa may be unnormalised and non-zero.
;   Note that we know that the invalid operation and divide-by-zero
; exceptions won't occur - i.e. we don't need to preserve the operands. So
; we start by modifying the signs of the operands for SUF and RSF
; instructions.

        [ :LNOT: :DEF: addsub_s

        TST     Rins,#SubNotAdd_bit     ;Is it SUF/RSF, not ADF?
        EORNE   OP2sue,OP2sue,#Sign_bit ;If so, change op2 sign (assuming SUF)
        TST     Rins,#RSF_bit           ;Is it RSF, not ADF/SUF?
        EORNE   OP2sue,OP2sue,#Sign_bit ;If so, we shouldn't have changed op2
        EORNE   OP1sue,OP1sue,#Sign_bit ; sign and should have changed op1 sign

        ]

; We can consider this to be an addition from now on. Next, we'll deal with
; the basic exponent and sign calculation: the results of this may get
; modified later on.
;   This section will leave the prospective sign for the result in OP1sue,
; R14 containing the exclusive-OR of the signs (which determines later
; whether we do a magnitude addition or subtraction), RNDexp equal to the
; first operand exponent and Rarith equal to the exponent difference.

        ExpDiff Rtmp,Rarith,OP1sue,OP2sue       ;Get difference and op1 exp.
        EOR     R14,OP1sue,OP2sue               ;Make EOR of signs
        AND     OP1sue,OP1sue,#Sign_bit         ;Isolate prospective result sign
        MOV     RNDexp,Rarith,LSR #32-EIExp_len ;Right-align operand 1 exponent
        BHI     AddSub_Op2Shift
        MOVEQ   Rtmp2,Rtmp                      ;If EQ, Rtmp = Rtmp2 = 0
        BEQ     AddSub_ShiftDone                ; = correct guard/round/sticky

AddSub_Op1Shift

; Operand 1 needs shifting, and so operand 2's exponent is used for the
; result. Rarith currently contains exp1-exp2 = -(shift amount),
; left-aligned.

        RSB     Rarith,Rtmp,#0          ;Get shift amount = exp2 - exp1
        MOV     Rarith,Rarith,LSR #32-EIExp_len ;Right-align exponent difference
        ADD     RNDexp,RNDexp,Rarith    ;Resurrect operand 2 exponent

; Now denormalise (OP1mhi,OP1mlo) with a shift amount of Rarith, putting
; op1 guard/round/sticky bits into Rtmp, op2 guard/round/sticky bits into
; Rtmp2.

        Denorm  OP1mhi,OP1mlo,Rtmp,Rarith,Rtmp2,Rarith
        MOV     Rtmp2,#0                ;Operand 2 guard/round/sticky
        B       AddSub_ShiftDone

AddSub_Op2Shift

; Operand 2 needs shifting, and so we've already selected the correct result
; exponent. Furthermore, Rtmp currently contains exp1-exp2 = shift amount,
; left-aligned. So denormalise (OP2mhi,OP2mlo) with a shift amount of Rtmp,
; putting op1 guard/round/sticky bits into Rtmp, op2 guard/round/sticky bits
; into Rtmp2.

        MOV     Rarith,Rtmp,LSR #32-EIExp_len   ;Right-align exponent difference
        Denorm  OP2mhi,OP2mlo,Rtmp2,Rarith,Rtmp,Rarith
        MOV     Rtmp,#0                 ;Operand 1 guard/round/sticky

AddSub_ShiftDone

; We now have:
;   OP1sue:        Prospective result sign (= operand 1 sign);
;   OP1mhi/OP1mlo: Operand 1 mantissa, possibly shifted;
;   RNDexp:        Prospective result exponent (= MAX(operand exponents));
;   OP2mhi/OP2mlo: Operand 2 mantissa, possibly shifted;
;   Rarith:        Free;
;   Rfpsr:         FPSR;
;   Rtmp:          Operand 1 guard, round and sticky bits;
;   Rins:          Instruction;
;   Rtmp2:         Operand 2 guard, round and sticky bits;
;   Rwp,Rfp,Rsp:   Standard values;
;   R14:           Sign bit indicates magnitude subtraction/NOT addition;
; Now we need to split according to whether we need to do a magnitude
; addition or a magnitude subtraction.

        TST     R14,#Sign_bit
        BNE     AddSub_MagSub

AddSub_MagAdd

; Perform the magnitude addition. Note first that we have no need for a
; guard bit in this case, so we are going to regard the guard/round/sticky
; bits in Rtmp[31/30/29:0] and Rtmp2[31/30/29:0] as simply being
; round/sticky bits in Rtmp[31/30:0] and Rtmp2[31/30:0]. Secondly, note that
; since we know that at least one of Rtmp and Rtmp2 is zero, we can simply
; add these round/sticky bit representations to get the result round/sticky
; representation.

        ADDS    Rarith,Rtmp,Rtmp2       ;Will not in fact generate C=1
        ADCS    OP1mlo,OP1mlo,OP2mlo
        ADCS    OP1mhi,OP1mhi,OP2mhi

; If C=0, we're done. Otherwise, we've got to adjust the exponent, mantissa,
; round and sticky bits.

  IF Interworking :LOR: Thumbing
        LDMCCFD Rsp!,{LR}
        BXCC    LR
  ELSE
        LDMCCFD Rsp!,{PC}
  ENDIF
        ADD     RNDexp,RNDexp,#1
        MOVS    OP1mhi,OP1mhi,RRX
        MOVS    OP1mlo,OP1mlo,RRX
        ORR     Rarith,Rarith,Rarith,LSL #1     ;Sticky receives all of old
        MOV     Rarith,Rarith,RRX               ; round/sticky; round is new
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF


AddSub_MagSub

; Magnitude subtraction path. Computes OP1 - OP2 over the three-word
; quantity mantissa-high/mantissa-low/guard-round-sticky, leaving the result
; in OP1mhi/OP1mlo/Rarith. If the subtraction borrows, the result sign in
; OP1sue is flipped and the three-word result is negated. Execution then
; falls through into the normalisation code that follows.
;
; We need to do a magnitude subtraction of OP2mhi/OP2mlo/Rtmp2 from
; OP1mhi/OP1mlo/Rtmp. The prospective result exponent in RNDexp has been
; made right already, but if the subtraction comes out negative, we will
; have to change the sign of the result. Note we can subtract the
; guard/round/sticky representations in Rtmp and Rtmp2, because we know one
; of them is entirely zero.

        SUBS    Rarith,Rtmp,Rtmp2
        SBCS    OP1mlo,OP1mlo,OP2mlo
        SBCS    OP1mhi,OP1mhi,OP2mhi    ;C=0 here means a borrow occurred

; If the subtraction (which was of unsigned numbers) came out negative, we
; need to reverse the sign of the result and 2's complement the mantissa -
; again including the guard/round/sticky part.

        BCS     AddSub_MagSub_Normalise ;No borrow: result is non-negative
        EOR     OP1sue,OP1sue,#Sign_bit
        RSBS    Rarith,Rarith,#0        ;Three-word negate: 0 - value, with
        RSCS    OP1mlo,OP1mlo,#0        ; the borrow propagated upwards
        RSC     OP1mhi,OP1mhi,#0

AddSub_MagSub_Normalise

; Normalises the result of a magnitude subtraction held in
; OP1mhi/OP1mlo/Rarith with exponent RNDexp, then returns by popping one word
; off the stack. Three outcomes are possible: already normalised (immediate
; return), normalised after a one-bit left shift (return), or a longer shift
; is needed, in which case either the general normalisation routine is
; called or, if the mantissa is entirely zero, a zero result is produced.
;
; Now we need to normalise the result. This is slightly tricky, because in
; the case of subtracting the largest possible number with one exponent from
; the smallest number of the next exponent (e.g. 1-(1-2^(-64))), the leading
; bit of the result is actually the round bit. We can divide into two cases:
;
;   (a) The exponent difference was 0 or 1: in this case, the number may be
;       normalised by up to 64 bits, but the current round and sticky bits
;       are guaranteed to be 0 - this ensures that the eventual sticky bit
;       is guaranteed to be zero, and that the round bit is also zero if a
;       non-zero normalisation shift is required;
;
;   (b) The exponent difference was 2 or more: in this case, the number can
;       be normalised by at most one bit, but the eventual sticky bit may be
;       non-zero.
;
; So we will first try to normalise by 1 bit, bringing the guard bit into the
; mantissa if necessary.

        TST     OP1mhi,#EIUnits_bit     ;Already normalised?
  IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;Return if so
        BXNE    LR
  ELSE
        LDMNEFD Rsp!,{PC}               ;Return if so
  ENDIF
        ADDS    Rarith,Rarith,Rarith    ;Shift mhi/mlo/guard/round/sticky
        ADCS    OP1mlo,OP1mlo,OP1mlo    ; left by one bit to form new
        ADC     OP1mhi,OP1mhi,OP1mhi    ; mhi/mlo/round/sticky
        SUB     RNDexp,RNDexp,#1

; If the result is normalised now, we're done. Otherwise, we know that a
; normalisation shift of 1-63 is still required, that the exponent
; difference was 0 or 1, and thus that the new round and sticky bits are
; both zero.
;   However, at this point, we need to look out for the case of a magnitude
; subtraction of two equal numbers - for which we need to apply the special
; IEEE sign rule (i.e. -0 if rounding to -infinity, otherwise +0).

        TST     OP1mhi,#EIUnits_bit     ;Normalised now?
  IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;Return if so
        BXNE    LR
  ELSE
        LDMNEFD Rsp!,{PC}               ;Return if so
  ENDIF

; LR is free to use as a scratch destination here, because the word that
; this routine returns through is popped off the stack.

        ORRS    LR,OP1mhi,OP1mlo        ;Is result zero?
        BLNE    $NormaliseOp1_str       ;If not, complete normalisation
                                        ; and return. NOTE(review): the
                                        ; conditional return below relies on
                                        ; the NE condition still holding when
                                        ; the called routine returns - confirm
                                        ; against its definition.
  IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;Return if result was non-zero
        BXNE    LR
  ELSE
        LDMNEFD Rsp!,{PC}               ;Return if result was non-zero
  ENDIF


; We know the result is a zero, with sign determined by the rounding mode.
; Everything except the sign and exponent has been correctly set already,
; so we test the rounding mode, set the sign and exponent, and return.
;   When addsub_s is defined, the code instead clears both dOPh and dOPl and
; returns without consulting the rounding mode.

        [ :DEF: addsub_s
          MOV   dOPh, #0
          MOV   dOPl, #0
          ASSERT dOPh = fOP :LOR: dOPl = fOP
 ;         ADD   sp,sp,#4        ; Pop link register off the stack
 ;         VReturn
  IF Interworking :LOR: Thumbing
          LDMFD   Rsp!,{LR}
          BX      LR
  ELSE
          LDMFD Rsp!,{PC}
  ENDIF

        |
          AND   Rtmp,Rins,#RM_mask      ;Extract rounding mode field
          TEQ   Rtmp,#RM_MinusInf
          MOVEQ OP1sue,#Sign_bit        ;Round to -infinity: result is -0
          MOVNE OP1sue,#0               ;Any other mode: result is +0
          MOV   RNDexp,#0

  IF Interworking :LOR: Thumbing
          LDMFD Rsp!,{LR}
          BX      LR
  ELSE
          LDMFD Rsp!,{PC}
  ENDIF

 
        ]

        ]                               ; Conditional assembly of AddSub

;===========================================================================

        [ :DEF: mul_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to multiply or fast-multiply two internal format floating point
; numbers. It has two entry points: "MultFPE", which has an optimised fast
; track for both operands being common, and "MultFPASC", which avoids the
; test for this optimised fast track - since it should never happen. The
; second entry point lies a long way down in the source to avoid addressing
; constraints.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        ASSERT  RNDexp = OP2sue ;We swap over from the use of OP2sue to that
                                ; of RNDexp partway through this routine.

        [ FPEWanted

; MultFPE: multiplication entry point with a fast track for two common
; operands. Branches to Mult_Uncommon if either operand has its uncommon bit
; set, and to Mult_Zero if either operand's units bit is clear; otherwise
; execution continues into the code that follows this conditional section.

MultFPE

        CDebug3 3,"MultFPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"         op2 =",OP2sue,OP2mhi,OP2mlo

; Start by detecting the "fast track" case of both operands being common.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit    ;Only tested if OP1 is common
        BNE     Mult_Uncommon           ;Taken if either uncommon bit is set

; If either operand is a zero, the product is a zero. Because the numbers
; are common and assumed not to be unnormalised URD results, we can check
; for zeros by means of the units bits.

        ANDS    Rtmp,OP1mhi,OP2mhi      ;N := both units bits set
        ASSERT  EIUnits_pos = 31
        BPL     Mult_Zero

; Both operands may now be assumed to be normalised numbers. Produce the
; result sign and the prospective result exponent.

        ]

        [ :DEF: mul_s :LOR: FPEWanted

; Computes the result sign and prospective exponent for a product of two
; normalised operands: OP1sue is reduced to just the XOR of the two sign
; bits, and RNDexp := exp1 + exp2 - (EIExp_bias-1). Both exponents are
; extracted before RNDexp is written, because RNDexp is the same register as
; OP2sue. The label __fp_mult_common is only defined when FPLibWanted is set.

        [ FPLibWanted
__fp_mult_common
        ]

        AND     Rtmp,OP1sue,#ToExp_mask
        AND     Rtmp2,OP2sue,#ToExp_mask
        EOR     OP1sue,OP1sue,OP2sue    ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        ADD     RNDexp,Rtmp,Rtmp2
; The constant is subtracted in two byte-sized pieces - presumably so that
; each piece can be encoded as an immediate operand.
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
        ASSERT  (EIExp_bias-1) < &10000 ;Result exponent if mantissa
                                        ; overflow is exp1+exp2-bias+1

        ]

; This subsidiary entry point deals with multiplying two normalised
; mantissas together and adjusting the exponent if necessary.
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the
;          remaining bits are zero;
;        OP1mhi = First operand mantissa, high word;
;        OP1mlo = First operand mantissa, low word;
;        RNDexp = Prospective result exponent, which may be negative; this
;          needs to be decremented if mantissa overflow doesn't occur;
;        OP2mhi = Second operand mantissa, high word;
;        OP2mlo = Second operand mantissa, low word;
;        Rins   = instruction (may be needed to discriminate between MUF and
;          FML);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign, with an uncommon bit of 0; the
;          remaining bits are zero;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp = the result exponent, which may be negative;
;        Rarith holds the round bit (in bit 31) and the sticky bit (in bits
;          30:0) if the destination precision is extended; if the
;          destination precision is single or double, it holds part of the
;          sticky bit (the remainder of which is held in bits below the
;          round bit in OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.

Mult_Mantissas

; Dispatcher: picks the cheapest multiplication routine according to which
; of the mantissa low words are zero, then branches (or falls through) to it.
;
; We will split into various lines, depending on the operands:
;
;   if ((OP1mlo = 0) AND (OP2mlo = 0))
;     do 32x32->64 multiplication of OP1mhi by OP2mhi;
;   if ((OP1mlo = 0) AND (OP2mlo != 0))
;     do 32x64->96 multiplication of OP1mhi by (OP2mhi,OP2mlo);
;   if ((OP1mlo != 0) AND (OP2mlo = 0))
;     do 64x32->96 multiplication of (OP1mhi,OP1mlo) by OP2mhi;
;   if ((OP1mlo != 0) AND (OP2mlo != 0))
;     do 64x64->128 multiplication of (OP1mhi,OP1mlo) by (OP2mhi,OP2mlo);
;
; In each case, this is then followed by code to deal with the case of no
; mantissa overflow (i.e. the top bit of the product was zero) and to create
; the round and sticky bits.
;
; This is all designed to make multiplications involving single precision
; numbers, immediate constants and/or FLTed integers as efficient as
; possible.
;
; If the instruction is an FML, we simply assume that both mantissa low
; words are zero.

        [ FPEWanted

        TST     Rins,#Fast_bit
        BNE     Mult_32x32

        ]

        TEQ     OP1mlo,#0
        BEQ     Mult_32xX

; OP1mlo is non-zero from here on; the remaining choice depends on OP2mlo.

Mult_64xX

        TEQ     OP2mlo,#0
        BEQ     Mult_64x32

Mult_64x64

; Full 64x64->128 bit mantissa multiplication, reached when both mantissa
; low words are non-zero. OP1sue, Rfpsr, Rins and LR are pushed on entry so
; that they can be used as work registers; they are popped again on both
; exits, the saved LR value being used for the return. The result mantissa is
; left in OP1mhi/OP1mlo with round/sticky information in Rarith, and RNDexp
; is decremented if a one-bit normalisation shift was needed.
;   Split16 and Mul64 are macros defined elsewhere; the descriptions of what
; they compute are taken from the comments accompanying each use.

        STMFD   Rsp!,{OP1sue,Rfpsr,Rins,LR}

; We do this multiplication by applying the trick (described in Knuth
; section 4.3.3) for reducing the obvious algorithm involving four 32x32
; multiplications to just three plus some additions and sign manipulations,
; by means of the formula:
;
;   (a1*2^32 + a0) * (b1*2^32 + b0)
;     = a1*b1*(2^64+2^32) + (a1-a0)*(b0-b1)*2^32 + a0*b0*(2^32+1)
;
; This has to be done carefully: the a1*b1 and a0*b0 multiplications are
; straightforward 32x32 multiplications, but each of a1-a0 and b0-b1 is in
; the range -2^32+1 < x < 2^32-1. To see what effect this has, we need to
; look at what we will get if we simply do the a1-a0 and b0-b1 subtractions,
; then multiply the results as unsigned numbers:
;
; (A) If a1-a0 >= 0, b0-b1 >= 0:
;       product obtained = (a1-a0)*(b0-b1)
;
; (B) If a1-a0 >= 0, b0-b1 < 0:
;       product obtained = (a1-a0)*(b0-b1+2^32)
;                        = (a1-a0)*(b0-b1) + (a1-a0)*2^32
;
; (C) If a1-a0 < 0, b0-b1 >= 0:
;       product obtained = (a1-a0+2^32)*(b0-b1)
;                        = (a1-a0)*(b0-b1) + (b0-b1)*2^32
;
; (D) If a1-a0 < 0, b0-b1 < 0:
;       product obtained = (a1-a0+2^32)*(b0-b1+2^32)
;                        = (a1-a0)*(b0-b1) + ((a1-a0)+(b0-b1))*2^32 + 2^64
;                        = (a1-a0)*(b0-b1)
;                            + ((a1-a0+2^32) + (b0-b1+2^32))*2^32 - 2^64
;
; So to get the real value of (a1-a0)*(b0-b1), we must look at the signs of
; a1-a0 and b0-b1: if a1-a0 is in fact negative, we must subtract the
; calculated value of b0-b1 from the high word of the calculated product; if
; b0-b1 is in fact negative, we must subtract the calculated value of a1-a0
; from the high word of the calculated product; and finally we must add 2^64
; if both were negative.
;
; This last step is awkward. However, note that (a1-a0)*(b0-b1) is actually
; guaranteed to lie in the range -2^64 < x < 2^64, which means that it is
; sufficient to calculate its value modulo 2^64 (i.e. disregarding carries
; out of the high word and the possible addition of 2^64), provided we take
; care to get the sign word right.
;
; We do the 32x32 multiplications by means of standard macros. First
; multiply a1*b1 = OP1mhi*OP2mhi into (OP1sue,Rfpsr).

        Split16 OP1sue,Rfpsr,OP1mhi
        Mul64   OP1sue,Rfpsr,OP1sue,Rfpsr,OP2mhi,,,Rarith,Rtmp,Rtmp2

; Multiply a0*b0 = OP1mlo*OP2mlo into (Rins,R14).

        Split16 Rins,R14,OP1mlo
        Mul64   Rins,R14,Rins,R14,OP2mlo,,,Rarith,Rtmp,Rtmp2

; Next, we need to calculate a1*b1*(2^64+2^32) + a0*b0*(2^32+1)
;
;   = (2^32+1) * (a1*b1*2^32 + a0*b0)
;
; Note that a1*b1*2^32 + a0*b0 <= (2^32-1)*(2^32-1)*(2^32+1)
; = (2^32-1)*(2^64-1) < 2^96 and that (2^32+1) * (a1*b1*2^32 + a0*b0)
; <= (2^32+1)*(2^32-1)*(2^32-1)*(2^32+1) = (2^64-1)^2 < 2^128, so the
; calculations can be done respectively in 3- and 4-word unsigned
; arithmetic.

        ADDS    Rfpsr,Rfpsr,Rins        ;Put a1*b1*2^32 + a0*b0 into
        ADC     OP1sue,OP1sue,#0        ; (OP1sue,Rfpsr,R14)
        ADDS    Rins,Rfpsr,R14          ;Then multiply by 2^32+1, putting
        ADCS    Rfpsr,Rfpsr,OP1sue      ; result in (OP1sue,Rfpsr,Rins,R14)
        ADC     OP1sue,OP1sue,#0

; Calculate a1-a0 = OP1mhi-OP1mlo into Rtmp,
;           b0-b1 = OP2mlo-OP2mhi into Rtmp2,
;           addend to high word of calculated (a1-a0)*(b0-b1) product into
;             Rarith, and
;           correct sign of (a1-a0)*(b0-b1) product into OP1mhi.
; The sign word is 0 for a positive or zero result, &FFFFFFFF for a negative
; result - i.e. it is the word which, when prefixed to the 64-bit product
; calculated otherwise, gives us the true result as a 96-bit signed number.
;   Getting this right is slightly tricky, because of the possibilities of
; a1-a0 and b0-b1 being zero and thus invalidating the usual EOR rule about
; the sign. The key to the code below is that if Rtmp = a1-a0 comes out as
; 0, OP1mhi (the sign) and Rarith (the addend) come out as zero and Rtmp2
; never gets set - but this last doesn't matter, since zero times anything
; is zero!
;   Note also that we don't care about carries out of the addend, since they
; go into the sign word, which we are getting right by other means.

        SUBS    Rtmp,OP1mhi,OP1mlo              ;Rtmp := a1-a0
        MOV     OP1mhi,#0                       ;Sign if a1-a0,b0-b1 both +ve
        MOV     Rarith,#0                       ;Addend if both +ve
        MVNLO   OP1mhi,OP1mhi                   ;If a1-a0 -ve, adjust sign and
        SUBLO   Rarith,OP2mhi,OP2mlo            ; addend = -(b0-b1) = b1-b0
        SUBNES  Rtmp2,OP2mlo,OP2mhi             ;Rtmp2 := b0-b1
        MOVEQ   OP1mhi,#0                       ;Override sign if b0-b1 = 0
        MVNLO   OP1mhi,OP1mhi                   ;If b0-b1 -ve, adjust sign and
        SUBLO   Rarith,Rarith,Rtmp              ; addend += -(a1-a0)

; Finish calculating the real value of (a1-a0)*(b0-b1) into
; (OP1mhi,OP1mlo,Rarith). I.e. multiply Rtmp by Rtmp2, adding OP1mlo into the
; high word and putting the result in (OP1mlo,Rarith). OP1mhi is already OK.

        Split16 OP2mhi,OP2mlo,Rtmp
        Mul64   OP1mlo,Rarith,OP2mhi,OP2mlo,Rtmp2,Rarith,,Rtmp,Rtmp2,OP1mlo

; Now add a1*b1*(2^64+2^32) + a0*b0*(2^32+1) and (a1-a0)*(b0-b1)*2^32
; together, putting the result in (OP1mhi,OP1mlo,Rarith,R14). Note the low
; word is in R14 already.

        ADDS    Rarith,Rins,Rarith
        ADCS    OP1mlo,Rfpsr,OP1mlo
        ADCS    OP1mhi,OP1sue,OP1mhi            ;N := top bit of the product

; Transfer R14 into the sticky bit, without affecting flags. Also make
; certain we don't affect the guard or round bits. (Any set bit of R14 ends
; up somewhere in bits 29:0 of the value ORed into Rarith.)

        ORR     R14,R14,R14,LSL #2
        ORR     Rarith,Rarith,R14,LSR #2

; If result is normalised, return. Otherwise normalise by shifting left one
; bit. The MI condition tested here is the N flag left by the final ADCS
; above; the intervening ORRs do not set the flags.

  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,Rfpsr,Rins,LR}
        BXMI    LR
  ELSE
        LDMMIFD Rsp!,{OP1sue,Rfpsr,Rins,PC}
  ENDIF

        ADDS    Rarith,Rarith,Rarith
        ADCS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     RNDexp,RNDexp,#1
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{OP1sue,Rfpsr,Rins,LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{OP1sue,Rfpsr,Rins,PC}
  ENDIF


Mult_64x32

; 64x32->96 bit multiplication of (OP1mhi,OP1mlo) by OP2mhi, reached when
; OP2mlo is zero. The result mantissa is left in OP1mhi/OP1mlo and the bottom
; word of the product in Rarith. Returns through LR; nothing is pushed on
; the stack here.
;
; To perform this multiplication, we do two 32x32 multiplications, then add
; the results together. We use the standard macros for the purpose.
; NOTE(review): Split16 and Mul64 are defined elsewhere. From the additions
; below, the first Mul64 appears to leave its product in (OP2mhi,OP1mhi) and
; the second in (OP2mlo,Rarith) - confirm against the macro definitions.

        Split16 OP2mlo,Rarith,OP2mhi
        Mul64   OP2mhi,OP1mhi,OP2mlo,Rarith,OP1mhi,,,Rtmp,Rtmp2,OP2mhi
        Mul64   OP2mlo,Rarith,OP2mlo,Rarith,OP1mlo,,,Rtmp,Rtmp2,OP1mlo
        ADDS    OP1mlo,OP2mlo,OP1mhi    ;Add the overlapping middle words
        ADCS    OP1mhi,OP2mhi,#0        ;Propagate carry; N := top bit

; If the top bit was clear, we need to shift the product, round and sticky
; bits left by one bit and decrement the exponent. Otherwise, everything is
; ready for the return.

  IF Interworking :LOR: Thumbing
        BXMI    LR
  ELSE
        MOVMI   PC,LR
  ENDIF

        ADDS    Rarith,Rarith,Rarith
        ADCS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     RNDexp,RNDexp,#1
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF


Mult_32xX

; OP1mlo is zero on arrival here. If OP2mlo is zero as well, only the high
; words need multiplying; otherwise fall through to the 32x64 case.

        TEQ     OP2mlo,#0
        BEQ     Mult_32x32

Mult_32x64

; 32x64->96 bit multiplication of OP1mhi by (OP2mhi,OP2mlo). The result
; mantissa is left in OP1mhi/OP1mlo and the bottom word of the product in
; Rarith. Returns through LR; nothing is pushed on the stack here.
;
; To perform this multiplication, we do two 32x32 multiplications, then add
; the results together. We use the standard macros for the purpose.
; NOTE(review): Split16 and Mul64 are defined elsewhere. From the additions
; below, the first Mul64 appears to leave its product in (OP1mhi,OP2mhi) and
; the second in (OP1mlo,Rarith) - confirm against the macro definitions.

        Split16 OP1mlo,Rarith,OP1mhi
        Mul64   OP1mhi,OP2mhi,OP1mlo,Rarith,OP2mhi,,,Rtmp,Rtmp2,OP1mhi
        Mul64   OP1mlo,Rarith,OP1mlo,Rarith,OP2mlo,,,Rtmp,Rtmp2,OP2mlo
        ADDS    OP1mlo,OP1mlo,OP2mhi    ;Add the overlapping middle words
        ADCS    OP1mhi,OP1mhi,#0        ;Propagate carry; N := top bit

; If the top bit was clear, we need to shift the product, round and sticky
; bits left by one bit and decrement the exponent. Otherwise, everything is
; ready for the return.

  IF Interworking :LOR: Thumbing
        BXMI    LR
  ELSE
        MOVMI   PC,LR
  ENDIF

        ADDS    Rarith,Rarith,Rarith
        ADCS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     RNDexp,RNDexp,#1
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        [ FPLibWanted
        KEEP    |$F__fp_mult_fast_common|
|$F__fp_mult_fast_common|
__fp_mult_fast_common
; Library entry point for the fast multiply of two common operands; it is
; only assembled when FPLibWanted is set. It computes the result sign into
; OP1sue and RNDexp := exp1 + exp2 - (EIExp_bias-1), then continues into the
; code that follows this conditional section.
; This code is duplicated from above for the fast case.
        AND     Rtmp,OP1sue,#ToExp_mask
        AND     Rtmp2,OP2sue,#ToExp_mask
        EOR     OP1sue,OP1sue,OP2sue    ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        ADD     RNDexp,Rtmp,Rtmp2
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
        ASSERT  (EIExp_bias-1) < &10000 ;Result exponent if mantissa
                                        ; overflow is exp1+exp2-bias+1

        ]

Mult_32x32

; 32x32->64 bit multiplication of OP1mhi by OP2mhi, used when both mantissa
; low words are zero (or are being treated as zero). The product is left in
; OP1mhi/OP1mlo, Rarith is cleared, and RNDexp is decremented if a one-bit
; normalisation shift was needed. Returns through LR.
;
; Only the high words of the operand mantissas need to be multiplied
; together. Use the standard macros for this purpose.
; NOTE(review): the conditional return below tests MI straight after the
; Mul64 invocation, so the "S" macro argument presumably makes Mul64 set the
; flags on the high word of the product - confirm against the macro.

        Split16 OP2mlo,Rarith,OP2mhi
        Mul64   OP1mhi,OP1mlo,OP2mlo,Rarith,OP1mhi,,S,Rtmp,Rtmp2,OP1mhi

; The round and sticky bits are always going to be zero. (This MOV has no S
; suffix, so the flags tested below are not disturbed by it.)

        MOV     Rarith,#0

; If the top bit was clear, we need to shift the product left one bit and
; decrement the exponent. Otherwise we're done.

  IF Interworking :LOR: Thumbing
        BXMI    LR
  ELSE
        MOVMI   PC,LR
  ENDIF

        ADDS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     RNDexp,RNDexp,#1

  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional compilation of Mult

;===========================================================================

        [ :DEF: div_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to divide, reverse-divide, fast-divide or fast-reverse-divide two
; internal format floating point numbers. It has two entry points: "DivFPE",
; which has an optimised fast track for both operands being common, and
; "DivFPASC", which avoids the test for this optimised fast track - since it
; should rarely happen. The second entry point lies a long way down in the
; source to avoid addressing constraints.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        ASSERT  RNDexp = OP2sue ;We swap over from the use of OP2sue to that
                                ; of RNDexp partway through this routine.

        [ FPEWanted

; DivFPE: division entry point with a fast track for two common operands.
; Branches to Div_Uncommon if either operand has its uncommon bit set, to
; Div_Zero if either operand's units bit is clear, and to Div_Common_Swapped
; (skipping the operand swap) if the instruction has RevDiv_bit set.
; Otherwise execution continues into the code that follows this conditional
; section.

DivFPE

        CDebug3 3,"DivFPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"        op2 =",OP2sue,OP2mhi,OP2mlo

; Start by detecting the "fast track" case of both operands being common.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit    ;Only tested if OP1 is common
        BNE     Div_Uncommon            ;Taken if either uncommon bit is set

; If either operand is a zero, we need to take special action. Because the
; numbers are common and assumed not to be unnormalised URD results, we can
; check for zeros by means of the units bits.

        ANDS    Rtmp,OP1mhi,OP2mhi      ;N := both units bits set
        ASSERT  EIUnits_pos = 31
        BPL     Div_Zero

; Both operands may now be assumed to be normalised numbers. We now know
; that we are not going to need to know the operands for trap purposes, so
; we can swap them if this is a normal division rather than a reverse
; division.

        TST     Rins,#RevDiv_bit
        BNE     Div_Common_Swapped      ;Reverse division: no swap needed

        ]

        [ FPLibWanted
__fp_div_common
        ]

; Exchange the two operands register by register, using Rtmp as the only
; scratch register. The flags are left untouched. The mantissa low words are
; exchanged last, so Rtmp finishes holding the incoming OP1mlo value.

        MOV     Rtmp,OP2mhi             ;Mantissa high words
        MOV     OP2mhi,OP1mhi
        MOV     OP1mhi,Rtmp

        MOV     Rtmp,OP2sue             ;Sign/uncommon/exponent words
        MOV     OP2sue,OP1sue
        MOV     OP1sue,Rtmp

        MOV     Rtmp,OP1mlo             ;Mantissa low words
        MOV     OP1mlo,OP2mlo
        MOV     OP2mlo,Rtmp

; The reverse-divide library entry point comes after the exchange, so
; entering there leaves the operands in the order they arrived in.

        [ FPLibWanted
        KEEP    |$F__fp_rdv_common|
|$F__fp_rdv_common|
__fp_rdv_common
        ]

Div_Common_Swapped

; Produce the result sign and the prospective result exponent.
;   On arrival the operands are arranged with the divisor in the OP1
; registers and the dividend in the OP2 registers. OP1sue is reduced to just
; the XOR of the two sign bits, and RNDexp := (OP2 exponent) - (OP1 exponent)
; + EIExp_bias. Both exponents are extracted before RNDexp is written,
; because RNDexp is the same register as OP2sue. Execution then continues
; into the mantissa division code that follows.

        AND     Rtmp,OP1sue,#ToExp_mask         ;Rtmp := divisor exponent
        AND     Rtmp2,OP2sue,#ToExp_mask        ;Rtmp2 := dividend exponent
        EOR     OP1sue,OP1sue,OP2sue    ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        SUB     RNDexp,Rtmp2,Rtmp
; The bias is added in two byte-sized pieces - presumably so that each piece
; can be encoded as an immediate operand.
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF00
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF
        ASSERT  EIExp_bias < &10000     ;Result exponent if no mantissa
                                        ; underflow is exp1-exp2+bias

; This subsidiary entry point deals with dividing a normalised mantissa by
; another and adjusting the exponent if necessary.
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the
;          remaining bits are zero;
;        OP1mhi = Divisor mantissa, high word;
;        OP1mlo = Divisor mantissa, low word;
;        RNDexp = Prospective result exponent, which may be negative; this
;          needs to be decremented if mantissa underflow occurs;
;        OP2mhi = Dividend mantissa, high word;
;        OP2mlo = Dividend mantissa, low word;
;        Rins   = instruction (needed to determine precision; may be needed
;          to discriminate between normal and fast divisions);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  OP1sue = the result's sign, with an uncommon bit of 0; the
;          remaining bits are zero;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp = the result exponent, which may be negative;
;        Rarith holds the round bit (in bit 31) and the sticky bit (in bits
;          30:0) if the destination precision is extended; if the
;          destination precision is single or double, it holds part of the
;          sticky bit (the remainder of which is held in bits below the
;          round bit in OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.

Div_Mantissas

        STMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}

        CDebug2 4,"Div_Mantissas: dividend =",OP2mhi,OP2mlo
        CDebug2 4,"               divisor  =",OP1mhi,OP1mlo
        CDebug1 4,"               exponent =",RNDexp

; We will do the mantissa division by an algorithm which is a hybrid between
; Newton-Raphson approximation and ordinary long division: this results in
; division being done to IEEE accuracy, yet more than 50% faster than the
; straightforward long division technique. A summary of the algorithm is:
;
;   (a) Use table look-up to get an initial approximation to the reciprocal
;       of the divisor;
;
;   (b) Use two iterations of Newton-Raphson to improve the reciprocal
;       approximation to one with about 15 bits accuracy;
;
;   (c) Do long division base 2^13, using the reciprocal approximation to
;       determine the result "digits" - which are in fact fixed point
;       numbers with 13 bits before the binary point and 3 after it;
;
;   (d) Resolve the exact values of the last three bits by ordinary long
;       division;
;
;   (e) Adjust the exponent and shift the mantissa if mantissa underflow
;       occurs, and create the sticky bit.
;
; Exact details of the algorithm appear in comments next to the relevant
; parts of the code below.
;
; The long division is performed for 2 steps for single precision, 4 steps
; for double precision and 5 steps for extended precision, producing 2*13+3
; = 29, 4*13+3 = 55 and 5*13+3 = 68 bits respectively, plus a sticky bit in
; each case.
;
; Note that this algorithm has been specifically tailored to the software
; environment - e.g. the availability of 32x32->32 bit multiplication and
; the fact that negative partial remainders during the long division will
; cause problems. This leads to some apparently strange bits of code below -
; e.g. getting less accuracy from a Newton-Raphson iteration than might
; appear to be available, in order to preserve knowledge of the sign of the
; error.
;
; In what follows, we will refer to the true mathematical value of the
; dividend mantissa as P, that of the divisor as D, that of the reciprocal
; of the divisor as R and that of the quotient as Q. So Q = P/D = P*R are
; exact mathematical relationships. Also, we have P = (2^32*OP2mhi +
; OP2mlo)*2^(-63), D = (2^32*OP1mhi + OP1mlo)*2^(-63).

; First step: initialise by breaking the divisor up into 16-bit chunks,
; held in (OP1sue,Rfpsr,Rins,R14).

        Split16 OP1sue,Rfpsr,OP1mhi
        Split16 Rins,R14,OP1mlo

; Second step: use table look-up to get an approximation to R. Specifically,
; we load Rarith with an 8-bit value such that we know:
;
;   R <= Rarith*2^(-7) < R + 2^(-6)

        [ CoreDebugging = 0
          ADR     Rarith,Recip_Table-128        ;-128 to cancel units bit
        |
          ADRL    Rarith,Recip_Table-128        ;-128 to cancel units bit
        ]
        LDRB    Rarith,[Rarith,OP1sue,LSR #8]

        CDebug1 5,"Table look-up approx'n is",Rarith

; Third step: use a Newton-Raphson iteration to improve this to an 11-bit
; value in Rarith such that:
;
;   R < Rarith*2^(-10) < R + 2^(-9)
;
; Details: Let W be the current value of Rarith, so we have:
;
;   R <= W*2^(-7) < R + 2^(-6)
;
; Let X be the first 16 bits of D (i.e. OP1sue), incremented by 1. This has
; the property that:
;
;   D < X*2^(-15) <= D + 2^(-15)
;
; Suppose further that W*2^(-7) = R+e, with 0 <= e < 2^(-6), and X*2^(-15) =
; D+f, with 0 < f <= 2^(-15).
;
; Now let Y = W * (2^23 - X*W), which is a calculation that can be performed
; without overflowing a word. This is equivalent to:
;
;   Y*2^(-29) = W*2^(-7) * (2 - X*2^(-15) * W*2^(-7))
;
;             = (R+e) * (2 - (D+f)*(R+e))
;
;             = (R+e) * (2 - (1 + D*e + R*f + e*f)), since D*R=1 exactly,
;
;             = (R+e) * (1 - D*e - R*f - e*f)
;
;             = R + e - e - D*e*e - R*R*f - R*e*f - R*e*f - e*e*f, since D*R=1,
;
;             = R - D*e*e - R*R*f - 2*R*e*f - e*e*f
;
; Since R > 0, D > 0, e >= 0 and f > 0, this is clearly less than R. On the
; other hand, we know that R <= 1, D < 2, e < 2^(-6) and f <= 2^(-15). So:
;
;   R > Y*2^(-29)
;     > R - 2^(-11) - 2^(-15) - 2^(-20) - 2^(-27)
;
; Now let Z be Y shifted right 19 bits. This gives us:
;
;   Y*2^(-29) - 2^(-10) < Z*2^(-10) <= Y*2^(-29)
;
; Combining the inequalities, we get:
;
;   R - 2^(-9) < R - 2^(-11) - 2^(-15) - 2^(-20) - 2^(-27) - 2^(-10)
;              < Y*2^(-29) - 2^(-10)
;              < Z*2^(-10)
;              <= Y*2^(-29)
;              < R
;
; So if we put Rarith = Z+2, we get:
;
;   R < Rarith*2^(-10) < R + 2^(-9),
;
; as desired.

        MLA     Rtmp,OP1sue,Rarith,Rarith       ;Rtmp := (X-1)*W + W = X*W
        RSB     Rtmp,Rtmp,#1:SHL:23             ;Rtmp := 2^23 - X*W
        MUL     Rarith,Rtmp,Rarith              ;Rarith := W*(2^23 - X*W) = Y
        MOV     Rarith,Rarith,LSR #19           ;Shift right 19 bits and add
        ADD     Rarith,Rarith,#2                ; 2 to get new approximation

        CDebug1 5,"First N-R approx'n is",Rarith

; Fourth step: use a Newton-Raphson iteration to improve this to a 16-bit
; value in Rarith such that:
;
;   R - 2^(-15) < Rarith*2^(-16) < R
;
; Details: Let W be the current value of Rarith, so we have:
;
;   R < W*2^(-10) < R + 2^(-9)
;
; Let X be the first 19 bits of D (i.e. the top 19 bits of OP1mhi),
; incremented by 1. This has the property that:
;
;   D < X*2^(-18) <= D + 2^(-18)
;
; Suppose further that W*2^(-10) = R+e, with 0 < e < 2^(-9), and X*2^(-18)
; = D+f, with 0 < f <= 2^(-18).
;
; Now let Y = W * (2^29 - X*W): part of this calculation will require 2-word
; arithmetic. This is equivalent to:
;
;   Y*2^(-38) = W*2^(-10) * (2 - X*2^(-18) * W*2^(-10))
;
;             = (R+e) * (2 - (D+f)*(R+e))
;
;             = R - D*e*e - R*R*f - 2*R*e*f - e*e*f, as in the third step.
;
; Since R > 0, D > 0, e >= 0 and f > 0, this is clearly less than R. On the
; other hand, we know that R <= 1, D < 2, e < 2^(-9) and f <= 2^(-18). So:
;
;   R > Y*2^(-38)
;     > R - 2^(-17) - 2^(-18) - 2^(-26) - 2^(-36)
;
; Now let Z be Y shifted right 22 bits. This gives us:
;
;   Y*2^(-38) - 2^(-16) < Z*2^(-16) <= Y*2^(-38)
;
; Combining the inequalities, we get:
;
;   R - 2^(-15) < R - 2^(-17) - 2^(-18) - 2^(-26) - 2^(-36) - 2^(-16)
;               < Y*2^(-38) - 2^(-16)
;               < Z*2^(-16)
;               <= Y*2^(-38)
;               < R
;
; So if we put Rarith = Z, we get the desired inequality.

        MOV     Rtmp,OP1mhi,LSR #13             ;Rtmp := X-1
        MLA     Rtmp2,Rtmp,Rarith,Rarith        ;Rtmp2 := (X-1)*W + W = X*W
        RSB     Rtmp2,Rtmp2,#1:SHL:29           ;Rtmp2 := 2^29 - X*W
        Split16 Rtmp,Rtmp2,Rtmp2                ;Rtmp/Rtmp2 := top/bottom half
        MUL     OP1mlo,Rtmp2,Rarith             ;OP1mhi, OP1mlo := two
        MUL     OP1mhi,Rtmp,Rarith              ; parts of product with W
        ADD     Rarith,OP1mhi,OP1mlo,LSR #16    ;Rarith := Y >> 16
        MOV     Rarith,Rarith,LSR #6            ;Rarith := Y >> 22

        CDebug1 5,"Second N-R approx'n is",Rarith

; Fifth step: initialise the partial remainder - its binary point lies to
; the right of bit 30 of its top word to line up well with the results of
; later multiplications.

        MOVS    OP2mhi,OP2mhi,LSR #1
        MOVS    OP2mlo,OP2mlo,RRX
        MOVCC   OP2sue,#0
        MOVCS   OP2sue,#TopBit

; Sixth step: do the first iteration of the long division process. The
; register allocation during this is:
;
; OP1sue, Rfpsr, Rins, R14: Divisor, in 16-bit chunks; its binary point is
;                           considered to lie to the right of bit 15 of
;                           OP1sue;
; OP1mhi, OP1mlo:           Quotient so far (Rarith joins into this near the
;                           end of the calculation); its binary point is
;                           considered to lie to the right of bit 31 of
;                           OP1mhi;
; OP2mhi, OP2mlo, OP2sue:   Partial remainder; its binary point is
;                           considered to lie to the right of bit 30 of
;                           OP2mhi;
; Rarith:                   16-bit reciprocal approximation, until near the
;                           end of the calculation; its binary point lies to
;                           the *left* of bit 15;
; Rtmp, Rtmp2:              Temporaries.
;
; Some of these registers (OP1mhi and OP1mlo) only become set some way into
; the calculation: until they do become set, they should be regarded as
; being 0.
;
; The details of iteration N (for N=0 to 4) of the long division process
; are:
;
; Let D be the divisor represented by (OP1sue,Rfpsr,Rins,R14), and let R =
; 1/D be its reciprocal. Let A be the reciprocal approximation represented
; by Rarith from now until near the end of the calculation - i.e. A =
; Rarith*2^(-16). We know that:
;
;   1 <= D < 2;
;   0.5 < R <= 1;
;   R-2^(-15) < A < R
;
; Let Q[N] be the quotient represented by those of OP1mhi, OP1mlo and Rarith
; that have become set at the end of iteration N-1/start of iteration N -
; i.e.:
;
;   Q[0]      = 0;
;   Q[1],Q[2] = (OP1mhi at appropriate time) * 2^(-31);
;   Q[3],Q[4] = (OP1mhi at appropriate time) * 2^(-31)
;               + (OP1mlo at appropriate time) * 2^(-63);
;   Q[5]      = (OP1mhi at appropriate time) * 2^(-31)
;               + (OP1mlo at appropriate time) * 2^(-63)
;               + (Rarith at appropriate time) * 2^(-95);
;
; Let P[N] be the partial remainder represented by those of OP2mhi, OP2mlo
; and OP2sue that have become set at the end of iteration N-1/start of
; iteration N - i.e.:
;
;   P[i] = (OP2mhi at appropriate time) * 2^(-30)
;          + (OP2mlo at appropriate time) * 2^(-62)
;          + (OP2sue at appropriate time) * 2^(-94);
;
; Finally, let P be the original dividend - i.e. P is the current value of
; OP2mhi*2^(-31) + OP2mlo*2^(-63).
;
; For i=0, we can clearly make the following four statements:
;
;   (a) Q[i] is a multiple of 2^(-13*i-2);
;
;   (b) P[i] is a multiple of 2^(-65);
;
;   (c) P = Q[i]*D + P[i]*2^(-13*i);
;
;   (d) 0 < P[i] < 2;
;
; since Q[0] = 0 and P[0] = P. The algorithm will result in the same
; statements being true for i = 1, 2, 3, 4 and 5 as well.
;
; Iteration i of the algorithm is:
;
;   Papprox = P[i], rounded down to a multiple of 2^(-15);
;   digit   = Papprox * A, rounded down to a multiple of 2^(-15);
;   P[i+1]  = (P[i] - digit*D) * 2^13
;   Q[i+1]  = Q[i] + digit*2^(-13*i)
;
; Proof that the four statements above are true for all i: we will do this
; by induction. We already know that they are true for i=0. So suppose they
; are true for some given i. Then they are also true for i+1:
;
; (a) Q[i+1] = Q[i] + digit*2^(-13*i)
;            = (multiple of 2^(-13*i-2)) + (multiple of 2^(-15))*2^(-13*i)
;            = multiple of 2^(-13*i-15)
;            = multiple of 2^(-13*(i+1)-2).
;
; (b) P[i+1] = (P[i] - digit*D) * 2^13
;            = 2^13 * (multiple of 2^(-65)
;                      - (multiple of 2^(-15)) * (multiple of 2^(-63)))
;            = multiple of 2^(-65).
;
; (c) P = Q[i]*D + P[i]*2^(-13*i)
;       = (Q[i+1] - digit*2^(-13*i)) * D
;         + (P[i+1]*2^(-13) + digit*D) * 2^(-13*i)
;       = Q[i+1]*D + P[i+1]*2^(-13*i-13)
;       = Q[i+1]*D + P[i+1]*2^(-13*(i+1)).
;
; (d) First, since Papprox = P[i] rounded down to a multiple of 2^(-15) and
;     R-2^(-15) < A < R, we have Papprox = P[i]-e and A = R-f, where 0 <= e
;     < 2^(-15) and 0 < f < 2^(-15). Then, since digit = Papprox * A rounded
;     down to a multiple of 2^(-15), we have digit = Papprox * A - g, where
;     0 <= g < 2^(-15). Putting these together, we have:
;
;       digit = (P[i]-e)*(R-f) - g
;             = P[i]*R - P[i]*f - e*R + e*f - g
;
;     Since everything is non-negative, 'digit' is clearly at most P[i]*R.
;     Conversely, since P[i] < 2, R <= 1, e < 2^(-15), f < 2^(-15) and g <
;     2^(-15), we have:
;
;       P[i]*R > digit
;              > P[i]*R - 2*2^(-15) - 2^(-15)*1 - 2^(-15)
;              = P[i]*R - 2^(-13)
;
;     Or:
;
;       0 < P[i]*R - digit < 2^(-13)
;
;     Multiplying by D, which is known to satisfy 1 <= D < 2:
;
;       0 < P[i] - digit*D < 2^(-12)
;
;     Multiplying by 2^(13):
;
;       0 < P[i+1] < 2
;
; Notes:
;
; (1) The subtraction to create P[i] is done by subtracting the four 16x16
;     products formed from the digit and the 16-bit chunks of the divisor
;     from the partial remainder. Two of these 32-bit products are aligned
;     with the partial remainder and thus don't cause any problems. The
;     other two are both mis-aligned by 16 bits. One way to subtract them
;     would be to do a double word shift on them and subtract the results
;     from the partial remainder: this takes 2 instructions to form the
;     central shifted word and 3 for the subtraction (two of which are
;     "shift and subtracts"). However, this makes use of one register more
;     than we have. So the code below makes use of a trick, based on the
;     fact that if we subtract the top 16 bits and the bottom 16 bits of the
;     central shifted word separately, only one of the subtractions can
;     cause a borrow. So if we've got a borrow after the first one, we do
;     the second one without setting the condition codes, knowing that it
;     won't cause a borrow; if we don't, we set the condition codes on the
;     result of the second subtraction.
;
; (2) The multiplication operands are generally ordered to maximise the
;     chance of early termination. This means that all but the top chunk of
;     the divisor are good second operands to the multiplication, the digit
;     is next best, and the top chunk of the divisor is the least good.
;
; (3) The above is in fact not exactly true, due to the fact that it saves
;     some cycles not to shift P[1] and P[3] left by 13 bits, but to wait
;     until P[2] and P[4] are generated, then shift them left 26 bits.

        MOV     Rtmp,OP2mhi,LSR #15     ;Rtmp := Papprox
        MUL     Rtmp2,Rarith,Rtmp       ;Rtmp2 := Papprox * A
        MOV     Rtmp2,Rtmp2,LSR #16     ;Rtmp2 := digit
        MUL     Rtmp,Rtmp2,Rins         ;Subtract digit*D from P[0] to
        SUBS    OP2mlo,OP2mlo,Rtmp      ; form P[1]*2^(-13) - this requires
        MUL     Rtmp,OP1sue,Rtmp2       ; 4 multiplications and subtractions
        SBC     OP2mhi,OP2mhi,Rtmp      ; at various alignments
        MUL     Rtmp,Rtmp2,R14
        SUBS    OP2sue,OP2sue,Rtmp,LSL #16
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #16
        MUL     Rtmp,Rtmp2,Rfpsr
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #16  ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #16  ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #16
        MOV     OP1mhi,Rtmp2,LSL #16    ;OP1mhi := Q[1]

        CDebug1 5,"1st iter'n: quotient so far =",OP1mhi
        CDebug3 5,"          partial remainder =",OP2mhi,OP2mlo,OP2sue

; Seventh step: second iteration. At the end of this step, we check whether
; the division is single precision and branch out to termination code if
; so.

        MOV     Rtmp,OP2mhi,LSR #2      ;Rtmp := Papprox
        MUL     Rtmp2,Rarith,Rtmp       ;Rtmp2 := Papprox * A
        MOV     Rtmp2,Rtmp2,LSR #16     ;Rtmp2 := digit
        MUL     Rtmp,Rtmp2,Rins         ;Subtract digit*D from P[1]*2^(-13)
        SUBS    OP2sue,OP2sue,Rtmp,LSL #19  ; to form P[2]*2^(-26) - this
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #13  ; requires 4 multiplications and
        MUL     Rtmp,OP1sue,Rtmp2       ; subtractions at various alignments
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #19  ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #19  ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #13
        MUL     Rtmp,Rtmp2,R14
        SUBS    OP2sue,OP2sue,Rtmp,LSL #3
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #29
        MUL     Rtmp,Rtmp2,Rfpsr
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #3   ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #3   ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #29
        MOV     OP2mhi,OP2mhi,LSL #26   ;Shift by 26 bits to form P[2]
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR #6
        MOV     OP2mlo,OP2mlo,LSL #26
        ORR     OP2mlo,OP2mlo,OP2sue,LSR #6
        MOV     OP2sue,OP2sue,LSL #26
        ADD     OP1mhi,OP1mhi,Rtmp2,LSL #3  ;OP1mhi := Q[2]

        CDebug1 5,"2nd iter'n: quotient so far =",OP1mhi
        CDebug3 5,"          partial remainder =",OP2mhi,OP2mlo,OP2sue

        LDR     Rtmp,[Rsp,#12]          ;Recover instruction

        [ FPEWanted :LOR: FPASCWanted

        TST     Rtmp,#Pr1_mask          ;Check for single precision
        TSTEQ   Rtmp,#Pr2_mask
        BEQ     Div_Single

        |

        TST     Rtmp,#Single_mask       ;Use a simpler encoding
        BNE     Div_Single

        ]

; Eighth step: third iteration.

        MOV     Rtmp,OP2mhi,LSR #15     ;Rtmp := Papprox
        MUL     Rtmp2,Rarith,Rtmp       ;Rtmp2 := Papprox * A
        MOV     Rtmp2,Rtmp2,LSR #16     ;Rtmp2 := digit
        MUL     Rtmp,Rtmp2,Rins         ;Subtract digit*D from P[2] to
        SUBS    OP2mlo,OP2mlo,Rtmp      ; form P[3]*2^(-13) - this requires
        MUL     Rtmp,OP1sue,Rtmp2       ; 4 multiplications and subtractions
        SBC     OP2mhi,OP2mhi,Rtmp      ; at various alignments
        MUL     Rtmp,Rtmp2,R14
        SUBS    OP2sue,OP2sue,Rtmp,LSL #16
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #16
        MUL     Rtmp,Rtmp2,Rfpsr
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #16  ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #16  ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #16
        MOV     OP1mlo,Rtmp2,LSL #22    ;(OP1mhi,OP1mlo) := Q[3]
        ADD     OP1mhi,OP1mhi,Rtmp2,LSR #10

        CDebug2 5,"3rd iter'n: quotient so far =",OP1mhi,OP1mlo
        CDebug3 5,"          partial remainder =",OP2mhi,OP2mlo,OP2sue

; Ninth step: fourth iteration. At the end of this step, we check whether
; the division is double precision and branch out to termination code if
; so.

        MOV     Rtmp,OP2mhi,LSR #2      ;Rtmp := Papprox
        MUL     Rtmp2,Rarith,Rtmp       ;Rtmp2 := Papprox * A
        MOV     Rtmp2,Rtmp2,LSR #16     ;Rtmp2 := digit
        MUL     Rtmp,Rtmp2,Rins         ;Subtract digit*D from P[3]*2^(-13)
        SUBS    OP2sue,OP2sue,Rtmp,LSL #19  ; to form P[4]*2^(-26) - this
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #13  ; requires 4 multiplications and
        MUL     Rtmp,OP1sue,Rtmp2       ; subtractions at various alignments
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #19  ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #19  ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #13
        MUL     Rtmp,Rtmp2,R14
        SUBS    OP2sue,OP2sue,Rtmp,LSL #3
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #29
        MUL     Rtmp,Rtmp2,Rfpsr
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #3   ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #3   ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #29
        MOV     OP2mhi,OP2mhi,LSL #26   ;Shift by 26 bits to form P[4]
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR #6
        MOV     OP2mlo,OP2mlo,LSL #26
        ORR     OP2mlo,OP2mlo,OP2sue,LSR #6
        MOV     OP2sue,OP2sue,LSL #26
        ADDS    OP1mlo,OP1mlo,Rtmp2,LSL #9  ;(OP1mhi,OP1mlo) := Q[4]
        ADC     OP1mhi,OP1mhi,#0

        CDebug2 5,"4th iter'n: quotient so far =",OP1mhi,OP1mlo
        CDebug3 5,"          partial remainder =",OP2mhi,OP2mlo,OP2sue

        LDR     Rtmp,[Rsp,#12]          ;Recover instruction

        [ FPEWanted :LOR: FPASCWanted

        TST     Rtmp,#Pr1_mask          ;Check for double precision
        BEQ     Div_Double

        |

        TST     Rtmp,#Double_mask
        BNE     Div_Double

        ]

; Tenth step: fifth iteration. We can enter the extended precision
; termination code at the end of this iteration, since we know it must be an
; extended precision division.

        MOV     Rtmp,OP2mhi,LSR #15     ;Rtmp := Papprox
        MUL     Rtmp2,Rarith,Rtmp       ;Rtmp2 := Papprox * A
        MOV     Rtmp2,Rtmp2,LSR #16     ;Rtmp2 := digit
        MUL     Rtmp,Rtmp2,Rins         ;Subtract digit*D from P[4] to
        SUBS    OP2mlo,OP2mlo,Rtmp      ; form P[5]*2^(-13) - this requires
        MUL     Rtmp,OP1sue,Rtmp2       ; 4 multiplications and subtractions
        SBC     OP2mhi,OP2mhi,Rtmp      ; at various alignments
        MUL     Rtmp,Rtmp2,R14
        SUBS    OP2sue,OP2sue,Rtmp,LSL #16
        SBCS    OP2mlo,OP2mlo,Rtmp,LSR #16
        MUL     Rtmp,Rtmp2,Rfpsr
        SUBCC   OP2mlo,OP2mlo,Rtmp,LSL #16  ;Already got a borrow
        SUBCSS  OP2mlo,OP2mlo,Rtmp,LSL #16  ;No borrow yet - try for one
        SBC     OP2mhi,OP2mhi,Rtmp,LSR #16
        MOV     OP2mhi,OP2mhi,LSL #14   ;Shift by 14 bits to form 2*P[5]
        ORR     OP2mhi,OP2mhi,OP2mlo,LSR #18
        MOV     OP2mlo,OP2mlo,LSL #14
        ORR     OP2mlo,OP2mlo,OP2sue,LSR #18
        MOV     OP2sue,OP2sue,LSL #14
        MOV     Rarith,Rtmp2,LSL #28    ;(OP1mhi,OP1mlo,Rarith) := Q[5]
        ADDS    OP1mlo,OP1mlo,Rtmp2,LSR #4
        ADC     OP1mhi,OP1mhi,#0

        CDebug3 5,"5th iter'n: quotient so far =",OP1mhi,OP1mlo,Rarith
        CDebug3 5,"          partial remainder =",OP2mhi,OP2mlo,OP2sue

Div_Extended

; Termination code for an extended precision division, entered by falling
; through from the fifth iteration above.
;
; We've completed the main work for an extended precision division. We've
; now got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[5] in
; (OP1mhi,OP1mlo,Rarith) and twice the partial remainder P[5] in
; (OP2mhi,OP2mlo,OP2sue) such that:
;
;   (a) Q[5] is a multiple of 2^(-67);
;
;   (b) P[5] is a multiple of 2^(-65);
;
;   (c) P = Q[5]*D + P[5]*2^(-65);
;
;   (d) 0 < P[5] < 2;
;
; The main problem with this is that P[5]*2^(-65) may be almost 2^(-64),
; while Q[5] is a multiple of 2^(-67). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-67) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division. To
; save on multi-word additions and problems about carry flag use, we put the
; bits calculated into R14 and only add them into the quotient once at the
; end.
;
; Note that generating twice P[5] above with the binary point to the right
; of bit 30 of OP2mhi is equivalent to generating P[5] with the binary point
; to the right of bit 31 - i.e. to generating it in the position we want it
; to be for the code that follows. This is a trick we only use for extended
; precision, since for the other precisions, we need to be ready for another
; iteration of the algorithm above as well as for the termination code.

; Pack the four 16-bit divisor chunks back into two 32-bit words:
; (OP1sue,Rfpsr) := divisor high word, low word. This frees Rins and R14
; for use as the overflow word and the extra-bit accumulator below.

        ORR     OP1sue,Rfpsr,OP1sue,LSL #16     ;Reform divisor
        ORR     Rfpsr,R14,Rins,LSL #16

        MOV     R14,#0                  ;Initialise extra bits

; First extra bit. Unlike the other two, this one needs no preliminary shift
; of the partial remainder (the fifth iteration already shifted it one place
; further than usual - see the note above) and therefore no overflow word.

        SUBS    Rtmp2,OP2mlo,Rfpsr      ;First extra bit: trial subtraction
        SBCS    Rtmp,OP2mhi,OP1sue      ; of divisor from partial remainder
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14             ;Accumulate bit (C = no borrow)

; Second extra bit. The bit shifted out of the top of OP2mhi is caught in
; Rins, so that the trial subtraction is effectively done on 65 bits.

        MOV     Rins,#0                 ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue    ;Second extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue      ; from partial remainder
        SBCS    Rins,Rins,#0            ;Propagate borrow into overflow word
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14             ;Accumulate bit

; Third extra bit: same procedure as the second.

        MOV     Rins,#0                 ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue    ;Third extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue      ; from partial remainder
        SBCS    Rins,Rins,#0            ;Propagate borrow into overflow word
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14             ;Accumulate bit

        CDebug1 5,"Extra bits to add in are",R14

; (OP1mhi,OP1mlo,Rarith) now contains 68 bits of quotient, R14 three extra
; bits that need to be added into its low end and (OP2mhi,OP2mlo) the final
; partial remainder. (We've shifted all the extra bits out of OP2sue, and the
; overflow word Rins must be zero at this point.)
;   This is enough bits to provide guard and round bits, plus 2 bits
; contributing to the sticky bit and enough information to complete
; generating it. We will finish generating it by setting bit 0 of Rarith if
; the partial remainder is non-zero.

        ORRS    Rtmp,OP2mhi,OP2mlo
        ORRNE   Rarith,Rarith,#1

; Now add the three extra bits into the quotient and test for mantissa
; underflow. The quantum of Q[5], 2^(-67), corresponds to bit 28 of Rarith,
; which is why the extra bits are added in shifted left by 28. The final
; ADCS leaves the N flag equal to the top bit of the quotient mantissa.

        ADDS    Rarith,Rarith,R14,LSL #28   ;Add extra bits into quotient
        ADCS    OP1mlo,OP1mlo,#0
        ADCS    OP1mhi,OP1mhi,#0

; If no mantissa underflow (top bit of OP1mhi set, i.e. MI), we're ready to
; return. Otherwise, we must recover the spilled registers (to get hold of
; the result exponent), shift the mantissa left one bit, decrement the
; exponent and return.

  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        BXMI    LR
  ELSE
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
  ENDIF

        LDMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        ADDS    Rarith,Rarith,Rarith    ;Shift 96-bit mantissa left by 1
        ADCS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     OP2sue,OP2sue,#1        ;Decrement the result exponent
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_Double

; Termination code for a double precision division, entered by a branch
; taken after the fourth iteration above.
;
; We've completed the main work for a double precision division. We've now
; got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[4] in
; (OP1mhi,OP1mlo) and the partial remainder P[4] in (OP2mhi,OP2mlo,OP2sue)
; such that:
;
;   (a) Q[4] is a multiple of 2^(-54);
;
;   (b) P[4] is a multiple of 2^(-65);
;
;   (c) P = Q[4]*D + P[4]*2^(-52);
;
;   (d) 0 < P[4] < 2;
;
; (Rarith does not hold any quotient bits on this path: it still contains
; the 16-bit reciprocal approximation, which is no longer needed and is
; overwritten with the sticky information below.)
;
; The main problem with this is that P[4]*2^(-52) may be almost 2^(-51),
; while Q[4] is a multiple of 2^(-54). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-54) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division. To
; save on multi-word additions and problems about carry flag use, we put the
; bits calculated into R14 and only add them into the quotient once at the
; end.

; Pack the four 16-bit divisor chunks back into two 32-bit words:
; (OP1sue,Rfpsr) := divisor high word, low word. This frees Rins and R14
; for use as the overflow word and the extra-bit accumulator below.

        ORR     OP1sue,Rfpsr,OP1sue,LSL #16     ;Reform divisor
        ORR     Rfpsr,R14,Rins,LSL #16

        MOV     R14,#0                  ;Initialise extra bits

; First extra bit. No overflow word is needed for this shift: P[4] < 2 with
; its binary point to the right of bit 30 of OP2mhi, so bit 31 of OP2mhi is
; clear and nothing can be shifted out of the top.

        ADDS    OP2sue,OP2sue,OP2sue    ;First extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADC     OP2mhi,OP2mhi,OP2mhi
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor from
        SBCS    Rtmp,OP2mhi,OP1sue      ; partial remainder
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14             ;Accumulate bit (C = no borrow)

; Second extra bit. The bit shifted out of the top of OP2mhi is caught in
; Rins, so that the trial subtraction is effectively done on 65 bits.

        MOV     Rins,#0                 ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue    ;Second extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue      ; from partial remainder
        SBCS    Rins,Rins,#0            ;Propagate borrow into overflow word
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14             ;Accumulate bit

; Third extra bit: same procedure as the second.

        MOV     Rins,#0                 ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue    ;Third extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue      ; from partial remainder
        SBCS    Rins,Rins,#0            ;Propagate borrow into overflow word
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADC     R14,R14,R14             ;Accumulate bit

        CDebug1 5,"Extra bits to add in are",R14

; (OP1mhi,OP1mlo) now contains 55 bits of quotient, R14 three extra bits
; that need to be added into its low end and (OP2mhi,OP2mlo) the final
; partial remainder. (We've shifted all the extra bits out of OP2sue, and
; the overflow word Rins must be zero at this point.)
;   This is enough bits to provide guard and round bits, plus enough
; information to generate the sticky bit. We do this by setting Rarith to
; zero if the partial remainder is zero, non-zero if the partial remainder
; is non-zero. Note that since we know rounding will take place to double
; precision, we don't mind having the sticky bit overflow into the extended
; precision round bit.

        ORR     Rarith,OP2mhi,OP2mlo

; Now add the three extra bits into the quotient and test for mantissa
; underflow. The quantum of Q[4], 2^(-54), corresponds to bit 9 of OP1mlo,
; which is why the extra bits are added in shifted left by 9. The final
; ADCS leaves the N flag equal to the top bit of the quotient mantissa.

        ADDS    OP1mlo,OP1mlo,R14,LSL #9    ;Add extra bits into quotient
        ADCS    OP1mhi,OP1mhi,#0

; If no mantissa underflow (top bit of OP1mhi set, i.e. MI), we're ready to
; return. Otherwise, we must recover the spilled registers (to get hold of
; the result exponent), shift the mantissa left one bit, decrement the
; exponent and return. Rarith only carries sticky information here, so it is
; not included in the shift.

  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        BXMI    LR
  ELSE
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
  ENDIF

        LDMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        ADDS    OP1mlo,OP1mlo,OP1mlo    ;Shift 64-bit mantissa left by 1
        ADC     OP1mhi,OP1mhi,OP1mhi
        SUB     OP2sue,OP2sue,#1        ;Decrement the result exponent
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_Single

; Termination code for a single precision division, entered by a branch
; taken after the second iteration above.
;
; We've completed the main work for a single precision division. We've now
; got the divisor D in (OP1sue,Rfpsr,Rins,R14), the quotient Q[2] in OP1mhi
; and the partial remainder P[2] in (OP2mhi,OP2mlo,OP2sue) such that:
;
;   (a) Q[2] is a multiple of 2^(-28);
;
;   (b) P[2] is a multiple of 2^(-65);
;
;   (c) P = Q[2]*D + P[2]*2^(-26);
;
;   (d) 0 < P[2] < 2;
;
; (OP1mlo has not been set on this path, and Rarith still contains the
; 16-bit reciprocal approximation; both are given their final values below.)
;
; The main problem with this is that P[2]*2^(-26) may be almost 2^(-25),
; while Q[2] is a multiple of 2^(-28). To know the correct IEEE answer, we
; have to make the partial remainder be less than the "quantum" in the
; quotient - i.e. less than 2^(-28) in this case. Without doing this, we
; can't calculate the sticky bit accurately: we know that a non-zero partial
; remainder at this point represents a string of quotient bits which are not
; all zero, but if they overlap the quotient bits we've already calculated,
; we don't know whether adding the bits together in the area of overlap
; would result in a string of all zero bits and thus a sticky bit of 0.
;
; We deal with this by doing three bits worth of ordinary long division.
; Since the whole quotient lives in OP1mhi, each extra bit is added straight
; into it (at bits 5, 4 and 3, i.e. 2^(-26), 2^(-27) and 2^(-28)) rather
; than being accumulated in R14 as the other precisions do.

; Pack the four 16-bit divisor chunks back into two 32-bit words:
; (OP1sue,Rfpsr) := divisor high word, low word. This frees Rins for use as
; the overflow word below.

        ORR     OP1sue,Rfpsr,OP1sue,LSL #16     ;Reform divisor
        ORR     Rfpsr,R14,Rins,LSL #16

; First extra bit. No overflow word is needed for this shift: P[2] < 2 with
; its binary point to the right of bit 30 of OP2mhi, so bit 31 of OP2mhi is
; clear and nothing can be shifted out of the top.

        ADDS    OP2sue,OP2sue,OP2sue    ;First extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADC     OP2mhi,OP2mhi,OP2mhi
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor from
        SBCS    Rtmp,OP2mhi,OP1sue      ; partial remainder
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADDCS   OP1mhi,OP1mhi,#1:SHL:5  ;Add bit to quotient

; Second extra bit. The bit shifted out of the top of OP2mhi is caught in
; Rins, so that the trial subtraction is effectively done on 65 bits.

        MOV     Rins,#0                 ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue    ;Second extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue      ; from partial remainder
        SBCS    Rins,Rins,#0            ;Propagate borrow into overflow word
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADDCS   OP1mhi,OP1mhi,#1:SHL:4  ;Add bit to quotient

; Third extra bit: same procedure as the second.

        MOV     Rins,#0                 ;Initialise overflow word
        ADDS    OP2sue,OP2sue,OP2sue    ;Third extra bit: shift partial
        ADCS    OP2mlo,OP2mlo,OP2mlo    ; remainder
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     Rins,Rins,Rins
        SUBS    Rtmp2,OP2mlo,Rfpsr      ;Trial subtraction of divisor
        SBCS    Rtmp,OP2mhi,OP1sue      ; from partial remainder
        SBCS    Rins,Rins,#0            ;Propagate borrow into overflow word
        MOVCS   OP2mlo,Rtmp2            ;If bit is 1, really do subtraction
        MOVCS   OP2mhi,Rtmp
        ADDCS   OP1mhi,OP1mhi,#1:SHL:3  ;Add bit to quotient

; The quotient, including the extra bits, is in OP1mhi. (R14 is not used as
; an extra-bit accumulator on this path - it still holds the low divisor
; chunk - so it is OP1mhi that must be reported here.)

        CDebug1 5,"Quotient after adding in extra bits is",OP1mhi

; OP1mhi now contains 29 bits of quotient and (OP2mhi,OP2mlo) the final
; partial remainder. (We've shifted all the extra bits out of OP2sue, and
; the overflow word Rins must be zero at this point.)
;   This is enough bits to provide guard and round bits, plus 3 bits
; contributing to the sticky bit and enough information to complete
; generating it. We will finish generating it by setting Rarith to zero if
; the partial remainder is zero, non-zero if the partial remainder is
; non-zero.
;   We must also set the low word of the result mantissa to 0.

        ORR     Rarith,OP2mhi,OP2mlo
        MOV     OP1mlo,#0

; Now test for mantissa underflow. If no mantissa underflow (top bit of
; OP1mhi set, i.e. MI), we're ready to return. Otherwise, we must recover
; the spilled registers (to get hold of the result exponent), shift the
; mantissa left one bit, decrement the exponent and return.

        TEQ     OP1mhi,#0

  IF Interworking :LOR: Thumbing
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        BXMI    LR
  ELSE
        LDMMIFD Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,PC}
  ENDIF

        LDMFD   Rsp!,{OP1sue,OP2sue,Rfpsr,Rins,LR}
        MOV     OP1mhi,OP1mhi,LSL #1    ;Shift mantissa left by 1
        SUB     OP2sue,OP2sue,#1        ;Decrement the result exponent

  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

;===========================================================================

; Reciprocal approximation table
; ------------------------------
;
; This table contains 128 entries, indexed by the first 7 fractional bits of
; a normalised divisor mantissa D. The value Rapprox obtained has the
; property that:
;
;   1/D <= Rapprox*2^(-7) < 1/D + 2^(-6)
;
; In fact, entry N in the table is calculated by the formula:
;
;   Entry(N) = 2^14 divided by (128+N), rounded up to an integer.
;
; Proof that this is correct: if the first 7 fractional bits of D are N, we
; know that:
;
;   (128+N)*2^(-7) <= D < (129+N)*2^(-7)
;
; This gives us:
;   (2^7)/(129+N) < 1/D <= (2^7)/(128+N)
;
; Next, we have:
;   1/(128+N) - 1/(129+N) = 1/((128+N)*(129+N))
;                         < 1/(128*128)
;                         = 2^(-14)
;
; Multiplying by 2^7 and rearranging:
;   (2^7)/(128+N) - 2^(-7) < (2^7)/(129+N)
;
; So:
;   (2^7)/(128+N) - 2^(-7) < 1/D <= (2^7)/(128+N)
;
; Or:
;   1/D <= (2^7)/(128+N) < 1/D + 2^(-7)
;
; If we round (2^7)/(128+N) up to a multiple of 2^(-7), we increase it by
; less than 2^(-7), giving us:
;
;   1/D <= (2^7)/(128+N) rounded up to a multiple of 2^(-7) < 1/D + 2^(-6)
;
; But (2^7)/(128+N) rounded up to a multiple of 2^(-7) is Entry(N)*2^(-7),
; giving us the desired property.

; The table is generated at assembly time, one byte per entry, by a WHILE
; loop over N = 0..127 using the assembly-time variable Rec_tmp as N.
; (BytesStart/BytesEnd are macros defined elsewhere; presumably they bracket
; a run of byte data - confirm against their definitions.)

Recip_Table BytesStart

        GBLA    Rec_tmp
Rec_tmp SETA    0
        WHILE   Rec_tmp < 128
; Entry(N) = 2^14/(128+N) rounded up: adding (128+N)-1 = 127+N to the
; numerator before the integer division performs the rounding up. The
; entries run from 128 (N=0) down to 65 (N=127), so each fits in a byte.
        DCB     (16384+127+Rec_tmp)/(128+Rec_tmp)
Rec_tmp SETA    Rec_tmp+1
        WEND

        BytesEnd

        ]                               ; Conditional assembly of Div

;===========================================================================

        [ :DEF: fmod_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to perform the IEEE remainder function. It has the usual two 
; labels on its entry point.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.
;
; Stack discipline: the return address is pushed once at Rem_Common and must
; be popped again on every exit that is reached after that point, including
; the early "result is the first operand" exit at Rem_ExponentsDone.

        ASSERT  RNDexp = OP2sue ;We swap over from the use of OP2sue to that
                                ; of RNDexp partway through this routine.

        [ FPEWanted
RemFPE
        ]

        [ FPASCWanted
RemFPASC
        ]

        CDebug3 3,"RemFPASC/FPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"              op2 =",OP2sue,OP2mhi,OP2mlo

        [ FPEWanted :LOR: FPASCWanted

; Start by detecting the "fast track" case of both operands being common.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit
        BNE     Rem_Uncommon

; If the second operand is a zero, we've got an invalid operation.
; Otherwise, if the first operand is a zero, the result is equal to the
; first operand.

        ORRS    Rarith,OP2mhi,OP2mlo
        MOVEQ   Rtmp,#InvReas_XRem0
        BEQ     InvalidOperation2ForSDE

        ORRS    Rarith,OP1mhi,OP1mlo
        BEQ     Rem_FirstOperand_Zero

        ]

; Both operands may now be assumed to be normalised numbers - now to deal
; with signs and exponents.
;
; We're going to generate the remainder by a long-division-like algorithm,
; which can be summarised as follows:
;
;   partial remainder = ABS(op1); sign = SIGN(op1);
;   FOR I = (op1 exponent) TO ((op2 exponent)-1) STEP -1
;     Trial subtract (partial remainder) from (op2 mantissa)*2^I;
;     IF strictly negative THEN
;       partial remainder := 2*(op2 mantissa)*2^I - (partial remainder);
;       sign := NOT(sign);
;   NEXT
;   IF (partial remainder) = 0
;     THEN result = 0, with sign SIGN(op1);
;     ELSE result = (-1)^(sign) * (partial remainder);
;
; We're clearly going to keep both the current sign and the original sign
; around: we'll do this in the top two bits of OP1sue. We'll also need to
; know the prospective result exponent (in OP2sue = RNDexp) and the number
; of iterations of the loop (in Rarith). However, note that if the
; calculated number of iterations is 0 or less, this means that the result
; is equal to the first operand. So we'll take care to calculate this number
; before disturbing the first operand in any way.
;
; Note also that the sign of the second operand is totally irrelevant, now
; that we've got past the stage of there being any potential invalid operation
; or divide-by-zero exceptions.

Rem_Common

        STMFD   Rsp!,{LR}               ;Because we'll need the register, we
                                        ; may well call NormaliseOp1, and to
                                        ; match the Rem_Uncommon path.

        AND     RNDexp,OP2sue,#ToExp_mask       ;Second operand exponent
        SUB     RNDexp,RNDexp,#1                ;Prospective result exponent
        AND     Rarith,OP1sue,#ToExp_mask       ;First operand exponent
        SUBS    Rarith,Rarith,RNDexp            ;Number of iterations - 1

Rem_ExponentsDone

; On arrival here the return address is on the stack (pushed at Rem_Common
; above), and the flags reflect (number of iterations - 1). If that is
; negative, the result is the first operand exactly, and we return at once.
; The saved return address must be popped on this exit as on any other:
; returning through LR without popping would leave Rsp one word too low.

        AND     OP1sue,OP1sue,#Sign_bit         ;All cases want this
        ADDLT   RNDexp,Rarith,RNDexp            ;Recover first operand exp.
        MOVLT   Rarith,#0                       ;And return first operand
  IF Interworking :LOR: Thumbing
        LDMLTFD Rsp!,{LR}                       ; exactly, popping the
        BXLT    LR                              ; saved return address
  ELSE
        LDMLTFD Rsp!,{PC}                       ; exactly, popping the
                                                ; saved return address
  ENDIF

; Prepare for the main loop and branch into it.

        MOV     OP1sue,OP1sue,ASR #1            ;Make a copy of the sign, in
                                                ; case the result is zero
        MOV     LR,#0                           ;Top word of the partial
                                                ; remainder

        CDebug2 4,"Entering RMF loop: Rarith, LR",Rarith,LR
        CDebug3 4," op1",OP1sue,OP1mhi,OP1mlo
        CDebug3 4," op2",RNDexp,OP2mhi,OP2mlo

        B       Rem_Loop_Entry

Rem_Loop_Shift

; Shift the partial remainder left by 1 bit, using a bit of trickery to do
; each word in 1 cycle. The bit shifted out of the top of OP1mhi is kept in
; LR, which acts as the top word of the partial remainder.

        MOV     LR,OP1mhi,LSR #31
        ADDS    OP1mlo,OP1mlo,OP1mlo
        ADC     OP1mhi,OP1mhi,OP1mhi

Rem_Loop_Entry

; Do the trial subtraction of divisor - partial remainder; if it comes out
; non-negative, keep the previous partial remainder.

        RSBS    Rtmp,OP1mlo,OP2mlo
        RSCS    Rtmp2,OP1mhi,OP2mhi
        RSCS    LR,LR,#0
        BCS     Rem_Loop_End

; Otherwise, use the trial subtraction result to form a new partial
; remainder equal to 2*divisor minus old partial remainder, and note that
; the sign of the partial remainder has changed.

        ADDS    OP1mlo,Rtmp,OP2mlo
        ADC     OP1mhi,Rtmp2,OP2mhi
        EOR     OP1sue,OP1sue,#Sign_bit

Rem_Loop_End

; Loop until finished. Note the partial remainder is completely contained in
; OP1mhi and OP1mlo at this point.

        SUBS    Rarith,Rarith,#1
        BGE     Rem_Loop_Shift

; The result will always be exact.

        MOV     Rarith,#0

; If we've now got a partial remainder of exactly zero, the result is zero,
; with sign equal to that of the original first operand. Otherwise, we've
; got to normalise the result.

        ORRS    Rtmp,OP1mhi,OP1mlo
        MOVEQ   OP1sue,OP1sue,LSL #1    ;Recover copy of original sign
        MOVEQ   RNDexp,#0
        ANDNE   OP1sue,OP1sue,#Sign_bit
        BLNE    $NormaliseOp1_str

; And return.

  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF


        ]                               ; Conditional assembly of Rem/mod

;===========================================================================

        [ :DEF: sqrt_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to take the square root of an internal format floating point
; number. Unlike the dyadic arithmetic instructions, only one entry point is
; required: we do however give it two labels for the sake of consistent
; naming.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

        [ FPEWanted
SqrtFPE
        ]

        [ FPASCWanted
SqrtFPASC
        ]

        [ :LNOT: :DEF: sqrt_s

        CDebug3 3,"SqrtFPE/FPASC: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting according to whether the operand is common or uncommon.
; The code to deal with uncommon operands lies a long way down in the
; source, to avoid addressability problems.

        TST     OP1sue,#Uncommon_bit
        BNE     Sqrt_Uncommon

; If the operand is a zero, the result is the same zero. Because the
; operand is common and assumed not to be an unnormalised URD result, we can
; check for zeros by means of the units bit.

        TST     OP1mhi,#EIUnits_bit
        BEQ     Sqrt_Zero

; The operand may now be assumed to be a normalised number. If it is
; negative, we have an invalid operation exception. Otherwise, the result
; sign is positive (equal to the operand sign) and we need to produce the
; result exponent.
;   We produce the result exponent by adding the exponent bias to the
; already biased exponent, producing (unbiased exponent) + 2*bias, then
; shifting right by one bit, producing ((unbiased exponent) DIV 2) + bias.
; We set the condition codes on this last instruction in order to transfer
; the least significant bit of the unbiased exponent into C.
        
        ]

        [ FPLibWanted
__fp_sqrt_common
        ]

Sqrt_Common

        AND     RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent

        [ FPEWanted :LOR: FPASCWanted

; Emulator builds: a negative operand is handed to the invalid operation
; code, with the reason code in Rtmp.

        ANDS    OP1sue,OP1sue,#Sign_bit ;Isolate sign bit & check positive
        MOVNE   Rtmp,#InvReas_SqrtNeg
        BNE     InvalidOperation1ForSDE

        |

; Other builds: a negative operand is reported by ORing IVO_bits into OP1sue
; and returning straight to the caller.

        ANDS    OP1sue,OP1sue,#Sign_bit ;Isolate sign bit
        ORRNE   OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BXNE    LR
  ELSE
        MOVNE   PC,LR        
  ENDIF
        
        ]

; The bias is added as two separate immediates (its high byte, then its low
; byte). The final MOVS halves the sum and leaves the bit shifted out in C,
; which is the C flag that Sqrt_Mantissa expects on entry.

        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF00
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF
        ASSERT  (EIExp_bias-1) < &10000 ;Result exponent if mantissa
                                        ; overflow is (exp+bias) DIV 2
        MOVS    RNDexp,RNDexp,LSR #1

; This subsidiary entry point deals with taking the square root of a
; normalised mantissa.
; Entry: OP1sue = the result's sign, with an uncommon bit of 0 - the
;          remaining bits are zero;
;        OP1mhi = Operand mantissa, high word;
;        OP1mlo = Operand mantissa, low word;
;        RNDexp = Prospective result exponent;
;        Rins   = instruction (needed to determine the precision);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link;
;        C      = least significant bit of operand's unbiased exponent
;          (tested below to choose the initial remainder and the entry
;          point into the first unrolled loop, so it must survive until
;          then).
; Exit:  OP1sue = the result's sign (always positive), with an uncommon bit
;          of 0; the remaining bits are zero;
;        OP1mhi, OP1mlo = the result's mantissa;
;        RNDexp = the result exponent;
;        Rarith holds the round bit (in bit 31) and the sticky bit (in bits
;          30:0) if the destination precision is extended; if the
;          destination precision is single or double, it holds part of the
;          sticky bit (the remainder of which is held in bits below the
;          round bit in OP1mhi and OP1mlo);
;        OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14 may be corrupt;
;        All other registers preserved.
;
; Note that the result exponent is in fact always equal to the prospective
; result exponent: the process of taking the square root always results in a
; normalised mantissa. (Subsequent rounding may of course lead to mantissa
; overflow, but the raw unrounded result mantissa is always normalised.)

Sqrt_Mantissa

        CDebug2 4,"SqrtFPE/FPASC: mantissa =",OP1mhi,OP1mlo
        CDebug1 4,"               sign     =",OP1sue
        CDebug1 4,"               exponent =",RNDexp

; We do the square root by the standard "long square root" algorithm. (There
; is an optimisation possibility here, of doing square roots by
; Newton-Raphson followed by a final correction. This only applies to the
; FPASC, since the FPE's division is too slow for there to be any
; possibility of this making a profit - even the FPA's division will have to
; be used very carefully for it to have a hope of working.)
;
; A description of the long square root algorithm follows:
;
; The problem is to take the square root of a mantissa M in the range 1 <= M
; < 4. An initial approximation R[0]=1 to the root has the property that it
; is the rounded-down root to 0 places after the binary point - i.e. that
; R[0] is a multiple of 2^(-0) and R[0] <= Sqrt(M) < R[0] + 2^(-0). We will
; evaluate successive approximations R[i] to the root such that R[i] is the
; correct rounded-down root to i places after the binary point - i.e. that
; R[i] is a multiple of 2^(-i) and R[i] <= Sqrt(M) < R[i] + 2^(-i). If we
; know R[24], R[53] or R[64] respectively for single, double or extended
; precision, and in addition know whether the result is exact (i.e. whether
; R[i] = Sqrt(M) exactly), we have enough information to provide all the
; required fractional bits and the round and sticky bits, and so to
; calculate the correct IEEE square root. (Note that a guard bit is not
; required: the infinite precision square root of M will not suffer mantissa
; overflow or underflow, and so its finite precision approximations can only
; suffer mantissa overflow during rounding, not prior to rounding.)
;
; So we will use a partial remainder P[i] = M - R[i]^2; initially, P[0] =
; M-1. Next, we know that R[i+1] is either equal to R[i] or to R[i] +
; 2^(-i-1), depending on whether the next bit of the root is 0 or 1. To
; determine which, we need to know whether R[i] + 2^(-i-1) <= Sqrt(M): if it
; is, the next bit of the root is 1; if it isn't, the next bit of the root
; is 0.
;
; This is equivalent to asking whether (R[i] + 2^(-i-1))^2 <= M, i.e. to
; whether:
;
;   R[i]^2 + R[i]*2^(-i) + 2^(-2*i-2) <= M
;
; or to whether:
;
;   R[i]*2^(-i) + 2^(-2*i-2) <= P[i]
;
; If it is, then R[i+1] = R[i] + 2^(-i-1) and:
;
;   P[i+1] = M - R[i+1]^2
;          = M - (R[i] + 2^(-i-1))^2
;          = M - R[i]^2 - R[i]*2^(-i) - 2^(-2*i-2)
;          = P[i] - R[i]*2^(-i) - 2^(-2*i-2)
;
; If it isn't, then R[i+1] = R[i] and P[i+1] = M - R[i+1]^2 = M - R[i]^2 =
; P[i].
;
; So the long square root algorithm can be stated as follows, where N=24, 53
; or 64 respectively for single, double or extended precision:
;
; (1) Initialise: R[0] = 1, P[0] = M-1;
;
; (2) For i=0 to N-1:
;       Do a trial subtraction of R[i]*2^(-i) + 2^(-2*i-2) from P[i];
;       If result >= 0, put R[i+1] = R[i] + 2^(-i-1), P[i+1] = result of
;         trial subtraction;
;       Else put R[i+1] = R[i], P[i+1] = P[i];
;
; (3) The units, fractional and round bits of the result are in R[N], while
;     the sticky bit is 0 if P[N] = 0, 1 if P[N] > 0.
;
; Note that P[i] = M - R[i]^2
;                < M - (Sqrt(M) - 2^(-i))^2
;                = M - M + Sqrt(M)*2^(-i+1) - 2^(-2*i)
;                = Sqrt(M)*2^(-i+1) - 2^(-2*i)
;                < 2^(-i+2)
;
; So P[i] decreases greatly in magnitude during the long square root
; process. If we use it straightforwardly, this will result in a lot of
; spurious subtractions of bits known to be zero from other bits known to be
; zero during the algorithm. So instead, let us define Q[i] = P[i]*2^(i-1)
; and recast the algorithm in terms of Q[i]:
;
; (1) Initialise: R[0] = 1, Q[0] = (M-1)/2;
;
; (2) For i=0 to N-1:
;       Do a trial subtraction of R[i] + 2^(-i-2) from 2*Q[i];
;       If result >= 0, put R[i+1] = R[i] + 2^(-i-1), Q[i+1] = result of
;         trial subtraction;
;       Else put R[i+1] = R[i], Q[i+1] = 2*Q[i];
;
; (3) The units, fractional and round bits of the result are in R[N], while
;     the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; Introducing a travelling bit variable T[i] to represent 2^(-i-2) and
; rephrasing in terms of shifts:
;
; (1) Initialise: R[0] = 1, Q[0] = (M-1)/2, T[0] = 2^(-2);
;
; (2) For i=0 to N-1:
;       Do a trial subtraction of R[i] + T[i] from Q[i] << 1;
;       If result >= 0, put R[i+1] = R[i] + (T[i] << 1),
;                           Q[i+1] = (Q[i] << 1) - (R[i]+T[i]);
;       Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1;
;
; (3) The units, fractional and round bits of the result are in R[N], while
;     the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; This is more-or-less the algorithm we use, though we split into different
; sections depending on how far the travelling bit has been shifted down so
; far, to avoid doing multi-word arithmetic until we have to.
;
; One thing we do have to look at is the precision required for Q[i]. We
; know that 0 < Q[i] = P[i]*2^(i-1) < 2^(-i+2)*2^(i-1) = 2, so one place
; before the binary point is enough. Initially, Q[0] = (M-1)/2 is a multiple
; of 2^(-64), requiring 64 places after the binary point, or 65 bits in
; total - one bit more than 2 words. This is highly inconvenient, but we can
; get around it by noticing that if M < 2, then the first two bits of the
; result are definitely 1.0, and we have R[1] = 1.0, Q[1] = M-1 and T[1] =
; 2^(-3). So Q[1] is a multiple of 2^(-63) and can be represented in two
; words. On the other hand, if M >= 2, then Q[0] = (M-1)/2 is a multiple of
; 2^(-63) and can also be represented by two words. This transforms the
; algorithm to:
;
; IF M < 2 THEN
;
;   (1) Initialise: R[1] = 1.0, Q[1] = M-1, T[1] = 2^(-3);
;
;   (2) For i=1 to N-1:
;         Do a trial subtraction of R[i] + T[i] from Q[i] << 1;
;         If result >= 0, put R[i+1] = R[i] + (T[i] << 1),
;                             Q[i+1] = (Q[i] << 1) - (R[i]+T[i]);
;         Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1;
;
;   (3) The units, fractional and round bits of the result are in R[N], while
;       the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; ELSE
;
;   (1') Initialise: R[0] = 1, Q[0] = (M-1)/2, T[0] = 2^(-2);
;
;   (2') For i=0 to N-1:
;          Do a trial subtraction of R[i] + T[i] from Q[i] << 1;
;          If result >= 0, put R[i+1] = R[i] + (T[i] << 1),
;                              Q[i+1] = (Q[i] << 1) - (R[i]+T[i]);
;          Else put R[i+1] = R[i], Q[i+1] = Q[i] << 1;
;
;   (3') The units, fractional and round bits of the result are in R[N], while
;        the sticky bit is 0 if Q[N] = 0, 1 if Q[N] > 0.
;
; ENDIF
;
; Now Q[i] can be represented in two words up to the point where the trial
; subtraction produces results that overflow two words. We have the
; following situation at various iterations, remembering that T[i] = 2^(-i-2):
;
; For i < 30: R[i] and T[i] can be represented by 1 word, with the binary
;   point to the right of bit 31; Q[i+1] requires two words, with the trial
;   subtraction being performed on the top word only.
;
; For 30 <= i < 62: R[i] can be represented by 2 words, with the binary point
;   to the right of bit 31 of the top word (strictly, the low word isn't
;   required for R[30]); T[i] can be represented by 1 word, now with an
;   implicit word of zeros above it and the binary point to the right of bit
;   31 of this implicit word; Q[i+1] still requires two words, with the trial
;   subtraction occurring on both words;
;
; For i=62: R[i] can be represented by 2 words, with the binary point to the
;   right of bit 31 of the top word; T[i] can be represented by 1 word, now
;   with two implicit words of zeros above it and the binary point to the
;   right of bit 31 of the more significant of the two words; Q[i+1] still
;   contains two words, but a third word is required for the trial
;   subtraction.
;
; For i=63: R[i] now requires 3 words, with the binary point to the right of
;   bit 31 of the most significant word; T[i] can be represented by 1 word,
;   now with two implicit words of zeros above it and the binary point to
;   the right of bit 31 of the more significant of the two words; Q[i+1] will
;   require 3 words to represent it, with the trial subtraction occurring on
;   all three words.
;
; So we will actually perform the square root in 5 stages:
;
; (A) Initialisation and iterations with 0 <= i < 30. Terminated after i=23
;     for single precision.
; (B) Iterations with 30 <= i < 62. Terminated after i=52 for double
;     precision, not done at all for single precision.
; (C) Iteration with i=62. Only done for extended precision.
; (D) Iteration with i=63. Only done for extended precision.
; (E) Sticky bit construction. Done separately for single/double and
;     extended precisions.
;
; Register usage:
;   OP1mhi, OP1mlo: R[i] (the root so far); Rarith is also involved in this
;     at the end of the i=63 iteration.
;   OP2mhi, OP2mlo: Q[i] (the shifted partial remainder).
;   Rarith: temporary register.
;   Rtmp: T[i] (the travelling bit);
;   Rtmp2: loop counter.

; Initialise remainder (Q[0] for odd exponent, Q[1] for even exponent).
; The choice is made on the C flag passed in by the caller; nothing from
; here down to the BCC that precedes Sqrt_Loop1 may alter C.

        SUBCC   OP2mhi,OP1mhi,#TopBit   ;Subtract 1 for even exponent
        SUBCS   OP2mhi,OP1mhi,#TopBit:SHR:1 ;Shift left, subtract 1 and shift
                                            ; right for odd exponent
        MOV     OP2mlo,OP1mlo           ;Bottom word is unaffected either way

; Initialise travelling bit. Due to the loop unwinding below, we actually
; want T[0] for an odd exponent, T[1] << 1 for an even exponent: both of
; these are 2^(-2).

        MOV     Rtmp,#TopBit:SHR:2

; Initialise result - both R[1] = 1.0 for even exponents and R[0] = 1 for
; odd exponents require the same bit pattern.

        MOV     OP1mhi,#TopBit
        MOV     OP1mlo,#0

; Initialise the loop counter. This is a bit esoteric: it contains minus the
; number of times the first loop below is executed in its top four bits,
; plus the number of times the second loop is executed in its bottom 4 bits.
; The idea is that the first loop adds 1 << 28 to it until it becomes
; positive, then the second subtracts one from it until it becomes zero.
;   This is the only time we actually need to look at the precision bits in
; the instruction!
;   Note that we must take great care not to change the C flag in this code.
; (That is why a mask which does not fit in the bottom byte is tested by
; shifting the instruction down into Rarith first, rather than by using a
; rotated immediate operand.)

 [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp2,#((-5):SHL:28) + 8        ;Correct value for extended
        [ Pr1_mask < &100                       ;I.e. if immediate won't set C
          TST     Rins,#Pr1_mask                ;Z := 1 if single/double
        |
          MOV     Rarith,Rins,LSR #Pr1_pos
          TST     Rarith,#(Pr1_mask:SHR:Pr1_pos)
        ]
        MOVEQ   Rtmp2,#((-5):SHL:28) + 6        ;Correct value for double
        [ Pr2_mask < &100                       ;I.e. if immediate won't set C
          TSTEQ   Rins,#Pr2_mask                ;Z := 1 if single
        |
          MOVEQ   Rarith,Rins,LSR #Pr2_pos
          TSTEQ   Rarith,#(Pr2_mask:SHR:Pr2_pos)
        ]
        MOVEQ   Rtmp2,#((-4):SHL:28) + 0        ;Correct value for single

 |

; Single precision square root is not allowed. Extended is though.
; The two counts loaded below are the same values as the extended and double
; counts used in the other arm of this conditional.

        [ Double_mask < &100
          TST   Rins,#Double_mask
        |
          MOV   Rarith,Rins,LSR #Double_pos
          TST   Rarith,#(Double_mask:SHR:Double_pos)
        ]
        MOVEQ   Rtmp2,#((-5):SHL:28) + 8        ;Double bit clear
        MOVNE   Rtmp2,#((-5):SHL:28) + 6        ;Double bit set
 ]

; We now require the iterations with 0 <= i < 30 to be done - i.e.:
;
;   23 iterations for single precision, even exponent (1<=i<=23);
;   24 iterations for single precision, odd exponent (0<=i<=23);
;   29 iterations for double/extended precision, even exponent (1<=i<=29);
;   30 iterations for double/extended precision, odd exponent (0<=i<=29).
;
; We unwind this loop to produce 6 copies of the code, and branch in after
; the first one for even exponents.
;
; Throughout this loop only the top word of the root (OP1mhi) is touched;
; the remainder is shifted through both OP2mhi and OP2mlo, but compared and
; subtracted in OP2mhi only.

        BCC     Sqrt_Loop1A             ;C still holds the exponent's parity

Sqrt_Loop1

; First copy of code

        ADDS    OP2mlo,OP2mlo,OP2mlo    ;Get Q[i] << 1 - note top bit goes
        ADCS    OP2mhi,OP2mhi,OP2mhi    ; into C

        ORR     Rarith,OP1mhi,Rtmp      ;And R[i] + T[i] - note no overlap
        CMPCC   OP2mhi,Rarith           ;Trial subtraction - always works
                                        ; if (Q[i] << 1) >= 2.
        SUBCS   OP2mhi,OP2mhi,Rarith    ;Do real subtraction if trial works
        ORRCS   OP1mhi,OP1mhi,Rtmp,LSL #1 ;Put 1 in result if trial works

Sqrt_Loop1A

; Second copy of code - similar to first copy except we use Rtmp >> 1
; instead of Rtmp.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ORR     Rarith,OP1mhi,Rtmp,LSR #1
        CMPCC   OP2mhi,Rarith
        SUBCS   OP2mhi,OP2mhi,Rarith
        ORRCS   OP1mhi,OP1mhi,Rtmp

; Third copy of code - similar to first copy except we use Rtmp >> 2
; instead of Rtmp.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ORR     Rarith,OP1mhi,Rtmp,LSR #2
        CMPCC   OP2mhi,Rarith
        SUBCS   OP2mhi,OP2mhi,Rarith
        ORRCS   OP1mhi,OP1mhi,Rtmp,LSR #1

; Fourth copy of code - similar to first copy except we use Rtmp >> 3
; instead of Rtmp.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ORR     Rarith,OP1mhi,Rtmp,LSR #3
        CMPCC   OP2mhi,Rarith
        SUBCS   OP2mhi,OP2mhi,Rarith
        ORRCS   OP1mhi,OP1mhi,Rtmp,LSR #2

; Fifth copy of code - similar to first copy except we use Rtmp >> 4
; instead of Rtmp.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ORR     Rarith,OP1mhi,Rtmp,LSR #4
        CMPCC   OP2mhi,Rarith
        SUBCS   OP2mhi,OP2mhi,Rarith
        ORRCS   OP1mhi,OP1mhi,Rtmp,LSR #3

; Sixth copy of code - similar to first copy except we use Rtmp >> 5
; instead of Rtmp.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ORR     Rarith,OP1mhi,Rtmp,LSR #5
        CMPCC   OP2mhi,Rarith
        SUBCS   OP2mhi,OP2mhi,Rarith
        ORRCS   OP1mhi,OP1mhi,Rtmp,LSR #4

; Now update the travelling bit and loop counter, then loop if required.
; The MOV does not set the flags, so the BLT still tests the result of the
; ADDS on the loop counter.

        ADDS    Rtmp2,Rtmp2,#1:SHL:28   ;Increment loop counter
        MOV     Rtmp,Rtmp,ROR #6        ;ROR rather than LSR to set up
        BLT     Sqrt_Loop1              ; for next loop.

; If the result is exact at this point, we can obviously return with all the
; remaining fractional bits, the round bit and the sticky bit equal to 0. If
; the result is not exact but the precision is single, we can return with a
; sticky bit of 1. We only continue if the result is inexact and the
; precision is double or extended.
;   The ORRS leaves Rarith zero exactly when the remainder is zero, which is
; the value wanted on return. The precision is recognised from the
; travelling bit: it started as TopBit:SHR:2 and has been rotated right by 6
; once per pass, so the 4 passes done for single precision leave it at
; TopBit:SHR:26, while the 5 passes done otherwise wrap it round to TopBit.

        ORRS    Rarith,OP2mhi,OP2mlo
        CMPNE   Rtmp,#TopBit:SHR:26     ;Will be EQ for single, NE for
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR                   ; double or extended
  ENDIF


; Next, we need to do the iterations with 30 <= i < 62 - i.e.:
;
;   32 iterations for extended precision (30<=i<=61);
;   23 iterations for double precision (30<=i<=52).
;
; This is a bit awkward from the point of view of unwinding the loop, so we
; will instead do 24 iterations for double precision and unwind the loop to
; produce 4 copies of the code. The extra iteration for double precision is
; wasted work but does no harm.
;
; Rins and Rfpsr are stacked so that they can hold the two words of the
; trial subtraction result. LR is stacked so that it can collect the bit
; shifted out of the top of the remainder: only bit 0 of LR is ever looked
; at, so there is no need to clear it first. Every exit from here on must
; unstack all three registers.

        STMFD   Rsp!,{Rfpsr,Rins,LR}    ;We need a few more registers

Sqrt_Loop2

        ADDS    OP2mlo,OP2mlo,OP2mlo    ;Get Q[i] << 1,
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     LR,LR,LR                ; putting overflow bit into LR[0]

        ORR     Rarith,OP1mlo,Rtmp      ;(OP1mhi,Rarith) := R[i] + T[i]
        SUBS    Rins,OP2mlo,Rarith      ;Do trial subtraction, which
        SBCS    Rfpsr,OP2mhi,OP1mhi
        MOVCCS  LR,LR,LSR #1            ; always works if (Q[i] << 1) >= 2.

        MOVCS   OP2mlo,Rins             ;Use subtraction result if
        MOVCS   OP2mhi,Rfpsr            ; successful
        ORRCS   OP1mlo,OP1mlo,Rtmp,LSL #1  ;And put a 1 in the result
        ORRCS   OP1mhi,OP1mhi,Rtmp,LSR #31 ;(NB Rtmp may be &80000000)

; Second copy of code - similar to first copy except we use Rtmp >> 1 in
; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     LR,LR,LR
        ORR     Rarith,OP1mlo,Rtmp,LSR #1
        SUBS    Rins,OP2mlo,Rarith
        SBCS    Rfpsr,OP2mhi,OP1mhi
        MOVCCS  LR,LR,LSR #1
        MOVCS   OP2mlo,Rins
        MOVCS   OP2mhi,Rfpsr
        ORRCS   OP1mlo,OP1mlo,Rtmp

; Third copy of code - similar to first copy except we use Rtmp >> 2 in
; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     LR,LR,LR
        ORR     Rarith,OP1mlo,Rtmp,LSR #2
        SUBS    Rins,OP2mlo,Rarith
        SBCS    Rfpsr,OP2mhi,OP1mhi
        MOVCCS  LR,LR,LSR #1
        MOVCS   OP2mlo,Rins
        MOVCS   OP2mhi,Rfpsr
        ORRCS   OP1mlo,OP1mlo,Rtmp,LSR #1

; Fourth copy of code - similar to first copy except we use Rtmp >> 3 in
; place of Rtmp, and don't need to worry about putting the 1 into OP1mhi.

        ADDS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     LR,LR,LR
        ORR     Rarith,OP1mlo,Rtmp,LSR #3
        SUBS    Rins,OP2mlo,Rarith
        SBCS    Rfpsr,OP2mhi,OP1mhi
        MOVCCS  LR,LR,LSR #1
        MOVCS   OP2mlo,Rins
        MOVCS   OP2mhi,Rfpsr
        ORRCS   OP1mlo,OP1mlo,Rtmp,LSR #2

; Now update the travelling bit and loop counter, then loop if required.
; The MOV does not set the flags, so the BNE still tests the result of the
; SUBS on the loop counter.

        SUBS    Rtmp2,Rtmp2,#1          ;Decrement loop counter
        MOV     Rtmp,Rtmp,ROR #4        ;ROR rather than LSR to set up
        BNE     Sqrt_Loop2              ; for last couple of iterations.

; If the remainder is zero at this point, we've got an exact result: the
; last fractional bit, the round bit and the sticky bit must all be zero.
;   Otherwise, we know that the result will *not* be exact, since each of
; the last two iterations either doesn't change the partial remainder (thus
; leaving it non-zero) or subtracts a value with a 1 in a less significant
; bit than the lowest bit currently in the partial remainder, which must
; leave it non-zero.
;   So we can now return if either the result is currently exact or if it is
; inexact and the precision is double, taking care to make Rarith zero in
; the first case and non-zero in the second. We only need to perform the
; rest of the square root if the precision is extended and the result is
; currently inexact - which implies that it will also ultimately be inexact
; and thus that the sticky bit is 1.
;   The precision is again recognised from the travelling bit: it entered
; this loop as TopBit and has been rotated right by 4 once per pass, so the
; 6 passes done for double precision leave it at TopBit:SHR:24, while the 8
; passes done for extended precision bring it back round to TopBit.

        ORRS    Rarith,OP2mhi,OP2mlo
        CMPNE   Rtmp,#TopBit:SHR:24     ;Will be EQ for double, NE for
  IF Interworking :LOR: Thumbing
        LDMEQFD Rsp!,{Rfpsr,Rins,LR}    ; extended
        BXEQ    LR
  ELSE
        LDMEQFD Rsp!,{Rfpsr,Rins,PC}    ; extended
  ENDIF


; Now we need to get the last fractional bit.
;   From here on the travelling bit lies in a third word below OP1mlo and
; OP2mlo. Rtmp is reused to hold that third word of the remainder: the RSBS
; subtracts the travelling bit from a third word of zero, and the borrow it
; produces is carried up through the two RSCS instructions.

        ADDS    OP2mlo,OP2mlo,OP2mlo    ;Get Q[i] << 1,
        ADCS    OP2mhi,OP2mhi,OP2mhi
        ADC     LR,LR,LR                ; putting overflow bit into LR[0]

        RSBS    Rtmp,Rtmp,#0            ;Do trial subtraction, which
        RSCS    Rins,OP1mlo,OP2mlo
        RSCS    Rfpsr,OP1mhi,OP2mhi
        MOVCCS  LR,LR,LSR #1            ; always works if (Q[i] << 1) >= 2.

        MOVCS   OP2mlo,Rins             ;Use subtraction result if
        MOVCS   OP2mhi,Rfpsr            ; successful
        MOVCC   Rtmp,#0                 ;And forget it if not
        ORRCS   OP1mlo,OP1mlo,#1        ;And put a 1 in the result

; And the round bit.
;   The round bit is not stored in the root: it is returned in bit 31 of
; Rarith, with the sticky bit (already known to be 1) in bit 0.

        MOV     Rarith,#TopBit+1        ;We know sticky bit is 1 - assume
                                        ; round bit is also 1

        ADDS    Rtmp,Rtmp,Rtmp          ;Get Q[i] << 1.
        ADCS    OP2mlo,OP2mlo,OP2mlo
        ADCS    OP2mhi,OP2mhi,OP2mhi
  IF Interworking :LOR: Thumbing
        LDMCSFD Rsp!,{Rfpsr,Rins,LR}    ;If >= 2, round bit must be 1
        BXCS    LR
  ELSE
        LDMCSFD Rsp!,{Rfpsr,Rins,PC}    ;If >= 2, round bit must be 1
  ENDIF

                                        ;Omit low word of trial subtraction
                                        ; - we know it will borrow and thus
                                        ; leave C=0. But C=0 here anyway!
        SBCS    Rins,OP2mlo,OP1mlo      ;Do rest of trial subtraction
        SBCS    Rins,OP2mhi,OP1mhi      ;Only the final carry is wanted
        MOVCC   Rarith,#1               ;If it fails, round=0, sticky=1
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{Rfpsr,Rins,LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{Rfpsr,Rins,PC}
  ENDIF

        ]                               ; Conditional compilation of sqrt

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; Routine to do a move/move negated/absolute value of an internal format
; floating point number. It has the usual pair of entry points, one
; optimised for the FPASC, the other for the FPE.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; Note that these operations are usually very simple:
;   * Numeric values need their sign bits modified, then to be set up for
;     rounding; note that in the process, uncommon numeric values need to be
;     converted to zeros or normalised numbers to ensure that the rounding
;     works;
;   * Infinities and quiet NaNs need their sign bits modified;
;   * Signalling NaNs just need their sign bits modified if no change of
;     format is involved (what this means depends on the state of the FPSR
;     NE bit); if a change of format is required, they should generate the
;     usual invalid operation exception.

        [ FPEWanted

MoveFPE

        CDebug3 3,"MoveFPE: operand =",OP1sue,OP1mhi,OP1mlo

; If the value is common, it's a numeric value and there's no problem.

        TST     OP1sue,#Uncommon_bit
        BNE     Move_Uncommon

; Split out the exponent.

        AND     RNDexp,OP1sue,#ToExp_mask

        ]

Move_Numeric

; Isolate sign bit and clear uncommon bit. Also set Rarith to 0, since all
; rounding information is completely contained in OP1mhi and OP1mlo.
; The exponent is not set up here: it has to be in RNDexp already when this
; label is reached.

        AND     OP1sue,OP1sue,#Sign_bit
        MOV     Rarith,#0

Move_DoSigns

; Do the sign manipulations and return. Both tests are always made, in this
; order: the sign is first inverted if the instruction has MNF_bit set, and
; then cleared if it has ABS_bit set.

        TST     Rins,#MNF_bit
        EORNE   OP1sue,OP1sue,#Sign_bit
        TST     Rins,#ABS_bit
        BICNE   OP1sue,OP1sue,#Sign_bit
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional assembly of Move

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; Routine to do a NRM instruction on an internal format floating point
; number. It has the usual pair of entry points, one optimised for the
; FPASC, the other for the FPE.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.
;
; This operation is very similar to MVF, except that we have to cater for
; unnormalised values with the uncommon bit equal to zero - i.e. an URD
; result.

        [ FPEWanted

NormFPE

        CDebug3 3,"NormFPE: operand =",OP1sue,OP1mhi,OP1mlo

; Split according to whether the value is common or uncommon.

        TST     OP1sue,#Uncommon_bit
        BNE     Norm_Uncommon

; Split out the exponent.

        AND     RNDexp,OP1sue,#ToExp_mask

; If the units bit is clear, it's either a URD result or a zero. URD results
; can be treated just like extended unnormalised numbers and zeros.

        TST     OP1mhi,#EIUnits_bit
        BNE     Norm_Numeric

        ]

Norm_ZeroUnnormOrDenorm

; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero - or a
; proper zero or a URD result, which may be treated like an extended
; unnormalised number or zero. If it's any sort of zero, change it to a real
; zero and treat it as a numeric.

        ORRS    Rtmp,OP1mhi,OP1mlo      ;Z := 1 if the mantissa is all zero
        MOVEQ   RNDexp,#0
        BEQ     Norm_Numeric

; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

; Shift the uncommon bit of OP1sue up to the units bit position and AND it
; with OP1mhi: the result is negative (MI) only if both bits are set. The
; value left in Rarith is not used afterwards; it is zeroed at Norm_Numeric.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   RNDexp,RNDexp,#1

        BL      $NormaliseOp1_str       ;NB must be necessary, so no
                                        ; point in checking whether
                                        ; normalised

        LDMFD   Rsp!,{LR}

Norm_Numeric

; Isolate sign bit and clear uncommon bit. Also set Rarith to 0, since all
; rounding information is completely contained in OP1mhi and OP1mlo.

        AND     OP1sue,OP1sue,#Sign_bit
        MOV     Rarith,#0
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional assembly of Norm

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; Routine to do a URD instruction on an internal format floating point
; number. There are the usual two entry points.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

        [ FPEWanted

; FPE entry point. It only adds the common/uncommon dispatch in front of the
; shared code below, and is assembled only when FPEWanted is set.

UrdFPE

        CDebug3 3,"UrdFPE: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BNE     Urd_Uncommon

        ]

Urd_Common

; The operand is common. Split OP1sue into sign and biased exponent.

        AND     Rarith,OP1sue,#ToExp_mask       ;Rarith := biased exponent
        AND     OP1sue,OP1sue,#Sign_bit         ;OP1sue := sign only

Urd_Numeric

; Calculate shift amount to denormalise the number to put the true binary
; point at the rounding boundary - i.e. to give it an effective unbiased
; exponent of 23, 52 or 63 depending on whether the precision of the
; instruction is single, double or extended.
;   The low byte of the boundary exponent is chosen first (the Pr1_mask test
; comes last, so it overrides the Pr2_mask choice); the high byte, which the
; ASSERTs check is the same for all three values, is then ORed in.

        MOV     RNDexp,#((EIExp_bias+23):AND:&FF)
        TST     Rins,#Pr2_mask
        MOVNE   RNDexp,#((EIExp_bias+52):AND:&FF)
        TST     Rins,#Pr1_mask
        MOVNE   RNDexp,#((EIExp_bias+63):AND:&FF)
        ORR     RNDexp,RNDexp,#((EIExp_bias+63):AND:&FF00)
        ASSERT  ((EIExp_bias+63):AND:&FF00) = ((EIExp_bias+52):AND:&FF00)
        ASSERT  ((EIExp_bias+63):AND:&FF00) = ((EIExp_bias+23):AND:&FF00)

        SUBS    Rtmp,RNDexp,Rarith      ;Rtmp := boundary exp. - operand exp.
        BLS     Urd_Big                 ;Boundary <= operand exp. (unsigned)

; Denormalise the number to have this unbiased exponent and return.

        Denorm  OP1mhi,OP1mlo,Rarith,Rtmp,Rtmp2,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Urd_Big

; We just need to return the number itself, with rounding bits equal to
; zero.

        MOV     RNDexp,Rarith           ;Result exponent is the operand's own
        MOV     Rarith,#0               ;No round/sticky information
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional assembly of Urd

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; Routine to do a RND instruction on an internal format floating point
; number. There are the usual two entry points.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

        [ FPEWanted
RndFPE
        ]

        [ FPASCWanted
RndFPASC
        ]

; Both entry points share the code below; each label is only assembled when
; the corresponding build option is set.

        CDebug3 3,"RndFPASC/FPE: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BNE     Rnd_Uncommon

Rnd_Common

; The operand is common. Split OP1sue into sign and biased exponent.

        AND     RNDexp,OP1sue,#ToExp_mask       ;RNDexp := biased exponent
        AND     OP1sue,OP1sue,#Sign_bit         ;OP1sue := sign only

; If the number is a zero, we're done.

        TST     OP1mhi,#EIUnits_bit
        BEQ     Rnd_Exact

Rnd_Numeric

; Find the position of the real binary point.
;   The MOV must be unconditional. The fall-through path from Rnd_Common
; always arrives here with NE, but Rnd_Numeric is a label that other code
; may branch to, and the flags cannot be assumed on that path. A skipped
; move would leave a stale low byte in Rarith under the ORR below.

        MOV     Rarith,#((EIExp_bias+63):AND:&FF)
        ORR     Rarith,Rarith,#((EIExp_bias+63):AND:&FF00)
        ASSERT  (EIExp_bias + 63) < &10000

; Rtmp := number of mantissa bits lying below the binary point. The signed
; test (BLE) treats a zero or negative count as "already an integer".

        SUBS    Rtmp,Rarith,RNDexp
        BLE     Rnd_Exact

; The rounding position for an integer - i.e. the real binary point - is now
; Rtmp bits above the bottom of the mantissa. Split according to whether
; this puts the round bit in the low word of the mantissa, the high word of
; the mantissa or above the high word of the mantissa.

        RSBS    Rtmp2,Rtmp,#32          ;Rtmp2 := 32 - Rtmp
        BLT     Rnd_AboveLowWord        ;More than 32 bits to discard

Rnd_LowWord

; The rounding boundary lies within the low mantissa word. When reached by
; fall-through from the code above, Rtmp is the number of bits below the
; boundary (1 to 32) and Rtmp2 = 32 - Rtmp (0 to 31).

; Branch out if rounding is exact.

        MOVS    Rtmp,OP1mlo,LSL Rtmp2   ;Keep only the bits below the boundary
        BEQ     Rnd_Exact

; We now know we want to round down if we're rounding to zero, or if we're
; rounding to minus infinity and the number is positive, or if we're
; rounding to plus infinity and the number is negative.
;   The MOVS puts the sign into C; exactly one of the two conditional TSTs
; then executes and examines the rounding mode bit relevant to that sign.

        MOVS    Rtmp,OP1sue,LSL #32-Sign_pos
        TSTCS   Rins,#1:SHL:RM_pos
        TSTCC   Rins,#1:SHL:(RM_pos+1)
        ASSERT  RM_pos < 7              ;So that constants don't disturb C
        BNE     Rnd_LowWord_RoundDown

; If we're not rounding to nearest, we must now be rounding up.

        TST     Rins,#RM_mask
        BNE     Rnd_LowWord_RoundUp
        ASSERT  RM_Nearest = 0

; We're rounding to nearest. Produce the round and sticky bits, then work
; out which way we're rounding.

        ADD     Rtmp,Rtmp2,#1
        MOVS    Rtmp,OP1mlo,LSL Rtmp    ;C<-round, Z<-NOT(sticky)
        BNE     Rnd_LowWord_GotDir      ;Branch if not halfway case

; Halfway case: round to even, so C must become the least significant bit
; of the integer part. The first MOVS supplies bit 0 of the high word; the
; second replaces it with the bit from the low word, except that a register
; shift by 0 leaves C untouched.

        MOVS    Rtmp,OP1mhi,LSR #1      ;C<-least significant bit, from
        MOVS    Rtmp,OP1mlo,LSL Rtmp2   ; low word unless Rtmp2 is 0.

Rnd_LowWord_GotDir

        BCS     Rnd_LowWord_RoundUp

Rnd_LowWord_RoundDown

; Shifting right then left by the same amount zeroes the discarded bits.

        RSB     Rtmp2,Rtmp2,#32         ;Clear all bits below rounding
        MOV     OP1mlo,OP1mlo,LSR Rtmp2 ; boundary
        MOV     OP1mlo,OP1mlo,LSL Rtmp2
        MOV     Rarith,#&40000000       ;And set round=0, sticky=1
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Rnd_LowWord_RoundUp

; The MVN/shift pairs leave the bits above the boundary unchanged and set
; every bit below it to 1.

        RSB     Rtmp2,Rtmp2,#32         ;Set all bits below rounding
        MVN     OP1mlo,OP1mlo,LSR Rtmp2 ; boundary
        MVN     OP1mlo,OP1mlo,LSL Rtmp2
        MOV     Rarith,#&C0000000       ;And set round=1, sticky=1
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Rnd_AboveLowWord

; More than 32 bits lie below the rounding boundary. Decide whether the
; boundary is inside the high word or above the whole mantissa.

        RSBS    Rtmp2,Rtmp,#64          ;Rtmp2 := 64 - Rtmp
        BLT     Rnd_AboveMantissa

Rnd_HighWord

; The whole low word and the bottom bits of the high word lie below the
; boundary. Rtmp2 is the left shift that isolates those high-word bits.

; Branch out if rounding is exact.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL Rtmp2
        BEQ     Rnd_Exact

; We now know we want to round down if we're rounding to zero, or if we're
; rounding to minus infinity and the number is positive, or if we're
; rounding to plus infinity and the number is negative.

        MOVS    Rtmp,OP1sue,LSL #32-Sign_pos
        TSTCS   Rins,#1:SHL:RM_pos
        TSTCC   Rins,#1:SHL:(RM_pos+1)
        ASSERT  RM_pos < 7              ;So that constants don't disturb C
        BNE     Rnd_HighWord_RoundDown

; If we're not rounding to nearest, we must now be rounding up.

        TST     Rins,#RM_mask
        BNE     Rnd_HighWord_RoundUp
        ASSERT  RM_Nearest = 0

; We're rounding to nearest. Produce the round and sticky bits, then work
; out which way we're rounding.

        ADD     Rtmp,Rtmp2,#1
        ORRS    Rtmp,OP1mlo,OP1mhi,LSL Rtmp ;C<-round, Z<-NOT(sticky)
        BNE     Rnd_HighWord_GotDir     ;Branch if not halfway case

; Halfway case: C must become the least significant bit of the integer
; part. When Rtmp2 is 0 the CMP leaves C clear and the MOVCSS is skipped, so
; the least significant bit is taken to be 0.

        CMP     Rtmp2,#1                ;C<-least significant bit, from
        MOVCSS  Rtmp,OP1mhi,LSL Rtmp2   ; high word unless Rtmp2 is 0.

Rnd_HighWord_GotDir

        BCS     Rnd_HighWord_RoundUp

Rnd_HighWord_RoundDown

        RSB     Rtmp2,Rtmp2,#32         ;Clear all bits below rounding
        MOV     OP1mhi,OP1mhi,LSR Rtmp2 ; boundary
        MOVS    OP1mhi,OP1mhi,LSL Rtmp2
        MOV     OP1mlo,#0
        MOVEQ   RNDexp,#0               ;Exponent must change for 0 result
        MOV     Rarith,#&40000000       ;And set round=0, sticky=1
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Rnd_HighWord_RoundUp

        RSB     Rtmp2,Rtmp2,#32         ;Set all bits below rounding
        MVN     OP1mhi,OP1mhi,LSR Rtmp2 ; boundary
        MVN     OP1mhi,OP1mhi,LSL Rtmp2
        MOV     OP1mlo,#&FFFFFFFF
        MOV     Rarith,#&C0000000       ;And set round=1, sticky=1
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Rnd_AboveMantissa

; The rounding cannot possibly be exact - we must either be rounding down to
; zero or up to one. Furthermore, we know that the round bit is 0 and the
; sticky bit is 1. So we can only be rounding up if we're rounding to plus
; or minus infinity, and the result must be of the correct sign as well.
;   After the two EORs, bit 31 of Rtmp is (sign EOR rounding mode bit 0) and
; bit 31 of Rtmp2 is (sign EOR rounding mode bit 1). The BICS result is
; negative only when the first is 1 and the second is 0.

        EOR     Rtmp,OP1sue,Rins,LSL #31-RM_pos   ;Somewhat tricky code to
        EOR     Rtmp2,OP1sue,Rins,LSL #30-RM_pos  ; establish the above
        BICS    Rtmp,Rtmp,Rtmp2
        BMI     Rnd_UpToOne

Rnd_DownToZero

; Return a zero mantissa and exponent, with round=0 and sticky=1.

        MOV     OP1mhi,#0
        MOV     OP1mlo,#0
        MOV     RNDexp,#0
        MOV     Rarith,#&40000000
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Rnd_UpToOne

; Return an all-ones mantissa with exponent EIExp_bias-1 and round=1,
; sticky=1. The exponent constant is built in two halves, high byte first.

        MOV     OP1mhi,#&FFFFFFFF
        MOV     OP1mlo,#&FFFFFFFF
        MOV     RNDexp,#(EIExp_bias-1):AND:&FF00
        ORR     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
        ASSERT  (EIExp_bias-1) < &10000
        MOV     Rarith,#&C0000000
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Rnd_Exact

; We just need to return the number itself, with rounding bits equal to
; zero.

        MOV     Rarith,#0
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional assembly of Rnd

;===========================================================================

        [ :DEF: compare_s :LOR: FPEWanted :LOR: FPASCWanted

; Routine to compare two internal format floating point numbers. It has two
; entry points: "CompareFPE", which has an optimised fast track for common
; vs. common comparisons, and "CompareFPASC", which avoids the test for this
; optimised fast track - since it should never happen. The second entry
; point lies a long way down in the source to avoid addressing constraints.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
; Entry: OP1sue = First operand sign, uncommon, exponent;
;        OP1mhi = First operand mantissa, high word;
;        OP1mlo = First operand mantissa, low word;
;        OP2sue = Second operand sign, uncommon, exponent;
;        OP2mhi = Second operand mantissa, high word;
;        OP2mlo = Second operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (needed to discriminate between
;                 CMF/CMFE/CNF/CNFE and for traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  Rarith = result NZCV in bits 31:28; other bits zero;
;        OP1sue, OP1mhi, OP1mlo, OP2sue, OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14
;          may be corrupt.
;        Rfpsr may be updated.
;        All other registers preserved.

        [ FPEWanted :LOR: FPLibWanted

CompareFPE

        [ FPLibWanted
__fp_compare
        ]

        CDebug3 3,"CompareFPE: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"            op2 =",OP2sue,OP2mhi,OP2mlo

; Start by detecting the "fast track" case of both operands being common.
; The TSTEQ only runs if operand 1 is common, so NE afterwards means at
; least one operand is uncommon.

        TST     OP1sue,#Uncommon_bit
        TSTEQ   OP2sue,#Uncommon_bit
        BNE     Compare_Uncommon

        ]

Compare_Common

; Start by changing the sign of the second operand if the operation is
; CMF(E). (CNF(E) is easier than CMF(E), basically because addition is
; commutative and subtraction isn't.)
;   In builds with neither FPEWanted nor FPASCWanted the instruction is not
; examined and the sign is always flipped.

        [ FPEWanted :LOR: FPASCWanted
        TST     Rins,#CompNeg_bit
        EOREQ   OP2sue,OP2sue,#Sign_bit
        |
        EOR     OP2sue,OP2sue,#Sign_bit
        ]

; Both operands are common. We start with a magnitude comparison - life is
; fairly easy if (as is likely) it comes out not equal. In this case, the
; results are:
;
;   Magnitude    Operand 1   Operand 2  |  Result for
;   comparison     sign        sign     |    CNF(E)
;   ------------------------------------+------------
;       >           +           X       |      >
;       >           -           X       |      <
;       <           X           +       |      >
;       <           X           -       |      <
;
; The exponent comparison is refined by the mantissa words only while the
; result is still "equal". C then says which magnitude is larger, and the
; matching conditional TEQ copies that operand's sign into N.

        ExpComp Rtmp,OP1sue,OP2sue,Rtmp2 ;Rtmp := left-aligned op1 exp.
        CMPEQ   OP1mhi,OP2mhi
        CMPEQ   OP1mlo,OP2mlo
        BEQ     Compare_EqualMag
        TEQCS   OP1sue,#0               ;NB does not affect C
        TEQCC   OP2sue,#0
        ASSERT  Sign_pos = 31
        MOVPL   Rarith,#Comp_GT
        MOVMI   Rarith,#Comp_LT
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Compare_EqualMag

; If the operands are equal magnitude, then if they're both zero, the
; result is equality. Otherwise, the result is given by the following
; table:
;
;   Operand 1   Operand 2  |  Result for
;     sign        sign     |    CNF(E)
;   -----------------------+------------
;      +           +       |      >
;      +           -       |      =
;      -           +       |      =
;      -           -       |      <
;
; Of course, since they're equal magnitude, they're both zero if the first
; one is. Note Rtmp still contains a left-aligned operand 1 exponent.

        EORS    Rtmp2,OP1sue,OP2sue     ;Are signs opposite or the same?
        ASSERT  Sign_pos = 31
        MOV     Rarith,#Comp_EQ         ;Result if signs opposite
  IF Interworking :LOR: Thumbing
        BXMI    LR
  ELSE
        MOVMI   PC,LR
  ENDIF
        ORR     Rtmp,Rtmp,OP1mhi        ;Otherwise, are they both zero?
        ORRS    Rtmp,Rtmp,OP1mlo
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR
  ENDIF
        TST     OP1sue,#Sign_bit        ;Same sign, non-zero: sign decides
        MOVEQ   Rarith,#Comp_GT
        MOVNE   Rarith,#Comp_LT
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional assembly of Compare

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted :LOR: :DEF: fix_s :LOR: :DEF: fixu_s

; Routine to FIX an internal format floating point number. There are the
; usual two entry points.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
; Entry: OP1sue = Operand sign, uncommon, exponent;
;        OP1mhi = Operand mantissa, high word;
;        OP1mlo = Operand mantissa, low word;
;        Rfpsr  = FPSR;
;        Rins   = instruction (needed for rounding information and traps);
;        Rwp, Rfp, Rsp hold their usual values;
;        R14    = return link.
; Exit:  Rarith = result value;
;        OP1sue, OP1mhi, OP1mlo, OP2sue, OP2mhi, OP2mlo, Rtmp, Rtmp2 and R14
;          may be corrupt.
;        Rfpsr may be updated.
;        All other registers preserved.

        [ FPEWanted

; FPE entry point: dispatches uncommon operands elsewhere and falls into the
; shared code for common ones.

FixFPE

        CDebug3 3,"FixFPE: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BNE     Fix_Uncommon

        ]

; Additional entry labels, each assembled only when the corresponding
; symbol (fix_s / fixu_s) is defined.

        [ :DEF: fix_s
__fp_fix_common
        ]
        [ :DEF: fixu_s
__fp_fixu_common
        ]

Fix_Common

; The operand is common. Split OP1sue into sign and biased exponent.
; When fixu_s is defined OP1sue is left unmasked.

        AND     Rarith,OP1sue,#ToExp_mask
        [ :LNOT: :DEF: fixu_s
        AND     OP1sue,OP1sue,#Sign_bit
        ]

Fix_Numeric

; Calculate shift amount to denormalise the number to have effective
; unbiased exponent 63 - i.e. to put the true binary point at the rounding
; boundary.
;   When reached by fall-through from Fix_Common, Rarith holds the biased
; exponent. LR is stacked here and restored on every exit from this code.

        STMFD   Rsp!,{LR}       ;There may be a subroutine call below

        MOV     RNDexp,#((EIExp_bias+63):AND:&FF00)
        ORR     RNDexp,RNDexp,#((EIExp_bias+63):AND:&FF)
        ASSERT  (EIExp_bias+63) <= &FFFF
        SUBS    Rtmp,RNDexp,Rarith
        BLS     Fix_OutOfRange  ;Deal with massively out of range values

; Now denormalise the number to have this unbiased exponent.

        Denorm  OP1mhi,OP1mlo,Rarith,Rtmp,Rtmp2,Rtmp

; Next, we need to round the result to extended precision.

        [ FPEWanted :LOR: FPASCWanted

        AND     RNDprm,Rins,#RM_mask
        ORR     RNDprm,RNDprm,#2:SHL:(RM_pos+2)
        MOV     RNDdir,#0               ;Result has not been rounded so far
        BL      RoundNum_Extended

        |

; Expanded out rounding code
;   Rarith holds the round bit in bit 31 and the sticky bits below it. We
; round up when the round bit is set, unless it is an exact tie and the
; result is already even. The increment is a 64-bit one: a carry out of the
; low word must be propagated into the high word.

        MOVS    Rtmp,Rarith,LSL #1      ;C<-round, Z<-"tied case"
        BCC     Fix_NoRounding          ;Skip all rounding code...
        MOVEQS  Rtmp,OP1mlo,LSR #1      ; If "tied" C<-lsb (round to even)
        ADDCSS  OP1mlo,OP1mlo,#1        ;Increment low word
        ADDCSS  OP1mhi,OP1mhi,#1        ;If carry out, increment high word
        MOVCS   OP1mhi,#EIUnits_bit     ;If mantissa overflow, adjust
        ADDCS   RNDexp,RNDexp,#1        ; mantissa and exponent

Fix_NoRounding

        ]

        [ :LNOT: :DEF: fixu_s

; Produce the potential result, checking for an out-of-range value.
;   We know at this point that (OP1mhi,OP1mlo) contains the unsigned integer
; result, which is in the range 0 to 2^63, *both ends included*, and that
; OP1sue contains the sign of the result. We first need to apply the sign to
; this value - this is done by some slightly tricky code to avoid branches.
;   Note we cannot tell the difference between a result of +2^63 and -2^63
; after this. This doesn't matter, though - they're both well out of range!
;   The RSBCSS leaves C set only when the low word was zero, which is
; exactly when the negated high word needs the extra 1 added.

        MOVS    Rtmp,OP1sue,LSL #32-Sign_pos    ;CS if -ve, CC if +ve
        MVNCS   OP1mhi,OP1mhi                   ;If -ve, 1's compl't high
        RSBCSS  OP1mlo,OP1mlo,#0                ; word, 2's compl't low word
        ADDCS   OP1mhi,OP1mhi,#1                ; and do carry if needed

        ]

; The result is now in (OP1mhi,OP1mlo). Check for it being out of range -
; i.e. for its top 33 bits not being all identical.

        TEQ     OP1mhi,OP1mlo,ASR #31
        BNE     Fix_OutOfRange

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rarith,OP1mlo

; The only remaining exception that could occur at this point is an inexact
; result.
;   If the result is exact, we don't want to do anything about the inexact
; exception. If it's inexact and the inexact trap is disabled, we want to
; set the inexact cumulative bit in the FPSR. If it's inexact and the
; inexact trap is enabled, we want to call the trap. We use some tricky
; code to distinguish the three cases in-line.

        CMP     RNDdir,#0       ;Leaves CS/EQ if exact, NE if inexact
        MOVNES  Rtmp,Rfpsr,LSR #IXE_pos+1
                                ;Now CS/EQ if exact, CS/NE if inexact &
                                ; trap enabled, CC/NE if inexact & trap
        ASSERT  SysID_FPA <> 0  ; disabled (since SysID non-zero & not
        ASSERT  SysID_FPE <> 0  ; shifted out)
        ASSERT  SysID_pos > IXE_pos
        ORRCC   Rfpsr,Rfpsr,#IXC_bit
        BLHI    InexactTrapForI ;Works because HI = CS/NE

        |

        MOV     OP1sue,#0       ;Signal no error

        ]

  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF

Fix_OutOfRange

; An out of range FIX produces an invalid operation, with a potential result
; of &7FFFFFFF or &80000000, depending on the sign of the operand.
;   Both variants first undo the STMFD done at Fix_Numeric. The emulator
; variant then branches to the invalid operation handler; the other variant
; flags the error by setting IVO_bits in OP1sue and returns.
        
        [ FPEWanted :LOR: FPASCWanted

        LDMFD   Rsp!,{LR}
        MOV     Rarith,#:NOT:TopBit             ;Make &7FFFFFFF
        EOR     Rarith,Rarith,OP1sue,ASR #31    ;Convert to &80000000 if -ve
        MOV     Rtmp,#InvReas_FixRange
        B       InvalidOperation1ForI

        |

        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF

        ]

        ]                               ; Conditional assembly of Fix

;===========================================================================

        [ :DEF: addsub_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the addition/subtraction routine, meant for use
; by the FPASC and without a fast track for common operands.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

AddSubFPASC

        CDebug3 3,"AddSubFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"             op2 =",OP2sue,OP2mhi,OP2mlo

        ]

        [ FPLibWanted
__fp_addsub_uncommon
        ]

AddSub_Uncommon

; We have to do a full addition/subtraction, since either or both of the
; operands may be uncommon. What we will do is:
;
;   (a) Check for NaNs. If found, produce an invalid operation exception and
;       suitable NaN result.
;
;   (b) Check for infinities. If found, the infinity effectively becomes the
;       result, unless both operands are infinities and (after taking
;       account of whether an addition or subtraction is involved) they are
;       effectively of opposite signs.
;
;   (c) If no NaNs or infinities, adjust the operands by replacing all
;       effectively unnormalised numbers by the corresponding normalised or
;       extended denormalised number. Then call AddSub_Common, which will
;       work correctly on zeros, normalised numbers and extended
;       denormalised numbers.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
;   The BMI acts on the flags left by the second TNaNInf (operand 1); the
; operand 2 result is kept in Rtmp2 and tested explicitly.

        TNaNInf Rtmp2,OP2sue,OP2mhi           ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     AddSub_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        BNE     AddSub_NaNInf2Only

; Now we know there are no NaNs or infinities and therefore no Invalid
; Operation or Divide-By-Zero exceptions - which means we no longer need to
; keep track of exactly what the operands are. Next, we will convert the
; remaining types of numbers to zeros, normalised numbers and extended
; denormalised numbers, which can be dealt with by a call to AddSub_Common
; and one to NormaliseOp1.
;   The types of numbers that require converting are extended unnormalised
; numbers and zeros, and single and double denormalised numbers. In the case
; of the extended unnormalised numbers and zeros, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

; Each ANDS lines the uncommon bit up with the units bit, so MI means both
; are set for that operand.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; Now we need to normalise all these types of numbers, which now means all
; uncommon numbers except those with exponent 0 (which are extended
; precision denormalised numbers and should be left alone).

        TST     OP1sue,#Uncommon_bit
        Exp2Top Rarith,OP1sue,NE,S      ;Complete test & set up for call
        BLNE    $NormDenormOp1_str
        TST     OP2sue,#Uncommon_bit
        Exp2Top Rarith,OP2sue,NE,S      ;Complete test & set up for call
        BLNE    $NormDenormOp2_str

; Call AddSub_Common to do the addition, then normalise the result if it
; isn't already normalised and isn't zero. (This is necessary because e.g. a
; magnitude sum of two denormalised numbers will only have been shifted 1
; bit by AddSub_Common.)
;   LR is free to use as a scratch register for the zero test because the
; real return address is still on the stack.

        BL      AddSub_Common
        TST     OP1mhi,#EIUnits_bit
  IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}
        BXNE    LR
  ELSE
        LDMNEFD Rsp!,{PC}
  ENDIF
        ORRS    LR,OP1mhi,OP1mlo
        BLNE    $NormaliseOp1_str
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF

AddSub_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     AddSub_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an infinity with their shared sign if they have the same effective
; sign, or an invalid operation if they have opposite effective signs
; ("effective" means after taking ADF/SUF/RSF distinctions into account).
;   If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
;   The ORR chain ignores the units bit of each high word (shifted out by
; LSL #1); any other mantissa bit set means a NaN is present.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code
        BiShift EOR,Rtmp,OP2sue,Rins,LSR #SubNotAdd_pos,LSL #Sign_pos
        EORS    Rtmp,Rtmp,OP1sue                ;Check whether signs are
        ASSERT  Sign_pos = 31                   ; effectively same.
        ANDPL   Rtmp,OP1sue,#Sign_bit           ;If so, result is infinity
        BPL     AddSub_InfShared                ; (with op1 sign unless RSF)

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_MagSubInf         ;If not, it's an invalid
        B       InvalidOperation2ForSDE         ; operation

        |

        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

AddSub_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * the standard infinity if the first operand is an infinity, with its
;     sign determined by that of the first operand and whether the
;     instruction is RSF.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is operand a NaN?
        BNE     $ConvertNaN1Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so
        AND     Rtmp,OP1sue,#Sign_bit   ;Make standard infinity with right
        B       AddSub_InfShared        ; sign

AddSub_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * the standard infinity if the second operand is an infinity, with its
;     sign determined by that of the second operand and whether the
;     instruction is SUF.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is operand a NaN?
        BNE     $ConvertNaN2Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so
        AND     Rtmp,OP2sue,#Sign_bit   ;Make standard infinity with right
        TST     Rins,#SubNotAdd_bit     ; sign
        EORNE   Rtmp,Rtmp,#Sign_bit

; Shared tail: on entry Rtmp holds the candidate sign bit. It is flipped
; once more if RSF_bit is set in the instruction, then merged into a copy of
; Prototype_Infinity loaded into the operand 1 registers.

AddSub_InfShared
        TST     Rins,#RSF_bit
        EORNE   Rtmp,Rtmp,#Sign_bit
        [ CoreDebugging = 0
          ADR     OP1sue,Prototype_Infinity
        |
          ADRL    OP1sue,Prototype_Infinity
        ]
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]                               ; Conditional assembly of AddSub

;===========================================================================

        [ :DEF: mul_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the normal/fast multiplication routine, meant
; for use by the FPASC and without a fast track for common operands.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

MultFPASC

        CDebug3 3,"MultFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"           op2 =",OP2sue,OP2mhi,OP2mlo

        ]

        [ FPLibWanted
__fp_mult_uncommon
        ]

Mult_Uncommon

; We have to do a full multiplication, since either or both of the operands
; may be uncommon. What we will do is:
;
;   (a) Check for NaNs. If found, produce an invalid operation exception and
;       suitable NaN result.
;
;   (b) Check for infinities. If found, the result is an infinity with sign
;       equal to the exclusive-OR of the two operand signs, unless the other
;       operand is a zero, in which case we have an invalid operation.
;
;   (c) Check for zeros. If found, the result is a zero with sign equal to
;       the exclusive-OR of the two operand signs.
;
;   (d) If no NaNs, infinities or zeros, we can transform the problem into
;       that of multiplying together two normalised numbers, though the
;       normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.

        TNaNInf Rtmp2,OP2sue,OP2mhi           ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Mult_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        BNE     Mult_NaNInf2Only

; Now if either operand is a zero, the result is zero. We can detect zeros
; by the mantissa being all zero, since only zeros, some unnormalised URD
; results, extended unnormalised zeros and extended infinities have this
; property, we're assuming the operands are not URD results and we've
; already dealt with extended infinities.
;   The ORRNES only runs if operand 1 is non-zero, so EQ afterwards means at
; least one mantissa is zero.

        ORRS    Rtmp,OP1mhi,OP1mlo
        ORRNES  Rtmp,OP2mhi,OP2mlo
        BEQ     Mult_Zero

; Both operands are now normalised numbers, denormalised numbers or extended
; unnormalised non-zero numbers. The first step is to convert all of these
; to normalised numbers, possibly with a negative biased exponent. After
; doing the exponent and sign calculations, we then call Mult_Mantissas to
; complete the calculation.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; Both exponents are copied to temporaries before OP1sue is reduced to the
; result sign and before RNDexp is written.

        AND     Rtmp,OP1sue,#ToExp_mask
        AND     Rtmp2,OP2sue,#ToExp_mask
        EOR     OP1sue,OP1sue,OP2sue    ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        ADD     RNDexp,Rtmp,Rtmp2
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF00
        SUB     RNDexp,RNDexp,#(EIExp_bias-1):AND:&FF
        ASSERT  (EIExp_bias-1) < &10000 ;Result exponent if mantissa
                                        ; overflow is exp1+exp2-bias+1

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

; Normalise whichever operands do not already have the units bit set.

        TST     OP1mhi,#EIUnits_bit
        BLEQ    $NormaliseOp1_str
        TST     OP2mhi,#EIUnits_bit
        BLEQ    $NormaliseOp2_str

        LDMFD   Rsp!,{LR}       ;Restore the return link before the branch
        B       Mult_Mantissas

Mult_Zero

; Reached when the mantissa of at least one operand is entirely zero. The
; product is then a zero whose sign is the exclusive-OR of the two operand
; signs.
;   The sign is formed before anything is cleared: the register aliases are
; defined outside this section, so every write of a constant is kept after
; the last read of OP2sue.

        EOR     OP1sue,OP1sue,OP2sue    ;XOR the sign/exponent words and
        AND     OP1sue,OP1sue,#Sign_bit ; keep nothing but the sign bit
        MOV     Rarith,#0               ;No round/sticky information
        MOV     RNDexp,#0               ;Zero exponent
        MOV     OP1mlo,#0               ;Zero mantissa, low word
        MOV     OP1mhi,#0               ; and high word
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Mult_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     Mult_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an infinity with sign determined by those of the two operands.
;   If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
;   The two "LSL #1" operands discard bit 31 of each mantissa high word, so
; Rtmp ends up zero exactly when every remaining mantissa bit of both
; operands is zero, i.e. when neither operand is a NaN.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code

; Shared exit producing a signed infinity; also branched to from
; Mult_NaNInf1Only and Mult_NaNInf2Only. The result sign is parked in Rtmp
; because OP1sue is used as the base register for the LDMIA that loads the
; prototype into OP1regs (which overwrites OP1sue); the sign is then merged
; into the freshly loaded word.

Mult_InfShared
        EOR     Rtmp,OP1sue,OP2sue              ;If so, result is infinity
        AND     Rtmp,Rtmp,#Sign_bit             ; with correct sign
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Mult_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * an invalid operation exception if the first operand is an infinity and
;     the second is a zero;
;   * the standard infinity if the first operand is an infinity and the
;     second operand is not a zero, with its sign determined by those of the
;     two operands.
; Note that we can detect the second operand being zero by its mantissa
; being all zero, since only zeros, some unnormalised URD results, extended
; unnormalised zeros and extended infinities have this property, we're
; assuming the operands are not URD results and we know the second operand
; isn't an extended infinity.
;
; The invalid operation case is build dependent: FPE/FPASC builds pass a
; reason code in Rtmp to InvalidOperation2ForSDE, other builds OR IVO_bits
; into OP1sue and return to the caller.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is first operand a NaN?
        BNE     $ConvertNaN1Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so
        ORRS    Rtmp,OP2mhi,OP2mlo      ;Is second operand a zero?
        BNE     Mult_InfShared          ;If not, result is an infinity

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_InfTimes0 ;Otherwise, an invalid operation
        B       InvalidOperation2ForSDE

        |

        ORR     OP1sue,OP1sue,#IVO_bits ;Flag infinity times zero as invalid
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

Mult_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * an invalid operation exception if the first operand is a zero and the
;     second is an infinity;
;   * the standard infinity if the first operand is not a zero and the second
;     operand is an infinity, with its sign determined by those of the two
;     operands.
; Note that we can detect the first operand being zero by its mantissa being
; all zero, since only zeros, some unnormalised URD results, extended
; unnormalised zeros and extended infinities have this property, we're
; assuming the operands are not URD results and we know it isn't an extended
; infinity.
;
; This is the mirror image of Mult_NaNInf1Only with the operand roles
; exchanged; the invalid operation exit is build dependent in the same way
; (reason code in Rtmp for FPE/FPASC builds, IVO_bits in OP1sue otherwise).

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is second operand a NaN?
        BNE     $ConvertNaN2Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so
        ORRS    Rtmp,OP1mhi,OP1mlo      ;Is first operand a zero?
        BNE     Mult_InfShared          ;If not, result is an infinity

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_0TimesInf ;Otherwise, an invalid operation
        B       InvalidOperation2ForSDE

        |

        ORR     OP1sue,OP1sue,#IVO_bits ;Flag zero times infinity as invalid
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

        ]

;===========================================================================

        [ :DEF: div_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the normal/fast division/reverse division
; routine, meant for use by the FPASC and without a fast track for common
; operands.
;   The value returned is either a numeric value plus associated rounding
; information, with the uncommon bit clear, or an infinity or NaN, with the
; uncommon bit set.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard dyadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

; FPASC entry point: emits debug output for both operands, then falls
; through into Div_Uncommon.

DivFPASC

        CDebug3 3,"DivFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"          op2 =",OP2sue,OP2mhi,OP2mlo

        ]

; Library builds give the same code two extra names, one for division and
; one for reverse division.

        [ FPLibWanted
__fp_div_uncommon
__fp_rdv_uncommon
        ]

Div_Uncommon

; We have to do a full division, since either or both of the operands may be
; uncommon. What we will do is:
;
;   (a) Check for NaNs. If found, produce an invalid operation exception and
;       suitable NaN result.
;
;   (b) Check for infinities. If found, the result is:
;         * An invalid operation exception if both operands are infinities;
;         * An infinite result if the dividend is an infinity and the
;           divisor is numeric;
;         * A zero result if the dividend is numeric and the divisor is an
;           infinity;
;
;   (c) Check for zeros. If found, the result is:
;         * An invalid operation exception if both operands are zeros;
;         * A divide-by-zero exception if the dividend is non-zero and the
;           divisor is zero;
;         * A zero if the dividend is zero and the divisor is non-zero.
;
;   (d) If no NaNs, infinities or zeros, we can transform the problem into
;       that of dividing a normalised number by another, though the
;       normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
;
; NOTE(review): the BMI below consumes the flags left by the second TNaNInf
; invocation; the macro is defined elsewhere and presumably sets N from bit
; 31 of its result - confirm against the macro definition.

        TNaNInf Rtmp2,OP2sue,OP2mhi           ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Div_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        BNE     Div_NaNInf2Only

; Now if either operand is a zero, we need to take special action. We can
; detect zeros by the mantissa being all zero, since only zeros, some
; unnormalised URD results, extended unnormalised zeros and extended
; infinities have this property, we're assuming the operands are not URD
; results and we've already dealt with extended infinities.
;
; The conditional below assembles differently per build: FPE/FPASC builds
; get the zero test and test RevDiv_bit in Rins; other builds get no zero
; test here and test Reverse in Rins instead. Either way the flags from the
; TST feed the BNE that follows the conditional.

        [ FPEWanted :LOR: FPASCWanted

        ORRS    Rtmp,OP1mhi,OP1mlo
        ORRNES  Rtmp,OP2mhi,OP2mlo
        BEQ     Div_Zero

; Both operands are now going to be converted to normalised numbers. We now
; know that we are not going to need to know the operands for trap purposes,
; so we can swap them if this is a normal (rather than reverse) division.

        TST     Rins,#RevDiv_bit
        |
        TST     Rins,#Reverse
        ]
        BNE     Div_Uncommon_Swapped

; Normal division: exchange the two operands word by word through Rtmp, so
; that from Div_Uncommon_Swapped onwards OP1 is the divisor and OP2 the
; dividend whichever form of the instruction was used.

        MOV     Rtmp,OP1sue
        MOV     OP1sue,OP2sue
        MOV     OP2sue,Rtmp
        MOV     Rtmp,OP1mhi
        MOV     OP1mhi,OP2mhi
        MOV     OP2mhi,Rtmp
        MOV     Rtmp,OP1mlo
        MOV     OP1mlo,OP2mlo
        MOV     OP2mlo,Rtmp

Div_Uncommon_Swapped

; Both operands are now normalised numbers, denormalised numbers or extended
; unnormalised non-zero numbers. The first step is to convert all of these
; to normalised numbers, possibly with a negative biased exponent. After
; doing the exponent and sign calculations, we then call Div_Mantissas to
; complete the calculation.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
;
; The ANDS lines the uncommon bit up with the units bit (bit 31), so the N
; flag is set exactly when both are 1; the MI-conditional instructions then
; apply the denormalised number adjustment.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; Both exponents are extracted before OP1sue is reduced to the result sign
; and before RNDexp is written. The exponent is formed as
; exp(OP2) - exp(OP1) + bias, i.e. dividend minus divisor after the swap;
; the bias is added in two pieces, one per byte of the constant.

        AND     Rtmp,OP1sue,#ToExp_mask
        AND     Rtmp2,OP2sue,#ToExp_mask
        EOR     OP1sue,OP1sue,OP2sue    ;Produce result sign
        AND     OP1sue,OP1sue,#Sign_bit
        SUB     RNDexp,Rtmp2,Rtmp
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF00
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF
        ASSERT  EIExp_bias < &10000     ;Result exponent if no mantissa
                                        ; underflow is exp1-exp2+bias

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

; Normalise whichever operands do not already have their units bit set.
; The divisor uses the "Neg" normalisation routine.
; NOTE(review): presumably that variant applies the exponent correction to
; RNDexp with the opposite sign, since the divisor exponent was subtracted
; above - confirm against the routine itself.

        TST     OP1mhi,#EIUnits_bit
        BLEQ    $NormaliseOp1Neg_str
        TST     OP2mhi,#EIUnits_bit
        BLEQ    $NormaliseOp2_str

        LDMFD   Rsp!,{LR}
        B       Div_Mantissas

        [ FPEWanted :LOR: FPASCWanted

Div_Zero

; One or both operands are zeros, and both are numeric values (i.e. not NaNs
; or infinities). The result is:
;   * An invalid operation exception if both operands are zeros;
;   * A divide-by-zero exception if the dividend is non-zero and the divisor
;     is zero;
;   * A zero if the dividend is zero and the divisor is non-zero.
;
; Split according to whether this is a normal or reverse division.
;
; This code is only assembled for FPE/FPASC builds, and is entered before
; any operand swap has been done: for a normal division OP1 is the dividend
; and OP2 the divisor, for a reverse division the roles are exchanged.
;   Since at least one operand is known to be zero, finding one operand
; non-zero implies that the other one is the zero.

        MOV     Rtmp,#InvReas_0Div0     ;The only type of invalid operation
                                        ; that occurs below
        TST     Rins,#RevDiv_bit
        BNE     Div_Zero_Reversed

; It's a normal division - check the three cases above.

        ORRS    Rtmp2,OP1mhi,OP1mlo     ;Check dividend
        BNE     DivideByZero2           ;Non-zero, so divisor is the zero
        ORRS    Rtmp2,OP2mhi,OP2mlo     ;Check divisor
        BEQ     InvalidOperation2ForSDE ;Both zero

Div_ZeroByX

; The result is zero.

        EOR     OP1sue,OP1sue,OP2sue    ;Get sign right
        AND     OP1sue,OP1sue,#Sign_bit ;Uncommon bit is zero
        MOV     OP1mhi,#0               ;So is mantissa
        MOV     OP1mlo,#0
        MOV     RNDexp,#0               ;And exponent
        MOV     Rarith,#0               ;And round/sticky bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_Zero_Reversed

; It's a reverse division - check the three cases above.

        ORRS    Rtmp2,OP1mhi,OP1mlo     ;Check divisor
        BNE     Div_ZeroByX             ;Non-zero, so dividend is the zero
        ORRS    Rtmp2,OP2mhi,OP2mlo     ;Check dividend
        BNE     DivideByZero2           ;Non-zero dividend, zero divisor
        B       InvalidOperation2ForSDE ;Both zero

        ]

Div_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     Div_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an invalid operation.
;   If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
;   The "LSL #1" operands drop bit 31 of each mantissa high word, so Rtmp is
; zero exactly when all the other mantissa bits of both operands are zero.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code

; Infinity divided by infinity. FPE/FPASC builds pass a reason code in Rtmp
; to InvalidOperation2ForSDE; other builds OR IVO_bits into OP1sue and
; return.

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_InfDivInf
        B       InvalidOperation2ForSDE

        |

        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

Div_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * a standard infinity with sign equal to the exclusive-OR of the two
;     operand signs if the first operand is an infinity and the instruction
;     is a normal division;
;   * a zero with sign equal to the exclusive-OR of the two operand signs if
;     the first operand is an infinity and the instruction is a reverse
;     division.
;
; No operand swap has happened on this path, so for a normal division the
; infinity is the dividend and for a reverse division it is the divisor.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is first operand a NaN?
        BNE     $ConvertNaN1Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so
        EOR     Rtmp,OP1sue,OP2sue      ;Result sign is kept in Rtmp, as
        AND     Rtmp,Rtmp,#Sign_bit     ; OP1sue is about to be reloaded
        [ FPASCWanted :LOR: FPEWanted
        TST     Rins,#RevDiv_bit
        |
        TST     Rins,#Reverse
        ]
        ADREQ   OP1sue,Prototype_Infinity ;Normal division: infinity
        ADRNE   OP1sue,Prototype_Zero     ;Reverse division: zero
        LDMIA   OP1sue,OP1regs          ;Load the chosen prototype
        ORR     OP1sue,OP1sue,Rtmp      ; and give it the result sign
        MOV     RNDexp,#0               ;These two are only needed when
        MOV     Rarith,#0               ; result is zero
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Div_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * a standard infinity with sign equal to the exclusive-OR of the two
;     operand signs if the second operand is an infinity and the instruction
;     is a reverse division;
;   * a zero with sign equal to the exclusive-OR of the two operand signs if
;     the second operand is an infinity and the instruction is a normal
;     division.
;
; No operand swap has happened on this path, so for a normal division the
; infinity is the divisor and for a reverse division it is the dividend.
; The condition codes on the two ADRs are therefore the opposite way round
; from those in Div_NaNInf1Only.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is second operand a NaN?
        BNE     $ConvertNaN2Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so

        EOR     Rtmp,OP1sue,OP2sue      ;Result sign is kept in Rtmp, as
        AND     Rtmp,Rtmp,#Sign_bit     ; OP1sue is about to be reloaded
        [ FPEWanted :LOR: FPASCWanted
        TST     Rins,#RevDiv_bit
        |
        TST     Rins,#Reverse
        ]
        ADRNE   OP1sue,Prototype_Infinity ;Reverse division: infinity
        ADREQ   OP1sue,Prototype_Zero     ;Normal division: zero
        LDMIA   OP1sue,OP1regs          ;Load the chosen prototype
        ORR     OP1sue,OP1sue,Rtmp      ; and give it the result sign
        MOV     RNDexp,#0               ;These two are only needed when
        MOV     Rarith,#0               ; result is zero
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

;===========================================================================

        [ :DEF: fmod_s :LOR: FPEWanted :LOR: FPASCWanted

; The second part of the IEEE remainder function.

Rem_Uncommon

; One or both of the operands may be uncommon. What we will do is:
;
;   (a) Check for NaNs. If found, produce an invalid operation exception and
;       suitable NaN result.
;
;   (b) Check for infinities. If found, the result is:
;         * An invalid operation exception if the first operand is an
;           infinity.
;         * Equal to the first operand if the second operand is an infinity
;           and the first isn't.
;
;   (c) Check for zeros. If found, the result is:
;         * An invalid operation exception if the second operand is a zero;
;         * Equal to the first operand if the first operand is a zero and
;           the second isn't;
;
;   (d) If no NaNs, infinities or zeros, we can transform the problem into
;       that of doing the remainder of one normalised number by another,
;       though the normalised numbers concerned may have unusual exponents.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
;
; NOTE(review): the BMI below consumes the flags left by the second TNaNInf
; invocation; the macro is defined elsewhere and presumably sets N from bit
; 31 of its result - confirm against the macro definition.

        TNaNInf Rtmp2,OP2sue,OP2mhi           ;Rtmp2[31] := (op2 is NaN/inf)
        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Rem_NaNInf1
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        BNE     Rem_NaNInf2Only

; Now if the second operand is a zero, we've got an invalid operation, and
; if it isn't but the first operand is, we've got a result equal to the
; first operand. We can detect zeros by the mantissa being all zero, since
; only zeros, some unnormalised URD results, extended unnormalised zeros and
; extended infinities have this property, we're assuming the operands are
; not URD results and we've already dealt with extended infinities.
;
; The invalid operation exit is build dependent: FPE/FPASC builds pass a
; reason code in Rtmp to InvalidOperation2ForSDE, other builds OR IVO_bits
; into OP1sue and return. Every instruction in either variant is conditional
; on EQ, so a non-zero second operand falls through in both.

        ORRS    Rtmp,OP2mhi,OP2mlo

        [ FPEWanted :LOR: FPASCWanted

        MOVEQ   Rtmp,#InvReas_XRem0
        BEQ     InvalidOperation2ForSDE

        |

        ORREQ   OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR
  ENDIF

        ]

        ORRS    Rarith,OP1mhi,OP1mlo    ;Leaves Rarith = 0 if we branch
        BEQ     Rem_FirstOperand_Zero

; Both operands may now be forced to be normalised numbers - after we've
; dealt with signs and exponents, we can rejoin the main code.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
;
; The ANDS lines the uncommon bit up with the units bit (bit 31), so the N
; flag is set exactly when both are 1; the MI-conditional instructions then
; apply the denormalised number adjustment.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; LR is pushed here and is NOT popped again in this section: control leaves
; via the branch to Rem_ExponentsDone with the return address still on the
; stack.

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

        AND     RNDexp,OP2sue,#ToExp_mask       ;Raw second operand exponent
        TST     OP2mhi,#EIUnits_bit             ;Normalise second operand,
        BLEQ    $NormaliseOp2_str               ; then adjust to get
        SUB     Rtmp2,RNDexp,#1                 ; prospective result exp.

; Each operand must be tested on its OWN units bit. The second operand is
; known to be normalised by now, so testing OP2mhi again here would always
; give NE and an unnormalised or denormalised first operand would never be
; normalised.

        AND     RNDexp,OP1sue,#ToExp_mask       ;Raw first operand exponent
        TST     OP1mhi,#EIUnits_bit             ;Normalise first operand
        BLEQ    $NormaliseOp1_str               ; then determine the number
        SUBS    Rarith,RNDexp,Rtmp2             ; of iterations - 1
        MOV     RNDexp,Rtmp2                    ;Get prospective result exp.
                                                ; back where it's wanted

; All the special exponent handling is done, so we might as well rejoin the
; main code. MOV without S leaves the flags from the SUBS above in place
; for whatever Rem_ExponentsDone wants to do with them.

        B       Rem_ExponentsDone

Rem_NaNInf1

; The first operand is a NaN or infinity, the second may be (the top bit of
; Rtmp2 indicates whether it is).

        TST     Rtmp2,#TopBit
        BEQ     Rem_NaNInf1Only

; Both operands are NaNs or infinities. If both operands are infinities, the
; result is an invalid operation.
;   If either operand is a NaN, the standard exception/NaN propagation rules
; apply.
;   The "LSL #1" operands drop bit 31 of each mantissa high word, so Rtmp is
; zero exactly when all the other mantissa bits of both operands are zero.

        ORR     Rtmp,OP1mlo,OP1mhi,LSL #1       ;Test if both are infinities
        ORR     Rtmp,Rtmp,OP2mlo
        ORRS    Rtmp,Rtmp,OP2mhi,LSL #1
        BNE     $ConvertNaNs_str                ;If not, use shared code
        
; Remainder of an infinity. FPE/FPASC builds pass a reason code in Rtmp to
; InvalidOperation2ForSDE; other builds OR IVO_bits into OP1sue and return.

        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_InfRemX
        B       InvalidOperation2ForSDE

        |

        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

Rem_NaNInf1Only

; The first operand is a NaN or infinity, the second isn't. The result is:
;   * an invalid operation exception if the first operand is a signalling
;     NaN;
;   * the first operand unchanged if it is a quiet NaN;
;   * an invalid operation if it is an infinity.
;
; The infinity case uses the same reason code (InvReas_InfRemX) and the same
; build-dependent exit as the both-infinities case in Rem_NaNInf1.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is first operand a NaN?
        BNE     $ConvertNaN1Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so
        
        [ FPEWanted :LOR: FPASCWanted

        MOV     Rtmp,#InvReas_InfRemX
        B       InvalidOperation2ForSDE

        |

        ORR     OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

Rem_NaNInf2Only

; The first operand is not a NaN or infinity, the second is. The result is:
;   * an invalid operation exception if the second operand is a signalling
;     NaN;
;   * the second operand unchanged if it is a quiet NaN;
;   * equal to the first operand if the second operand is an infinity.
;
; The infinity case falls through into Rem_FirstOperand below.

        ORRS    Rtmp,OP2mlo,OP2mhi,LSL #1 ;Is second operand a NaN?
        BNE     $ConvertNaN2Of2_str     ;Use standard exception/quiet NaN
                                        ; propagation code if so

Rem_FirstOperand

; Return the first operand as the result, split into the sign (OP1sue),
; exponent (RNDexp), mantissa (OP1mhi, OP1mlo) and rounding information
; (Rarith = 0, as the result is exact).
;
; If the first operand is common, life is easy.

        TST     OP1sue,#Uncommon_bit
        ANDEQ   RNDexp,OP1sue,#ToExp_mask
        ANDEQ   OP1sue,OP1sue,#Sign_bit
        MOVEQ   Rarith,#0
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR
  ENDIF

; If it's uncommon, life is trickier. First check for zeros.

        ORRS    Rarith,OP1mhi,OP1mlo    ;Leaves Rarith = 0 if we branch
        BEQ     Rem_FirstOperand_Zero

; The operand is now a denormalised number or extended unnormalised non-zero
; number; it needs conversion to an internal precision number. In the case
; of the extended denormalised and unnormalised numbers, this just requires
; us to normalise them; in the case of the single and double denormalised
; numbers, we need to clear their units bits and add 1 to their exponents
; before we normalise them.
;
; At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have a units bit of 1:
; all other uncommon numbers with this property are NaNs or infinities and
; have been dealt with already.
;
; Unlike the dyadic paths, the exponent has already been moved into RNDexp
; here, so the "add 1 to the exponent" step is applied to RNDexp directly.

        AND     RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent
        AND     OP1sue,OP1sue,#Sign_bit   ; and its sign

        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   RNDexp,RNDexp,#1

; The normalisation routine is entered with B rather than BL, so it returns
; straight to our caller through the LR we were entered with.

        MOV     Rarith,#0               ;Result is exact.
        B       $NormaliseOp1_str       ;NB must be necessary, so no
                                        ; point in checking whether
                                        ; normalised

Rem_FirstOperand_Zero

; The result is the first operand, which is a zero. Both branches to this
; label directly follow an "ORRS Rarith,OP1mhi,OP1mlo" that produced zero,
; so OP1mhi, OP1mlo and Rarith are already known to be zero; only the
; exponent and the sign word still need tidying up.

        MOV     RNDexp,#0               ;Zero exponent
        AND     OP1sue,OP1sue,#Sign_bit ;Keep only the operand's sign
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

;===========================================================================

; Prototype results, three words each. The special case code in this file
; takes the address of one of these with ADR and loads it into OP1regs with
; a single LDMIA, then ORs the required sign into the first word. Because
; they are addressed with ADR, they have to stay within ADR range of every
; piece of code that uses them.
; NOTE(review): the first word of Prototype_Infinity presumably combines the
; uncommon bit with the maximum exponent &7FFF - confirm against the
; internal format definitions.

Prototype_Zero
        DCD     &00000000,&00000000,&00000000

Prototype_Infinity
        DCD     &40007FFF,&00000000,&00000000

;===========================================================================

        [ :DEF: sqrt_s :LOR: FPEWanted :LOR: FPASCWanted

; The second part of the square root routine, which deals with uncommon
; operands.

        [ FPLibWanted
__fp_sqrt_uncommon
        ]

Sqrt_Uncommon

; We have to deal with the square root of an uncommon value. The cases are:
;
;   * The square root of a signalling NaN is an invalid operation;
;
;   * The square root of a quiet NaN is the NaN itself;
;
;   * The square root of plus infinity is plus infinity;
;
;   * The square root of minus infinity is an invalid operation;
;
;   * The square root of an extended unnormalised zero is a zero of the same
;     sign;
;
;   * The square roots of denormalised numbers and extended unnormalised
;     numbers can be determined by transforming them into normalised numbers
;     (possibly with an out-of-range exponent), then using the standard
;     square root code above.
;
; So the first thing we do is check for NaNs and infinities - if we find
; one, we'll generate the result by special case code. Note that we check
; for them together, since they have similar bit patterns.
;
; NOTE(review): the BMI below consumes the flags left by the TNaNInf
; invocation; the macro is defined elsewhere and presumably sets N from bit
; 31 of its result - confirm against the macro definition.

        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op is NaN/inf)
        BMI     Sqrt_NaNInf

; Now if the operand is a zero, the result is a zero of the same sign. We
; can detect zeros by the mantissa being all zero, since only zeros, some
; unnormalised URD results, extended unnormalised zeros and extended
; infinities have this property, we're assuming the operand is not a URD
; result and we've already dealt with extended infinities.

        ORRS    Rtmp,OP1mhi,OP1mlo
        ANDEQ   OP1sue,OP1sue,#Sign_bit
        BEQ     Sqrt_Zero

; The operand is now a denormalised number or extended unnormalised non-zero
; number. If it is negative, we've got an invalid operation. Otherwise, we
; know that no invalid operation or divide-by-zero exception is going to
; occur, so we can convert it to a normalised number, possibly with a
; negative biased exponent. After doing the exponent and sign calculations,
; we then call Sqrt_Mantissa to complete the calculation.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have a units bit of 1:
; all other numbers with this property are NaNs or infinities and have
; been dealt with already.

        AND     RNDexp,OP1sue,#ToExp_mask ;Extract operand exponent

        ANDS    OP1sue,OP1sue,#Sign_bit ;NE if the operand is negative
        
; Negative operand: invalid operation. FPE/FPASC builds pass a reason code
; in Rtmp to InvalidOperation1ForSDE; other builds OR IVO_bits into OP1sue
; and return. Every instruction in either variant must be conditional on NE,
; so that a positive operand falls through to the normalisation code below
; (neither MOV nor ORR without S disturbs the flags set by the ANDS).

        [ FPEWanted :LOR: FPASCWanted

        MOVNE   Rtmp,#InvReas_SqrtNeg
        BNE     InvalidOperation1ForSDE

        |

        ORRNE   OP1sue,OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BXNE    LR
  ELSE
        MOVNE   PC,LR
  ENDIF

        ]

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   RNDexp,RNDexp,#1

        BL      $NormaliseOp1_str       ;NB must be necessary, so no
                                        ; point in checking whether
                                        ; normalised

; Halve the exponent: (exp+bias) DIV 2. The bias is added in two pieces, one
; per byte of the constant, which is only correct if it fits in 16 bits. The
; MOVS leaves the bit shifted out (the parity of exp+bias) in the carry
; flag for Sqrt_Mantissa.

        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF00
        ADD     RNDexp,RNDexp,#EIExp_bias:AND:&FF
        ASSERT  EIExp_bias < &10000     ;Result exponent if mantissa
                                        ; overflow is (exp+bias) DIV 2
        MOVS    RNDexp,RNDexp,LSR #1

        LDMFD   Rsp!,{LR}
        B       Sqrt_Mantissa

Sqrt_Zero

; The operand is a zero, and so is the result. The branch here is taken
; with the mantissa words known to be zero and OP1sue already reduced to
; just the sign, which leaves only the rounding information and the
; exponent to clear.

        MOV     Rarith,#0               ;No round/sticky information
        MOV     RNDexp,#0               ;Zero exponent
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Sqrt_NaNInf

; The operand is a NaN or infinity. If it's a NaN, we use the standard
; rules for propagating NaNs. If an infinity, we've got an invalid operation
; if it is negative and a result equal to the standard plus infinity if it
; is positive.
;
; The two build variants below differ in shape. FPE/FPASC builds branch away
; to InvalidOperation1ForSDE for minus infinity, so the prototype load that
; follows can be unconditional. Other builds share the return sequence
; after the conditional, so the IVO_bits marking is conditional on NE and
; the prototype load on EQ.

        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is operand a NaN?
        BNE     $ConvertNaN1_str             ;Use standard exception/quiet NaN
                                        ; propagation code if so
        TST     OP1sue,#Sign_bit        ;NE if this is minus infinity

        [ FPEWanted :LOR: FPASCWanted

        MOVNE   Rtmp,#InvReas_SqrtNeg
        BNE     InvalidOperation1ForSDE

        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs

        |

        ORRNE   OP1sue,OP1sue,#IVO_bits
        ADREQ   OP1sue,Prototype_Infinity
        LDMEQIA OP1sue,OP1regs

        ]

  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; The second entry point to the move/move negated/absolute value routine,
; meant for use by the FPASC.
;   This routine will not work correctly with an input which is an
; unnormalised URD result, or an invalid internal format number.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

; FPASC entry point: emits debug output for the operand, then falls through
; into Move_Uncommon.

MoveFPASC

        CDebug3 3,"MoveFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.

        ]

Move_Uncommon

; Only uncommon values will get here. First split out NaNs and infinities.
;
; NOTE(review): the BMI below consumes the flags left by the TNaNInf
; invocation; the macro is defined elsewhere and presumably sets N from bit
; 31 of its result - confirm against the macro definition.

        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op is NaN/inf)
        BMI     Move_NaNInf

; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero. If it's the
; last of these, change it to a real zero and treat it as a numeric.

        ORRS    Rtmp,OP1mhi,OP1mlo
        MOVEQ   RNDexp,#0
        BEQ     Move_Numeric

; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.
;
; The exponent is moved into RNDexp first; the ASSERT records that the
; masked field needs no shifting to be used as a number, which is what
; allows the plain "#1" in the ADDMI below.

        AND     RNDexp,OP1sue,#ToExp_mask
        ASSERT  EIExp_pos = 0

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

; The ANDS lines the uncommon bit up with the units bit (bit 31), so the N
; flag is set exactly when both are 1.

        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   RNDexp,RNDexp,#1

        BL      $NormaliseOp1_str       ;NB must be necessary, so no
                                        ; point in checking whether
                                        ; normalised

        LDMFD   Rsp!,{LR}
        B       Move_Numeric

Move_NaNInf

; NaN/infinity path of the move routine.
;   In:   OP1sue/OP1mhi/OP1mlo = operand (a NaN or an infinity),
;         Rins = instruction word (tested for MNF_bit and ABS_bit).
;   Flow: NaNs branch to Move_NaN. Infinities are replaced by the contents
;         of Prototype_Infinity with the adjusted sign ORed into OP1sue,
;         and the routine returns through LR.
;   Scratch: Rtmp.

; The operand is a NaN or infinity. If it's an infinity, we just want to
; perform the standard sign manipulations on it and return a standard
; infinity. If it's a NaN, we need to pay attention to the implicit IEEE
; format conversion.

; The LSL #1 discards the top (units) bit of OP1mhi, so only the remaining
; mantissa bits decide between NaN (non-zero) and infinity (zero).
        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Is operand a NaN?
        BNE     Move_NaN
        AND     Rtmp,OP1sue,#Sign_bit   ;Isolate sign
; MNF flips the sign and ABS then clears it; because ABS is applied last it
; wins if both bits are set in Rins.
        TST     Rins,#MNF_bit           ;Do sign manipulations
        EORNE   Rtmp,Rtmp,#Sign_bit
        TST     Rins,#ABS_bit
        BICNE   Rtmp,Rtmp,#Sign_bit
; OP1sue is used as a temporary pointer to the prototype and is then
; reloaded (presumably OP1regs includes OP1sue - the ORR below relies on
; OP1sue holding the loaded word rather than the address).
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

Move_NaN

; NaN path of the move routine.
;   Pushes LR, calls NaNConversionNeeded and tests Rarith afterwards. If
;   the top bit of Rarith is set, control goes straight to
;   Move_NaN_DoSigns. Otherwise ConvertNaN1_Special is called; if it
;   returns with NE the routine returns immediately (popping the saved
;   LR), and if it returns with EQ control falls through to
;   Move_NaN_DoSigns.
;   NOTE(review): the TEQ/BMI pair implies NaNConversionNeeded reports "no
;   conversion needed" through the sign bit of Rarith - confirm against
;   that routine, which is not visible here.

        STMFD   Rsp!,{LR}
        BL      NaNConversionNeeded
        TEQ     Rarith,#0               ;Conversion needed?
        BMI     Move_NaN_DoSigns        ;Just alter signs if not
        BL      ConvertNaN1_Special     ;Do correct NaN conversion
; The conditional return below uses the flags left by ConvertNaN1_Special.
  IF Interworking :LOR: Thumbing
        LDMNEFD Rsp!,{LR}               ;We're done and must *not* alter
                                        ; signs if an invalid operation trap
                                        ; occurred
        BXNE    LR
  ELSE
        LDMNEFD Rsp!,{PC}               ;We're done and must *not* alter
                                        ; signs if an invalid operation trap
                                        ; occurred
  ENDIF

Move_NaN_DoSigns

; Final step of the NaN path of the move routine.
;   In:   OP1sue = sign/exponent word of the NaN, Rins = instruction word,
;         return address on the stack (pushed at the start of Move_NaN).
;   Out:  OP1sue with its sign bit flipped if MNF_bit is set in Rins and
;         then cleared if ABS_bit is set; returns by popping the stacked
;         return address.

; Do the sign manipulations and return.

; ABS is applied after MNF, so it wins if both bits are set.
        TST     Rins,#MNF_bit
        EORNE   OP1sue,OP1sue,#Sign_bit
        TST     Rins,#ABS_bit
        BICNE   OP1sue,OP1sue,#Sign_bit
  IF Interworking :LOR: Thumbing
        LDMFD   Rsp!,{LR}
        BX      LR
  ELSE
        LDMFD   Rsp!,{PC}
  ENDIF

        ]

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; The second entry point to the NRM routine, intended for use by the FPASC.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

; FPASC entry label; assembled only when FPASCWanted is true. It emits a
; debug trace of the operand and then runs straight into Norm_Uncommon.

NormFPASC

        CDebug3 3,"NormFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; The FPA does not bounce common values in the Prepare stage for these
; instructions, so no need to check the uncommon bit.

        ]

Norm_Uncommon

; Uncommon-operand path of the NRM routine.
;   In:   OP1sue/OP1mhi/OP1mlo = operand, already known to be uncommon.
;   Flow: anything that is not a NaN or infinity goes to
;         Norm_ZeroUnnormOrDenorm with RNDexp = OP1sue AND ToExp_mask.
;         NaNs and infinities fall through into the code that follows
;         (NormUrd_NaNInf) with RNDexp untouched.
;   Scratch: Rtmp.

; Only uncommon values will get here. First split out all but NaNs and
; infinities.

; ANDPL has no S suffix, so the flags from TNaNInf are still valid for the
; BPL.
        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op is NaN/inf)
        ANDPL   RNDexp,OP1sue,#ToExp_mask
        BPL     Norm_ZeroUnnormOrDenorm

NormUrd_NaNInf

; Shared NaN/infinity handling, reached by fall-through from Norm_Uncommon
; and by branch from Urd_Uncommon and Rnd_Uncommon.
;   In:   OP1sue/OP1mhi/OP1mlo = operand (a NaN or an infinity).
;   Flow: NaNs are handed to $ConvertNaN1_str by a plain branch, so that
;         routine returns to this routine's caller. Infinities are
;         replaced by the contents of Prototype_Infinity with the original
;         sign ORed into OP1sue, and the routine returns through LR.
;   Scratch: Rtmp.

; The operand is a NaN or infinity. If it's an infinity, we just want to
; return a standard infinity. If it's a NaN, we use the standard NaN
; propagation code.

; The LSL #1 discards the top (units) bit of OP1mhi before the zero test.
        ORRS    Rtmp,OP1mlo,OP1mhi,LSL #1 ;Check for NaNs
        BNE     $ConvertNaN1_str
        AND     Rtmp,OP1sue,#Sign_bit   ;Isolate sign
; OP1sue doubles as the pointer to the prototype and is then reloaded
; (presumably OP1regs includes OP1sue - the ORR below relies on that).
        ADR     OP1sue,Prototype_Infinity
        LDMIA   OP1sue,OP1regs
        ORR     OP1sue,OP1sue,Rtmp
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; The second entry point to the URD routine, meant for use by the FPASC and
; optimised for uncommon operands.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Uses standard monadic operation entry and exit conventions - see top of
; this file.

        [ FPASCWanted

; FPASC entry label for URD; assembled only when FPASCWanted is true. It
; traces the operand and then runs straight into Urd_Uncommon.

UrdFPASC

        CDebug3 3,"UrdFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; No uncommon-bit test is made here: the FPA does not bounce common values
; in the Prepare stage for these instructions.

        ]

Urd_Uncommon

; Uncommon-operand path of the URD routine.
;   In:   OP1sue/OP1mhi/OP1mlo = operand.
;   Flow: NaNs and infinities go to NormUrd_NaNInf (same treatment as for
;         NRM). Everything else goes to Urd_Numeric with
;           Rarith = exponent field (plus 1 if the units bit was set),
;           OP1sue = sign bit only,
;           OP1mhi = mantissa high word with the units bit cleared.
;   Scratch: Rtmp.

        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     NormUrd_NaNInf

; What is left is a denormalised number, or an extended precision
; unnormalised number or zero. Of these, only the single and double
; precision denormalised numbers have a units bit of 1, and they need their
; exponent and mantissa corrected. The units bit is tested first; none of
; the instructions after the TST alter the flags, so the two conditional
; corrections still see its result.

        TST     OP1mhi,#EIUnits_bit             ;NE <=> single/double denorm
        AND     Rarith,OP1sue,#ToExp_mask       ;Exponent to Rarith,
        AND     OP1sue,OP1sue,#Sign_bit         ; sign alone stays in OP1sue
        ADDNE   Rarith,Rarith,#1                ;Denorm: exponent + 1
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit      ; and units bit cleared

        B       Urd_Numeric

        ]

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted

; The second part of the RND routine, which deals with uncommon operands.

Rnd_Uncommon

; Uncommon-operand path of the RND routine.
;   In:   OP1sue/OP1mhi/OP1mlo = operand, already known to be uncommon.
;   Flow: NaNs and infinities go to NormUrd_NaNInf. A zero mantissa goes to
;         Rnd_Exact with OP1sue reduced to its sign bit and RNDexp = 0.
;         Everything else goes to Rnd_Numeric with OP1sue reduced to its
;         sign bit and RNDexp/mantissa as left by the $NormaliseOp1_str
;         subroutine.
;   Scratch: Rtmp. LR is pushed and popped around the subroutine call.

; Split out NaNs and infinities, which are dealt with in exactly the same
; way as by the NRM instruction.

        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     NormUrd_NaNInf

; The value is an uncommon numeric value - i.e. a denormalised number, an
; extended unnormalised number or an extended unnormalised zero. If it's the
; last of these, change it to a real zero and treat it as a numeric.

; The ORRS result lands in RNDexp, so RNDexp is zero whenever the BEQ below
; is taken.
        ORRS    RNDexp,OP1mhi,OP1mlo
        ANDEQ   OP1sue,OP1sue,#Sign_bit
        BEQ     Rnd_Exact

; The operand is now a denormalised number or extended unnormalised non-zero
; number. We will change it into the corresponding normalised number
; (possibly with a negative biased exponent), then treat it as a numeric.
;   The types of numbers that require converting are extended unnormalised
; numbers and denormalised numbers of all precisions. In the case of the
; extended denormalised and unnormalised numbers, this just requires us to
; normalise them; in the case of the single and double denormalised numbers,
; we need to clear their units bits and add 1 to their exponents before we
; normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

; The exponent must be extracted before OP1sue is reduced to its sign bit.
        AND     RNDexp,OP1sue,#ToExp_mask
        AND     OP1sue,OP1sue,#Sign_bit
        ASSERT  EIExp_pos = 0

        STMFD   Rsp!,{LR}       ;We will have subroutine calls below

; The uncommon bit has already been masked out of OP1sue, so the units bit
; is tested on its own here.
        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   RNDexp,RNDexp,#1

        BL      $NormaliseOp1_str       ;NB must be necessary, so no
                                        ; point in checking whether
                                        ; normalised

        LDMFD   Rsp!,{LR}
        B       Rnd_Numeric

        ]

;===========================================================================

        [ :DEF: compare_s :LOR: FPEWanted :LOR: FPASCWanted

; The second entry point to the comparison routine, meant for use by the
; FPASC and without a fast track for common operands.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Has the same entry and exit conventions as "CompareFPE" above.

        [ FPASCWanted

; FPASC entry label for comparisons; assembled only when FPASCWanted is
; true. It traces both operands and then runs straight into
; Compare_Uncommon.

CompareFPASC

        CDebug3 3,"CompareFPASC: op1 =",OP1sue,OP1mhi,OP1mlo
        CDebug3 3,"              op2 =",OP2sue,OP2mhi,OP2mlo

        ]

Compare_Uncommon

; Full comparison path for operands that may be uncommon.
;   In:   OP1sue/OP1mhi/OP1mlo and OP2sue/OP2mhi/OP2mlo = the operands.
;   Flow: if either operand is a NaN, branch to Compare_Unordered with the
;         top bits of Rtmp and Rtmp2 still holding the NaN/infinity flags
;         for operands 1 and 2 (LR has not been pushed at that point).
;         Otherwise rewrite both operands in place and branch to
;         Compare_Common with LR restored.
;   Scratch: Rtmp, Rtmp2, Rarith.

; We have to do a full comparison, since either or both of the operands may
; be uncommon. What we will do is:
;
;   (a) Check for NaNs. If found, produce a trap if appropriate, or a result
;       of "unordered" otherwise.
;
;   (b) If no NaNs, adjust the operands by replacing all infinities by the
;       standard extended infinity, and all effectively unnormalised numbers
;       by the corresponding normalised or denormalised number. Then call
;       Compare_Common, which will work correctly on zeros, denormalised
;       numbers, normalised numbers and extended infinities.
;
; So the first thing we do is check for NaNs. This is done by first testing
; for a NaN or infinity (they have similar bit patterns) by a standard
; technique, then checking whether the fraction is non-zero.

; Each TST/ORRNES/BNE triple works as follows: if the TST finds the flag
; clear (EQ), the ORRNES is skipped and the BNE falls through; if the flag
; is set, the ORRNES replaces the flags with "fraction non-zero", so the BNE
; is taken only for a NaN.
        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        TNaNInf Rtmp2,OP2sue,OP2mhi           ;Rtmp2[31] := (op2 is NaN/inf)
        TST     Rtmp,#TopBit                    ;Operand 1 NaN or infinity?
        ORRNES  Rarith,OP1mlo,OP1mhi,LSL #1     ;If so, is it a NaN?
        BNE     Compare_Unordered
        TST     Rtmp2,#TopBit                   ;Operand 2 NaN or infinity?
        ORRNES  Rarith,OP2mlo,OP2mhi,LSL #1     ;If so, is it a NaN?
        BNE     Compare_Unordered

; Now we know there are no NaNs and therefore no exceptions - which means we
; no longer need to keep track of exactly what the operands are. We are
; going to massage the operands into a form where we can use the
; Compare_Common routine on them - note that it already works for zeros,
; normalised numbers, extended denormalised numbers and normal extended
; precision infinities. The remaining numbers are the other infinities, the
; extended unnormalised numbers and zeros, and the single and double
; precision denormalised numbers.
;   We will first convert all the infinities to a standard extended
; precision infinity, to ensure that they compare equal with each other. Or
; rather, an almost standard one - we will mark the result as common to
; avoid mistaking it for an unnormalised or denormalised number later on.

        STMFD   Rsp!,{LR}           ;We're likely to make subroutine calls

; With NaNs excluded, a set top bit in Rtmp/Rtmp2 now means "infinity". The
; ANDNE keeps only the sign bit (which also clears the uncommon bit), and
; the two ORRNEs then set the low 15 bits of the word to &7FFF; two
; instructions are needed because &7FFF is not a valid ARM immediate.
        TST     Rtmp,#TopBit
        ANDNE   OP1sue,OP1sue,#Sign_bit
        ORRNE   OP1sue,OP1sue,#&FF
        ORRNE   OP1sue,OP1sue,#&7F00
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        TST     Rtmp2,#TopBit
        ANDNE   OP2sue,OP2sue,#Sign_bit
        ORRNE   OP2sue,OP2sue,#&FF
        ORRNE   OP2sue,OP2sue,#&7F00
        BICNE   OP2mhi,OP2mhi,#EIUnits_bit

; Now we need to deal with the extended unnormalised numbers and zeros, and
; the single and double denormalised numbers. These basically need
; converting to extended precision normalised or denormalised numbers. In
; the case of the extended unnormalised numbers and zeros, this just
; requires us to normalise them; in the case of the single and double
; denormalised numbers, we need to clear their units bits and add 1 to their
; exponents before we normalise them.
;   At this stage, we can recognise that the numbers are single or double
; denormalised numbers simply by the fact that they have uncommon = units =
; 1: all other numbers with this property are NaNs or infinities and have
; been dealt with already.

; Each ANDS shifts the uncommon bit up to bit 31 and ANDs it with the
; mantissa high word, so N = (uncommon AND units). The infinities rewritten
; above have both bits clear and are therefore skipped. The exponent is
; incremented in place inside OPxsue.
        ANDS    Rarith,OP1mhi,OP1sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP1mhi,OP1mhi,#EIUnits_bit
        ADDMI   OP1sue,OP1sue,#1:SHL:EIExp_pos
        ANDS    Rarith,OP2mhi,OP2sue,LSL #EIUnits_pos-Uncommon_pos
        ASSERT  EIUnits_pos = 31
        BICMI   OP2mhi,OP2mhi,#EIUnits_bit
        ADDMI   OP2sue,OP2sue,#1:SHL:EIExp_pos

; Now we need to normalise all these types of numbers, which now means all
; uncommon numbers except those with exponent 0 (which are extended
; precision denormalised numbers and should be left alone).

; The Exp2Top macro is invoked with condition NE and the S option, so it
; only runs for uncommon operands and its flags then decide the BLNE.
        TST     OP1sue,#Uncommon_bit
        Exp2Top Rarith,OP1sue,NE,S      ;Complete test & set up for call
        BLNE    $NormDenormOp1_str
        TST     OP2sue,#Uncommon_bit
        Exp2Top Rarith,OP2sue,NE,S      ;Complete test & set up for call
        BLNE    $NormDenormOp2_str

; And now we can compare the results as though they were common numbers.

        LDMFD   Rsp!,{LR}
        B       Compare_Common

Compare_Unordered

; Result selection for comparisons in which at least one operand is a NaN.
;   In:   top bits of Rtmp and Rtmp2 = NaN/infinity flags for operands 1
;         and 2, OPnmhi/OPnmlo = operand mantissas, Rfpsr = status word
;         (tested for AC_bit), LR = return address (this code is reached
;         before Compare_Uncommon pushes LR, so returns use LR directly).
;   Out:  Rarith = Comp_Un_Orig if AC_bit is clear, Comp_Un_Alt if set.
;         When FPLibWanted is true a signalling NaN instead returns
;         Rarith = IVO_bits. Otherwise a signalling NaN branches to
;         InvalidOperation2ForI with Rtmp = 0 (= InvReas_SigNaN), and when
;         the FPEWanted/FPASCWanted tail is assembled, a quiet-NaN compare
;         with CompExc_bit set in Rins branches there with
;         Rtmp = InvReas_CompQNaN.

; The result is definitely unordered. We need to choose the correct result.

        TST     Rfpsr,#AC_bit
        MOVEQ   Rarith,#Comp_Un_Orig
        MOVNE   Rarith,#Comp_Un_Alt

; Now we need to know whether there's an IEEE exception - there is one if
; either operand is a signalling NaN, or if the instruction is CMFE or CNFE.
; Note that the top bits of Rtmp and Rtmp2 are still NaN/infinity flags for
; the two operands.

; If the TST gives EQ the ORRNES is skipped and the BEQ is taken; otherwise
; the ORRNES result decides between NaN (NE) and infinity (EQ). Rtmp is
; overwritten here, which is safe because its flag has just been consumed.
        TST     Rtmp,#TopBit                    ;Is operand 1 a NaN?
        ORRNES  Rtmp,OP1mlo,OP1mhi,LSL #1
        BEQ     Compare_Unordered_Op1NotNaN     ;If not, operand 2 must be
; A clear top fraction bit (EQ) marks a signalling NaN; in that case the
; ANDS also leaves Rtmp = 0, which the ASSERT below ties to InvReas_SigNaN.
        ANDS    Rtmp,OP1mhi,#EIFracTop_bit      ;If so, is it signalling?
        [ FPLibWanted
        MOVEQ   Rarith,#IVO_bits
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR
  ENDIF
        |
        BEQ     InvalidOperation2ForI           ; (invalid operation if so)
        ASSERT  InvReas_SigNaN = 0
        ]

; Operand 1 is a quiet NaN. Operand 2 must still be checked, since it may be
; a signalling NaN.
        TST     Rtmp2,#TopBit                   ;Is operand 2 a NaN?
        ORRNES  Rtmp,OP2mlo,OP2mhi,LSL #1
        [ FPEWanted :LOR: FPASCWanted
        BEQ     Compare_Unordered_Op2NotNaN     ;Branch if not
        |
; In this variant there is no CompExc_bit check: return the unordered result
; straight away.
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR
  ENDIF
        ]
Compare_Unordered_Op1NotNaN
; Operand 2 is known to be a NaN here, either by fall-through from the test
; above or because operand 1 was not one.
        ANDS    Rtmp,OP2mhi,#EIFracTop_bit      ;If so, is it signalling?
        [ FPLibWanted
; The return in this variant is unconditional: this is the last check, and
; Rarith is only replaced by IVO_bits for a signalling NaN.
        MOVEQ   Rarith,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF
        |
        BEQ     InvalidOperation2ForI           ; (invalid operation if so)
        ASSERT  InvReas_SigNaN = 0
        ]

        [ FPEWanted :LOR: FPASCWanted
Compare_Unordered_Op2NotNaN
; Only quiet NaNs are involved at this point, so whether there is an
; exception depends solely on the instruction.
        TST     Rins,#CompExc_bit               ;Is instruction CMFE/CNFE?
  IF Interworking :LOR: Thumbing
        BXEQ    LR
  ELSE
        MOVEQ   PC,LR                           ;If not, no exception
  ENDIF
        MOV     Rtmp,#InvReas_CompQNaN          ;Otherwise, invalid op
        B       InvalidOperation2ForI
        ]

        ]

;===========================================================================

        [ FPEWanted :LOR: FPASCWanted :LOR: :DEF: fix_s :LOR: :DEF: fixu_s

; The second entry point to the FIX routine, meant for use by the FPASC and
; optimised for uncommon operands.
;   This routine will not work correctly with inputs which are unnormalised
; URD results, or with invalid internal format numbers.
;
; Has the same entry and exit conventions as "FixFPE" above.

        [ FPASCWanted

; FPASC entry label for FIX; assembled only when FPASCWanted is true.
; Unlike the other FPASC entries in this part of the file it tests the
; uncommon bit itself and sends common operands to Fix_Common.

FixFPASC

        CDebug3 3,"FixFPASC: operand =",OP1sue,OP1mhi,OP1mlo

; Start by splitting between common and uncommon operands.

        TST     OP1sue,#Uncommon_bit
        BEQ     Fix_Common

        ]

; Extra labels for the same entry point, defined only when fix_s or fixu_s
; respectively is defined at assembly time.
        [ :DEF: fix_s
__fp_fix_uncommon
        ]
        [ :DEF: fixu_s
__fp_fixu_uncommon
        ]

Fix_Uncommon

; Uncommon-operand path of the FIX routine.
;   In:   OP1sue/OP1mhi/OP1mlo = operand, already known to be uncommon.
;   Flow: NaNs and infinities go to Fix_NaNInf. Everything else goes to
;         Fix_Numeric with
;           Rarith = exponent field (plus 1 if the units bit was set),
;           OP1mhi = mantissa high word with the units bit cleared,
;           OP1sue = sign bit only, except when fixu_s is defined, in which
;                    case OP1sue is left unmodified.
;   Scratch: Rtmp.

; NaNs and infinities will produce invalid operation exceptions, with the
; precise nature of the exception depending on whether the operand is a
; signalling NaN, a quiet NaN or an infinity.

        TNaNInf Rtmp,OP1sue,OP1mhi            ;Rtmp[31] := (op1 is NaN/inf)
        BMI     Fix_NaNInf

; The operand is now known to be a denormalised number or an extended
; precision unnormalised number or zero. We have to take a little care about
; single and double precision denormalised numbers, since their exponents
; and mantissas need correcting. Otherwise, we can just use the standard
; Fix_Numeric routine on them once we have separated the sign and the
; exponent from each other. We can recognise the single and double
; denormalised numbers by the fact that they are the only remaining cases
; with a units bit of 1.

        AND     Rarith,OP1sue,#ToExp_mask       ;Extract operand exponent
        [ :LNOT: :DEF: fixu_s
        AND     OP1sue,OP1sue,#Sign_bit         ; and sign
        ]

; Single/double denormalised: drop the units bit and add 1 to the exponent.
        TST     OP1mhi,#EIUnits_bit
        BICNE   OP1mhi,OP1mhi,#EIUnits_bit
        ADDNE   Rarith,Rarith,#1

        B       Fix_Numeric

Fix_NaNInf

; FIX of a NaN or an infinity is always an invalid operation.
;   In:   OP1mhi/OP1mlo = mantissa of the operand (a NaN or an infinity).
;   Out:  FPEWanted/FPASCWanted builds branch to InvalidOperation1ForI with
;           Rtmp   = InvReas_SigNaN  (NaN, top fraction bit clear),
;                    InvReas_FixQNaN (NaN, top fraction bit set), or
;                    InvReas_FixInf  (infinity),
;           Rarith = TopBit.
;         All other builds return through LR with OP1sue = IVO_bits.

        [ FPEWanted :LOR: FPASCWanted

; Start from the quiet-NaN reason and override it as the tests dictate. The
; infinity test comes last, so it takes priority over both NaN reasons and
; its flags are the ones still current at the branch.

        MOV     Rtmp,#InvReas_FixQNaN
        TST     OP1mhi,#EIFracTop_bit           ;Top fraction bit clear?
        MOVEQ   Rtmp,#InvReas_SigNaN            ; - then NaN is signalling
        ORRS    Rarith,OP1mlo,OP1mhi,LSL #1     ;Fraction all zero?
        MOVEQ   Rtmp,#InvReas_FixInf            ; - then it's an infinity
        MOV     Rarith,#TopBit                  ;Some sort of integer result
        B       InvalidOperation1ForI

        |

; No trap code in this variant: just hand the invalid operation indication
; back to the caller.

        MOV     OP1sue,#IVO_bits
  IF Interworking :LOR: Thumbing
        BX      LR
  ELSE
        MOV     PC,LR
  ENDIF

        ]

        ]

;===========================================================================

        END