singrdk/base/Kernel/Native/arm/Crt/r_muld.asm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Microsoft Research Singularity
;;; 
;;; Copyright (c) Microsoft Corporation.  All rights reserved.
;;;
;;; This file contains ARM-specific assembly code.
;;;

	GBLL mul_s

	GET veneer_d.asm

	END


;;;;  THE ROUTINE BELOW IS NOT ASSEMBLED: IT FOLLOWS THE END DIRECTIVE ABOVE.
;;;;  IT SHOULD WORK, BUT THE ARM ROUTINES SHOULD BE FASTER.


;
; Translated to ARM from SH3 FP emulation routines.
;
; __muld  Double precision floating point multiplication.
; Input:
;   r0 - Arg1.low
;   r1 - Arg1.high
;   r2 - Arg2.low
;   r3 - Arg2.high
; Output:
;   r0 - Result.low
;   r1 - Result.high
;
; Note:
;   If any FP exceptions are enabled, this routine may raise an exception.
;
;
; IEEE DOUBLE FORMAT
;
; 8 BYTES (LONG WORD * 2)
; 63 62       52 51                                                 0
; +-+-----------+----------------------------------------------------+
; |s|   e(11)   |                         m(52)                      |
; +-+-----------+----------------------------------------------------+
;               ^ point
;
; INFINITY NUMBER : e =  2047          m = 0
; ZERO            : e =     0          m = 0
; NaN             : e =  2047          m != 0
; DENORMAL NUMBER : e =     0          m != 0
;
    GET      fpe.asm

    Export   __muld

    IMPORT   FPE_Raise

    AREA |.text|, CODE, READONLY


; Bit masks for the high mantissa word (R8 / R2).
;
; CARRY_CHECK is bit 24.  While the mantissa carries the three G/R/S bits the
; hidden 1 sits in bit 23 of R8, so bit 24 being set means the product (or the
; rounding increment) carried one place past it.
; MSB is bit 20, the position of the hidden 1 in the high word of an unpacked
; (not yet guard-shifted) mantissa.
CARRY_CHECK EQU 0x01000000
MSB         EQU 0x00100000


__muld

; The push below leaves the original arguments at sp+0x0..sp+0xC (r0-r3 =
; Arg1.low, Arg1.high, Arg2.low, Arg2.high) with r4-r9 and lr above them.  The
; cause_trap path reloads the arguments from those four slots.
    STMFD   sp!, {r0-r9, lr} ; Save off args and non-volatiles and lr

    MOV     r8, r1          ; Load parameter1 as R8 R0 (high word in R8)
    MOV     r4, r2          ; Load parameter2 as R2 R4 (low word to R4 first,
    MOV     r2, r3          ;   then high word into the freed R2)
    MOV     r5, #_FpMulD    ; Double multiply, assume no exceptions


; Unpack parameters.
;
; R8 R0:   mantissa1            R2 R4:   mantissa2
; R9:      exponent1            R1:      exponent2
; R7:      sign = sign1 XOR sign2
;
; R5:      Exception flags
;
; Only bit 31 of R7 is meaningful: the later code looks at R7 solely through
; its sign bit (MOVS/ORRMI when packing, LSR #31 when returning zero).

    MOV     r9, r8, LSL #1     ; Extract exponent1: shift the sign bit out
    MOV     r9, r9, LSR #21    ;   and bring the 11 exponent bits down to 10..0
    MOV     r1, r2, LSL #1     ; Extract exponent2 the same way
    MOV     r1, r1, LSR #21    ;   ...
    MVN     r3, #0             ; R3 = all ones; R3 LSR #12 = 0x000FFFFF
    EOR     r7, r8, r2         ; Compute sign of result (before R8/R2 are masked)
    AND     r8, r8, r3, LSR #12; Extract mantissa1 (high 20 bits, no hidden bit)
    AND     r2, r2, r3, LSR #12; Extract mantissa2 (high 20 bits, no hidden bit)


; Check for exceptional cases.  All NaNs, infinities, and 0's are eliminated.
; Denormal numbers return here after normalizing them.  After these checks,
; both parameters are normalized numbers.
;
; if (exponent1 == 2047)
;     exception1; parameter1 is nonfinite
; if (exponent2 == 2047)
;     exception2; parameter1 is finite, parameter2 is nonfinite
; if (exponent1 ==    0)
;     exception3; parameter1 is 0 or denormal, parameter2 is finite
; if (exponent2 ==    0)
;     exception4; parameter1 is normalized, parameter2 is 0 or denormal
;
; The "== 2047" tests are written as (exponent + 1) == 2048: 2047 (0x7FF) does
; not fit an ARM data-processing immediate, 2048 does.  R3 is scratch here.

    ADD     r3, r9, #1      ; if (exponent1==2047)
    CMP     r3, #2048       ;   ..
    BEQ     exception1      ;   exception1

    ADD     r3, r1, #1      ; if (exponent2==2047)
    CMP     r3, #2048       ;   ..
    BEQ     exception2      ;   exception2

    CMP     r9, #0          ; if (exponent1==0)
    BEQ     exception3      ;   exception3 (returns zero, or normalizes
exception_return3           ;   mantissa1 and branches back to this label)

    CMP     r1, #0          ; if (exponent2==0)
    BEQ     exception4      ;   exception4
exception_return4

; Multiply the 53-bit mantissa1 and mantissa2 to produce a 106-bit product.
;
; Mantissas:
;
;  63       53   51                 32 31                                0
;  31       21   19                  0 31                                0
; +-----------+-+---------------------+-----------------------------------+
; |<--- 0 --->|1|      m1h, m2h       |             m1l, m2l              |
; +-----------+-+---------------------+-----------------------------------+
;               ^ Binary point
;
; Partial product terms:
;
;                                     m1l*m2l.h       m1l*m2l.l
;                     m1h*m2l.h < C + m1h*m2l.l
;                   + m1l*m2h.h < C + m1l*m2h.l
;   + m1h*m2h.h < C + m1h*m2h.l
;   -----------     -----------     -----------     -----------
;          res3            res2            res1            res0
;
; Intermediate result:
;
;  127                106 104 103   96 95       64 63       32 31        0
;  31                  10 9 8 7      0 31        0 31        0 31        0
; +----------------------+-+-+--------+----/\/----+----/\/----+----/\/----+
; |<-------- 0 --------->|?|?|R3: res3|  R8: res2 |  R0: res1 |  R6: res0 |
; +----------------------+-+-+--------+----/\/----+----/\/----+----/\/----+
;                            ^ Binary point
;
; The result exponent (exponent1 + exponent2 - 0x400 + 1, i.e. minus the bias
; 1023) is computed in pieces between the multiplies.  None of those ADD/SUB
; instructions set flags, so the carry chain below is undisturbed.  The order
; of the ADDS/ADCS instructions is significant; do not reorder them.

    ADD     r9, r9, r1      ; Compute exponent of result ...
                            ;   (must precede the UMULL that overwrites r1)

    UMULL   r6, r1, r0, r4  ; Compute m1l * m2l
                            ;   r6 = m1l*m2l.l, res0
                            ;   r1 = m1l*m2l.h

    ORR     r8, r8, #MSB    ; Set mantissa1's hidden bit
    ORR     r2, r2, #MSB    ; Set mantissa2's hidden bit

    UMULL   r4, r3, r8,r4   ; Compute m1h * m2l
                            ;   r4 = m1h*m2l.l
                            ;   r3 = m1h*m2l.h

    ADDS    r4, r1, r4      ; Add 1st 2 terms of res1; C = carry into res2

    SUB     r9, r9, #0x400  ;   ... compute exponent of result
    ADD     r9, r9, #0x1    ;   ... compute exponent of result

    UMULL   r0, r1, r2, r0  ; Compute m1l * m2h
                            ;   r0 = m1l*m2h.l
                            ;   r1 = m1l*m2h.h

    ADCS    r1, r1, r3      ; Add 1st 2 terms of res2 plus the res1 carry.
                            ;   Both high halves are < 2^21 (21-bit * 32-bit),
                            ;   so there is no carry out and C is now clear
    ADCS    r0, r0, r4      ; Add 3rd term of res1; the carry in is the 0 just
                            ;   produced, C = new carry into res2

    UMULL   r8, r3, r2, r8  ; Compute m1h * m2h
                            ;   r8 = m1h*m2h.l
                            ;   r3 = m1h*m2h.h

    ADCS    r8, r8, r1      ; Add 3rd term of res2 (plus carry from res1)
    ADC     r3, r3, #0      ; Add res2's carry to res3


; Shift the intermediate result right 17 bits, and 1 more if the product took
; 2 bits to the left of the binary point.  Fold all dropped bits from the right
; into the sticky bit S.  This leaves the result in standardized form for
; rounding.
;
; Result:
;  63     56   54                   32 31                            3 2 0
;  31     24   22                    0 31                            3 2 0
; +---------+-+-----------------------+-----------------------------------+
; |<-- 0 -->|1|         R8            |               R0             L|GRS|
; +---------+-+-----------------------+-----------------------------------+
;             ^ Binary point
;
; On entry R3 R8 R0 R6 hold res3..res0 of the 128-bit intermediate product.
; On exit the mantissa is in R8 R0 and R6 holds only bits below S.

normalize
    CMP     r6, #0              ; Fold bits we're about to lose into a
    ORRNE   r0, r0, #1          ;   sticky bit
    MOV     r6, r6, LSR #17     ; Shift intermediate result right 17
    ORR     r6, r6, r0, LSL #15 ;   .. (low 17 bits of R0 drop into R6)
    MOV     r0, r0, LSR #17     ;   ..
    ORR     r0, r0, r8, LSL #15 ;   .. (low 17 bits of R8 drop into R0)
    MOV     r8, r8, LSR #17     ;   ..
    ORR     r8, r8, r3, LSL #15 ;   .. (res3 bits drop into R8)
    TST     r8, #CARRY_CHECK    ; If product has 2 bits to the left of the
    BEQ     end_normalize       ;   binary point
    MOVS    r8, r8, LSR #1      ; Then normalize by scaling right 1
    MOVS    r0, r0, RRX         ;   more bit (C carries the bit between words)
    MOV     r6, r6, RRX         ;   ..
    ADD     r9, r9, #1          ;   .. and bump the exponent to compensate
end_normalize

; There are still 17 or 18 guard bits on the left of R6 that need to be folded
; into the sticky bit S.  It's safe to check the right ones over again because
; we're only concerned with stickiness.

    CMP     r6,#0              ; If any guard bits below S are set
    ORRNE   r0, r0, #1         ;   fold them into S

; Denormalize the result if necessary, with no concern for performance.
; The mantissa is shifted right one bit per loop iteration, (1 - exponent)
; times in total, and the loop counter reaches 0, so R9 (the exponent) is 0
; when end_denormal is reached through the loop.

    CMP     r9, #0             ; If exponent <= 0
    BGT     end_denormal       ;   ..
    RSB     r9, r9, #0         ; Then shift right -exponent places
    ADD     r9, r9, #1         ;   +1 for the non-hidden bit
denormal_loop
    MOVS    r8, r8, LSR #1     ;   ..
    MOVS    r0, r0, RRX        ;   .. (C = bit shifted out of R0, the old S)
    ORRCS   r0, r0, #1         ;   Fold the lost bit into the sticky bit
    SUBS    r9, r9, #1         ;   ..
    BNE     denormal_loop      ;   ..
end_denormal

; Round to nearest.  If rounding occurs, set inexact and
; mantissa += G & ( L | R | S ).  If the rounding carries, then renormalize.
;
; Bit assignment in R0: bit 3 = L (least significant result bit), bit 2 = G,
; bit 1 = R, bit 0 = S.  A tie (G set, R and S clear) rounds up only when L is
; set, i.e. ties go to the even mantissa.

; Test for inexact.
    TST     r0, #0x7           ; If G|R|S (=> rounding required)
    BEQ     end_round          ;   ..
    ORR     r5, r5, #INX_bit   ;   result is inexact

; Round to nearest.
    TST     r0, #0x4           ; If G &&
    BEQ     end_round          ;   ..
    TST     r0, #0xB           ;   L|R|S  (0xB = bits 3, 1 and 0)
    BEQ     end_round          ;     ..
    ADDS    r0, r0, #0x8       ; Then round the mantissa up (add 1 at L)
    ADC     r8, r8, #0         ;   .. propagating the carry into the high word

    CMP     r8, #CARRY_CHECK   ;   If the rounding carried
    BLT     no_normal_carry    ;     (mantissa >= 0x01000000)

    MOVS    r8, r8, LSR #1     ;   Then renormalize
    MOV     r0, r0, RRX        ;     ..
    ADD     r9, r9, #1         ;     ..
    B       end_round          ;     ..

no_normal_carry
    CMP     r9, #0             ;   Else if (exponent == 0)
    BNE     end_round          ;     ..
    CMP     r8, #CARRY_CHECK>>1;     && (mantissa >= 0x00800000)
    MOVGE   r9, #1             ;   Then rounded MaxDenorm to MinNormal

end_round

; Test for overflow.  Do this after rounding in case rounding caused overflow.
; As elsewhere, 2047 is tested as (exponent + 1) against 2048; R3 is scratch.
    ADD     r3, r9, #1         ; If (exponent >= 2047)
    CMP     r3, #2048          ;   ..
    BGE     return_overflow    ;   return overflow exception

; Test tininess after rounding.  Underflow is only flagged when the result is
; both inexact and still has a zero exponent field.
    TST     r5, #INX_bit        ; If already inexact
    BEQ     end_check_underflow1;  ..
    CMP     r9, #0              ;   and if exponent = 0
    ORREQ   r5, r5, #UNF_bit    ;     result has underflowed too
end_check_underflow1

; Pack the result back into IEEE format.
;
; In:  R8 R0 = mantissa with the G/R/S bits still in R0<2:0>
;      R9    = biased exponent, R7<31> = sign of the result
; Out: R1 R0 = packed double; R3 is left as all ones, R8 as the 20-bit
;      high mantissa field.
; None of the instructions below write the flags; control falls through to
; "return", whose TST sets them afresh.

return_value

    MOV     r3, r8, LSL #29     ; Low 3 bits of the high word move into the
    ORR     r0, r3, r0, LSR #3  ;   top of the low word as GRS is dropped
    MOV     r8, r8, LSL #9      ; Keep bits 22..3 of the high word: this drops
    MOV     r8, r8, LSR #12     ;   GRS below and the hidden bit above
    ORR     r1, r8, r9, LSL #20 ; Merge exponent and mantissa
    MOV     r3, r7, LSR #31     ; Isolate the sign of the result
    ORR     r1, r1, r3, LSL #31 ;   and merge it in as bit 31
    MVN     r3, #0              ; Leave R3 = 0xFFFFFFFF

; If any trap enable flags are set corresponding to exception flags set,
; set the corresponding cause bits and cause a trap.
;
; if (exception)
;     call handler
;     extract the possibly updated result
; return
;
; In:  R1 R0 = default result, R5 = exception information.
; The frame pushed on entry is still in place: the four original argument
; words at sp+0x0..sp+0xC, then the saved r4-r9 and lr.

return

    TST     r5, #FPECause_mask ; If any exceptions occurred ...
    BEQ     done               ;   ..

cause_trap
;;
;;  Register usage:
;;      r0 - Default result.low
;;      r1 - Default result.high
;;      r5 - Exception information
;;
;;  Stack:
;;      0x10(sp) - up: Saved registers
;;      0xC(sp): Original Arg2.high
;;      0x8(sp): Original Arg2.low
;;      0x4(sp): Original Arg1.high
;;      0x0(sp): Original Arg1.low
;;
        LDR     r2, [sp, #0x8]           ; Load original Arg2.low
        LDR     r3, [sp, #0xC]           ; Load original Arg2.high
        SUB     sp, sp, #0x8             ; Make room for exception information
        STR     r2, [sp, #0x0]           ; Store original Arg2.low
        STR     r3, [sp, #0x4]           ; Store original Arg2.high
        ; After the SUB the original Arg1 words are at 0x8(sp)/0xC(sp).
        ; Arg1 goes out low word in r2, high word in r3, matching the
        ; register usage documented below and the low/high order used for
        ; every other double in this routine.
        LDR     r2, [sp, #0x8]           ; Load original Arg1.low
        LDR     r3, [sp, #0xC]           ; Load original Arg1.high
        STR     r0, [sp, #0x8]           ; Store default result.low
        STR     r1, [sp, #0xC]           ; Store default result.high
        MOV     r1, r5                   ; Move exception information
        ADD     r0, sp, #0x10            ; Pointer for return value (reuses the
                                         ;   slots that held original Arg2)

;;  Register Usage:
;;      r0 - Address for return value = 0x10(sp)
;;      r1 - Exception information
;;      r2 - Original arg1.low
;;      r3 - Original arg1.high
;;
;;  Stack Usage:
;;      0x14(sp): Return result.high
;;      0x10(sp): Return result.low
;;      0xC(sp): Default result.high
;;      0x8(sp): Default result.low
;;      0x4(sp): Original arg2.high
;;      0x0(sp): Original arg2.low
        CALL    FPE_Raise             ; Deal with exception information

    IF Thumbing :LAND: :LNOT: Interworking
        CODE16
        bx      pc              ; switch back to ARM mode
        nop
        CODE32
    ENDIF	
	
        LDR     r0, [sp, #0x10]       ; Load up returned result
        LDR     r1, [sp, #0x14]       ;  ...
        ADD     sp, sp, #0x8          ; Restore extra arg passing space

done
    ADD     sp, sp, #0x10             ; Pop off original args
  IF Interworking :LOR: Thumbing
    LDMIA   sp!, {r4-r9, lr}
    BX      lr
  ELSE
    LDMIA   sp!, {r4-r9, pc}
  ENDIF
                                      ; Restore off non-volatiles and return


;%%%%%%%%%%%%%%%%%%%%%%%%%
;%  Exceptional process  %
;%%%%%%%%%%%%%%%%%%%%%%%%%

; Exception 1: parameter1 is non-finite (exponent1 == 2047); it's either a
; NaN or inf.  The mantissa has not been shifted left for the guard bits yet.
;
; If either parameter is an SNaN, return an invalid op exception with a QNaN.
; Otherwise, if either parameter is a QNaN, silently return a QNaN.  Otherwise,
; parameter1 is inf.  Return an invalid op exception with a QNaN for inf*0, or
; inf for inf*inf or inf*<non-0 finite>.
;
; return_invalid / return_QNaN build the QNaN from whatever is in R8 R0, so
; whenever the NaN being propagated is parameter2, mantissa2 (R2 R4) is copied
; into R8 R0 first.
;
; if (mantissa1<51> == 0 &           // parameter1 is an SNaN
;     mantissa1 != 0)                //  ..
;     return invalid op exception
; else if (exponent2 == 2047 &       // parameter2 is an SNaN
;          mantissa2<51> == 0 &      //   ..
;          mantissa2 != 0)           //   ..
;     return invalid op exception
; else if (mantissa1 != 0)           // parameter1 is a QNaN
;     return QNaN
; else if (exponent2 != 2047)        // parameter2 is finite
;     if (parameter2 != 0)           // inf*<non-0 finite>
;         return inf
;     else                           // inf*0
;         return invalid op exception
; else if (mantissa2 != 0)           // parameter2 is a QNaN
;     return QNaN
; else                               // inf*inf
;     return inf

exception1
    ORRS    r3, r8, r0         ; if (mantissa1 !=0 &&
    BEQ     e1_p2_snan_check   ;   ..
    TST     r8, #dSignalBit    ;     mantissa1[MSb] == 0)
    BEQ     return_invalid     ;   return invalid operation

e1_p2_snan_check
    ADD     r3, r1, #1         ; else if (exponent2 == 2047 &&
    CMP     r3, #2048          ;   ..
    BNE     e1_p2_not_snan     ;   ..
    ORRS    r3, r2, r4         ;          mantissa2 != 0 &&
    BEQ     e1_p2_not_snan     ;   ..
    TST     r2, #dSignalBit    ;          mantissa2[MSb] == 0)
    MOVEQ   r8, r2             ;   copy mantissa2 to mantissa1 (only on the
    MOVEQ   r0, r4             ;     SNaN path; otherwise R8 R0 must survive)
    BEQ     return_invalid     ;   return invalid operation

e1_p2_not_snan
    ORRS    r3, r8, r0         ; else if (mantissa1 != 0)
    BNE     return_QNaN        ;   return QNaN

e1_p1_is_INF
    ADD     r3, r1, #1         ; else if (exponent2 != 2047)
    CMP     r3, #2048          ;   ..
    BEQ     e1_p2_INF_NaN      ;   ..
    CMP     r1, #0             ;   if (parameter2 != 0)
    ORREQS  r3, r2, r4         ;     .. (exponent2 == 0 and mantissa2 == 0)
    BNE     return_inf         ;     return INF
    MOV     r8, #0             ;   else
    MOV     r0, #0             ;     zero out mantissa1 for QNaN (it is
                               ;     already zero here: parameter1 is inf)
    B       return_invalid     ;     return invalid operation

e1_p2_INF_NaN
    ORRS    r3, r2, r4         ; else if (mantissa2 != 0)
    MOV     r8, r2             ;   copy mantissa2 to mantissa1 (MOV leaves the
    MOV     r0, r4             ;     flags from the ORRS above intact)
    BNE     return_QNaN        ;   return QNaN
    B       return_inf         ; else
                               ;   return INF
                               ;   return INF


; Exception 2: parameter1 is finite.  parameter2 is non-finite (exponent2 ==
; 2047); it's either a NaN or inf.  The mantissa has not been shifted left
; for the guard bits yet.
;
; If parameter2 is an SNaN, return an invalid op exception with a QNaN.
; Otherwise, if it's a QNaN, silently return a QNaN.  Otherwise it's finite*inf
; so return an invalid op exception with a QNaN for 0*inf, or inf for
; <non-0 finite>*inf.
;
; return_invalid / return_QNaN build the QNaN from R8 R0, so mantissa2 is
; copied into R8 R0 before either label is reached.  This includes the SNaN
; path, which would otherwise return the finite parameter1's mantissa bits as
; the NaN payload.
;
; if (mantissa2 != 0 &
;     mantissa2<51> == 0)       // parameter2 is an SNaN
;     return invalid op exception
; else if (mantissa2 != 0)      // parameter2 is a QNaN
;     return QNaN
; else if (parameter1 != 0)     // parameter1 is non-0 finite
;     return inf
; else                          // it's 0*inf
;     return invalid op exception

exception2
    ORRS    r3, r2, r4      ; if (mantissa2 == 0)
    BEQ     e2_p2_is_inf    ;   parameter2 is inf
    MOV     r8, r2          ; parameter2 is a NaN: copy mantissa2 into
    MOV     r0, r4          ;   mantissa1 so the QNaN carries its payload
    TST     r2, #dSignalBit ; if (mantissa2[MSb] == 0)
    BEQ     return_invalid  ;   SNaN: return invalid operation
    B       return_QNaN     ; else QNaN: return QNaN

e2_p2_is_inf
    ORRS    r3, r8, r0      ; if (mantissa1 == 0 &&
    CMPEQ   r9, #0          ;     exponent1 == 0)      // parameter1 is 0
    MOVEQ   r8, r2          ;   copy (zero) mantissa2 to mantissa1 for QNaN
    MOVEQ   r0, r4          ;     ..
    BEQ     return_invalid  ;   0*inf: return invalid operation
    B       return_inf      ; else return INF


; Exception 3: parameter1 is 0 or denormal (exponent1 = 0), parameter2 is
; finite.
;
; if (mantissa1 == 0)
;     return zero
; else normalize parameter1
;
; Normalizing shifts R8 R0 left until the 1.0 bit (MSB) of R8 is set.  The
; first shift is free: it accounts for the hidden mantissa bit that denormals
; don't have.  Every further shift lowers exponent1 by one.  That is expressed
; here by raising exponent1 by one up front and charging every shift.

exception3
    ORRS    r3, r8, r0      ; mantissa1 == 0 means parameter1 is zero
    BEQ     return_zero     ;   so the product is zero
p1_norm
    ADD     r9, r9, #1      ; Credit for the first, uncharged shift
p1_norm_loop
    ADDS    r0, r0, r0      ; R8 R0 <<= 1: double the low word ...
    ADC     r8, r8, r8      ;   ... and the high word, taking in the carry
    SUB     r9, r9, #1      ; exponent1 down by 1 for this shift
    CMP     r8, #MSB        ; Repeat while mantissa1 < 1.0
    BLT     p1_norm_loop    ;   ..
end_p1_norm
    B       exception_return3
                            ; Rejoin the main path with parameter1 normalized


; Exception 4: parameter1 is finite and (now) normalized, parameter2 is 0 or
; denormal (exponent2 = 0).
;
; if (mantissa2 == 0)
;     return zero
; else normalize parameter2
;
; Normalizing shifts R2 R4 left until the 1.0 bit (MSB) of R2 is set.  The
; first shift is free: it accounts for the hidden mantissa bit that denormals
; don't have.  Every further shift lowers exponent2 by one.  That is expressed
; here by raising exponent2 by one up front and charging every shift.
exception4
    ORRS    r3, r2, r4      ; mantissa2 == 0 means parameter2 is zero
    BEQ     return_zero     ;   so the product is zero
p2_norm
    ADD     r1, r1, #1      ; Credit for the first, uncharged shift
p2_norm_loop
    ADDS    r4, r4, r4      ; R2 R4 <<= 1: double the low word ...
    ADC     r2, r2, r2      ;   ... and the high word, taking in the carry
    SUB     r1, r1, #1      ; exponent2 down by 1 for this shift
    CMP     r2, #MSB        ; Repeat while mantissa2 < 1.0
    BLT     p2_norm_loop    ;   ..
end_p2_norm
    B       exception_return4
                            ; Rejoin the main path with parameter2 normalized


; Overflow: record overflow together with inexact in the exception flags, then
; fall through and deliver a properly signed infinity.
return_overflow
    ORR     r5, r5, #OVF_bit :OR: INX_bit
                            ; Report overflow (=> inexact)
                            ; Fall thru to return inf

; Return properly signed inf: exponent field all ones, mantissa zero.  The sign
; is applied from R7 by return_value.
return_inf
    MOV     r0, #0          ; mantissa1 = 0 (low word)
    MOV     r8, #0          ;   .. (high word)
    MOV     r9, #0x800      ; exponent1 = 2047, formed as 2048 - 1
    SUB     r9, r9, #1      ;   ..
    B       return_value


; Return 0 carrying the sign of the product (bit 31 of R7).  Branches to
; "return" directly because the result is already in packed form.
return_zero
    MOV     r0, #0          ; Low word of a zero is all zero bits
    AND     r1, r7, #0x80000000
                            ; High word is just the sign bit
    B       return          ; Done

; Invalid operation: record it in the exception flags, then fall through and
; deliver a QNaN.
return_invalid
    ORR     r5, r5, #IVO_bit; Report invalid op exception
                            ; Fall thru to return a QNaN

; Return a QNaN.  The high word is the current R8 with 0x7FF80000 ORed in (the
; constant is applied in two pieces); R0 is returned unchanged as the low word.
return_QNaN
    ORR     r1, r8, #0x00F80000
                            ; Lower part of 0x7FF80000
    ORR     r1, r1, #0x7F000000
                            ; Upper part of 0x7FF80000
    B       return          ; Result is already packed

    END
RDK 2.0 2008-11-17 18:29:00 -05:00			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
			`;;;`
			`;;; Microsoft Research Singularity`
			`;;;`
			`;;; Copyright (c) Microsoft Corporation. All rights reserved.`
			`;;;`
			`;;; This file contains ARM-specific assembly code.`
			`;;;`

			`GBLL mul_s`

			`GET veneer_d.asm`

			`END`


			`;;;; THE BELOW ROUTINE SHOULD WORK, BUT THE ARM ROUTINES SHOULD BE FASTER.`


			`;`
			`; Translated to ARM from SH3 FP emulation routines.`
			`;`
			`; __muld Double precision floating point multiplication.`
			`; Input:`
			`; r0 - Arg1.low`
			`; r1 - Arg1.high`
			`; r2 - Arg2.low`
			`; r3 - Arg2.high`
			`; Output:`
			`; r0 - Result.low`
			`; r1 - Result.high`
			`;`
			`; Note:`
			`; If any FP exceptions are enabled, this routine may raise an exception.`
			`;`
			`;`
			`; IEEE DOUBLE FORMAT`
			`;`
			`; 8 BYTES (LONG WORD * 2)`
			`; 63 62 52 51 0`
			`; +-+-----------+----------------------------------------------------+`
			`; \|s\| e(11) \| m(52) \|`
			`; +-+-----------+----------------------------------------------------+`
			`; ^ point`
			`;`
			`; INFINITY NUMBER : e = 2047 m = 0`
			`; ZERO : e = 0 m = 0`
			`; NaN : e = 2047 m != 0`
			`; DENORMAL NUMBER : e = 0 m != 0`
			`;`
			`GET fpe.asm`

			`Export __muld`

			`IMPORT FPE_Raise`

			`AREA \|.text\|, CODE, READONLY`


			`CARRY_CHECK EQU 0x01000000`
			`MSB EQU 0x00100000`


			`__muld`

			`STMFD sp!, {r0-r9, lr} ; Save off args and non-volatiles and lr`

			`MOV r8, r1 ; Load parameter1 as R8 R0`
			`MOV r4, r2 ; Load parameter2 as R2 R4`
			`MOV r2, r3 ; ...`
			`MOV r5, #_FpMulD ; Double multiply, assume no exceptions`


			`; Unpack parameters.`
			`;`
			`; R8 R0: mantissa1 R2 R4: mantissa2`
			`; R9: exponent1 R1: exponent2`
			`; R7: sign = sign1 XOR sign2`
			`;`
			`; R5: Exception flags`

			`MOV r9, r8, LSL #1 ; Extract exponent1`
			`MOV r9, r9, LSR #21 ; ...`
			`MOV r1, r2, LSL #1 ; Extract exponent2`
			`MOV r1, r1, LSR #21 ; ...`
			`MVN r3, #0 ; Set up to extract mantissas`
			`EOR r7, r8, r2 ; Compute sign of result`
			`AND r8, r8, r3, LSR #12; Extract mantissa1`
			`AND r2, r2, r3, LSR #12; Extract mantissa2`


			`; Check for exceptional cases. All NaNs, infinities, and 0's are eliminated.`
			`; Denormal numbers return here after normalizing them. After these checks,`
			`; both parameters are normalized numbers.`
			`;`
			`; if (exponent1 == 2047)`
			`; exception1; parameter1 is nonfinite`
			`; if (exponent2 == 2047)`
			`; exception2; parameter1 is finite, parameter2 is nonfinite`
			`; if (exponent1 == 0)`
			`; exception3; parameter1 is 0 or denormal, parameter2 is finite`
			`; if (exponent2 == 0)`
			`; exception4; parameter1 is normalized, parameter2 is 0 or denormal`

			`ADD r3, r9, #1 ; if (exponent1==2047)`
			`CMP r3, #2048 ; ..`
			`BEQ exception1 ; exception1`

			`ADD r3, r1, #1 ; if (exponent2==2047)`
			`CMP r3, #2048 ; ..`
			`BEQ exception2 ; exception2`

			`CMP r9, #0 ; if (exponent1==0)`
			`BEQ exception3 ; exception3`
			`exception_return3`

			`CMP r1, #0 ; if (exponent2==0)`
			`BEQ exception4 ; exception4`
			`exception_return4`

			`; Multiply the 53-bit mantissa1 and mantissa2 to produce a 106-bit product.`
			`;`
			`; Mantissas:`
			`;`
			`; 63 53 51 32 31 0`
			`; 31 21 19 0 31 0`
			`; +-----------+-+---------------------+-----------------------------------+`
			`; \|<--- 0 --->\|1\| m1h, m2h \| m1l, m2l \|`
			`; +-----------+-+---------------------+-----------------------------------+`
			`; ^ Binary point`
			`;`
			`; Partial product terms:`
			`;`
			`; m1lm2l.h m1lm2l.l`
			`; m1hm2l.h < C + m1hm2l.l`
			`; + m1lm2h.h < C + m1lm2h.l`
			`; + m1hm2h.h < C + m1hm2h.l`
			`; ----------- ----------- ----------- -----------`
			`; res3 res2 res1 res0`
			`;`
			`; Intermediate result:`
			`;`
			`; 127 106 104 103 96 95 64 63 32 31 0`
			`; 31 10 9 8 7 0 31 0 31 0 31 0`
			`; +----------------------+-+-+--------+----/\/----+----/\/----+----/\/----+`
			`; \|<-------- 0 --------->\|?\|?\|R3: res3\| R8: res2 \| R0: res1 \| R6: res0 \|`
			`; +----------------------+-+-+--------+----/\/----+----/\/----+----/\/----+`
			`; ^ Binary point`

			`ADD r9, r9, r1 ; Compute exponent of result ...`

			`UMULL r6, r1, r0, r4 ; Compute m1l * m2l`
			`; r6 = m1l*m2l.l, res0`
			`; r1 = m1l*m2l.h`

			`ORR r8, r8, #MSB ; Set mantissa1's hidden bit`
			`ORR r2, r2, #MSB ; Set mantissa2's hidden bit`

			`UMULL r4, r3, r8,r4 ; Compute m1h * m2l`
			`; r4 = m1h*m2l.l`
			`; r3 = m1h*m2l.h`

			`ADDS r4, r1, r4 ; Add 1st 2 terms of res1`

			`SUB r9, r9, #0x400 ; ... compute exponent of result`
			`ADD r9, r9, #0x1 ; ... compute exponent of result`

			`UMULL r0, r1, r2, r0 ; Compute m1l * m2h`
			`; r0 = m1l*m2h.l`
			`; r1 = m1l*m2h.h`

			`ADCS r1, r1, r3 ; Add 1st 2 terms of res2, no carry out`
			`ADCS r0, r0, r4 ; Add 3rd term of res1, no carry in`

			`UMULL r8, r3, r2, r8 ; Compute m1h * m2h`
			`; r8 = m1h*m2h.l`
			`; r3 = m1h*m2h.h`

			`ADCS r8, r8, r1 ; Add 3rd term of res2`
			`ADC r3, r3, #0 ; Add res2's carry to res3`


			`; Shift the intermediate result right 17 bits, and 1 more if the product took`
			`; 2 bits to the left of the binary point. Fold all dropped bits from the right`
			`; into the sticky bit S. This leaves the result in standardized form for`
			`; rounding.`
			`;`
			`; Result:`
			`; 63 56 54 32 31 3 2 0`
			`; 31 24 22 0 31 3 2 0`
			`; +---------+-+-----------------------+-----------------------------------+`
			`; \|<-- 0 -->\|1\| R8 \| R0 L\|GRS\|`
			`; +---------+-+-----------------------+-----------------------------------+`
			`; ^ Binary point`

			`normalize`
			`CMP r6, #0 ; Fold bits we're about to lose into a`
			`ORRNE r0, r0, #1 ; sticky bit`
			`MOV r6, r6, LSR #17 ; Shift intermediate result right 17`
			`ORR r6, r6, r0, LSL #15 ; ..`
			`MOV r0, r0, LSR #17 ; ..`
			`ORR r0, r0, r8, LSL #15 ; ..`
			`MOV r8, r8, LSR #17 ; ..`
			`ORR r8, r8, r3, LSL #15 ; ..`
			`TST r8, #CARRY_CHECK ; If product has 2 bits to the left of the`
			`BEQ end_normalize ; binary point`
			`MOVS r8, r8, LSR #1 ; Then normalize by scaling right 1`
			`MOVS r0, r0, RRX ; more bit`
			`MOV r6, r6, RRX ; ..`
			`ADD r9, r9, #1 ; ..`
			`end_normalize`

			`; There are still 17 or 18 guard bits on the left of R6 that need to be folded`
			`; into the sticky bit S. It's safe to check the right ones over again because`
			`; we're only concerned with stickiness.`

			`CMP r6,#0 ; If any guard bits below S are set`
			`ORRNE r0, r0, #1 ; fold them into S`

			`; Denormalize the result if necessary, with no concern for performance.`

			`CMP r9, #0 ; If exponent <= 0`
			`BGT end_denormal ; ..`
			`RSB r9, r9, #0 ; Then shift right exponent1 places`
			`ADD r9, r9, #1 ; +1 for the non-hidden bit`
			`denormal_loop`
			`MOVS r8, r8, LSR #1 ; ..`
			`MOVS r0, r0, RRX ; ..`
			`ORRCS r0, r0, #1 ; Fold the lost bit into the sticky bit`
			`SUBS r9, r9, #1 ; ..`
			`BNE denormal_loop ; ..`
			`end_denormal`

			`; Round to nearest. If rounding occurs, set inexact and`
			`; mantissa += G & ( L \| R \| S ). If the rounding carries, then renormalize.`

			`; Test for inexact.`
			`TST r0, #0x7 ; If G\|R\|S (=> rounding required)`
			`BEQ end_round ; ..`
			`ORR r5, r5, #INX_bit ; result is inexact`

			`; Round to nearest.`
			`TST r0, #0x4 ; If G &&`
			`BEQ end_round ; ..`
			`TST r0, #0xB ; L\|R\|S`
			`BEQ end_round ; ..`
			`ADDS r0, r0, #0x8 ; Then round the mantissa up`
			`ADC r8, r8, #0 ; ..`

			`CMP r8, #CARRY_CHECK ; If the rounding carried`
			`BLT no_normal_carry ; (mantissa >= 0x01000000)`

			`MOVS r8, r8, LSR #1 ; Then renormalize`
			`MOV r0, r0, RRX ; ..`
			`ADD r9, r9, #1 ; ..`
			`B end_round ; ..`

			`no_normal_carry`
			`CMP r9, #0 ; Else if (exponent == 0)`
			`BNE end_round ; ..`
			`CMP r8, #CARRY_CHECK>>1; && (mantissa >= 0x00800000)`
			`MOVGE r9, #1 ; Then rounded MaxDenorm to MinNormal`

			`end_round`

			`; Test for overflow. Do this after rounding in case rounding caused overflow.`
			`ADD r3, r9, #1 ; If (exponent >= 2047)`
			`CMP r3, #2048 ; ..`
			`BGE return_overflow ; return overflow exception`

			`; Test tininess after rounding.`
			`TST r5, #INX_bit ; If already inexact`
			`BEQ end_check_underflow1; ..`
			`CMP r9, #0 ; and if exponent = 0`
			`ORREQ r5, r5, #UNF_bit ; result has underflowed too`
			`end_check_underflow1`

			`; Pack the result back into IEEE format.`

			`return_value`

			`MOV r0, r0, LSR #3 ; Shift mantissa right 3 to remove GRS`
			`ORR r0, r0, r8, LSL #29 ; ..`
			`MOV r8, r8, LSR #3 ; ..`
			`MVN r3, #0 ; Mask away the hidden bit`
			`AND r8, r8, r3, LSR #12 ; ..`
			`ORR r1, r8, r9, LSL #20 ; Merge exponent and mantissa`
			`MOVS r7, r7 ; Merge sign with exponent and mantissa`
			`ORRMI r1, r1, #0x80000000 ; ..`

			`; If any trap enable flags are set corresponding to exception flags set,`
			`; set the corresponding cause bits and cause a trap.`
			`;`
			`; if (exception)`
			`; call handler`
			`; extract the possibly updated result`
			`; return`

			`return`

			`TST r5, #FPECause_mask ; If any exceptions occurred ...`
			`BEQ done ; ..`

			`cause_trap`
			`;;`
			`;; Register usage:`
			`;; r0 - Default result.low`
			`;; r1 - Default result.high`
			`;; r5 - Exception information`
			`;;`
			`;; Stack:`
			`;; 0x10(sp) - up: Saved registers`
			`;; 0xC(sp): Original Arg2.high`
			`;; 0x8(sp): Original Arg2.low`
			`;; 0x4(sp): Original Arg1.high`
			`;; 0x0(sp): Original Arg1.low`
			`;;`
			`LDR r2, [sp, #0x8] ; Load original Arg2.low`
			`LDR r3, [sp, #0xC] ; Load original Arg2.high`
			`SUB sp, sp, #0x8 ; Make room for exception information`
			`STR r2, [sp, #0x0] ; Store original Arg2.low`
			`STR r3, [sp, #0x4] ; Store original Arg2.high`
			`LDR r3, [sp, #0x8] ; Load original Arg1.low`
			`LDR r2, [sp, #0xC] ; Load original Arg1.high`
			`STR r0, [sp, #0x8] ; Store default result.low`
			`STR r1, [sp, #0xC] ; Store default result.high`
			`MOV r1, r5 ; Move exception information`
			`ADD r0, sp, #0x10 ; Pointer for return value`

			`;; Register Usage:`
			`;; r0 - Address for return value = 0x10(sp)`
			`;; r1 - Exception information`
			`;; r2 - Original arg1.low`
			`;; r3 - Original arg1.high`
			`;;`
			`;; Stack Usage:`
			`;; 0x14(sp): Return result.high`
			`;; 0x10(sp): Return result.low`
			`;; 0xC(sp): Default result.high`
			`;; 0x8(sp): Default result.low`
			`;; 0x4(sp): Original arg2.high`
			`;; 0x0(sp): Original arg2.low`
			`CALL FPE_Raise ; Deal with exception information`

			`IF Thumbing :LAND: :LNOT: Interworking`
			`CODE16`
			`bx pc ; switch back to ARM mode`
			`nop`
			`CODE32`
			`ENDIF`

			`LDR r0, [sp, #0x10] ; Load up returned result`
			`LDR r1, [sp, #0x14] ; ...`
			`ADD sp, sp, #0x8 ; Restore extra arg passing space`

			`done`
			`ADD sp, sp, #0x10 ; Pop off original args`
			`IF Interworking :LOR: Thumbing`
			`LDMIA sp!, {r4-r9, lr}`
			`BX lr`
			`ELSE`
			`LDMIA sp!, {r4-r9, pc}`
			`ENDIF`
			`; Restore off non-volatiles and return`


			`;%%%%%%%%%%%%%%%%%%%%%%%%%`
			`;% Exceptional process %`
			`;%%%%%%%%%%%%%%%%%%%%%%%%%`

			`; Exception 1: parameter1 is non-finite (exponent1 == 2047); it's either a`
			`; NaN or inf. The mantissa has not been shifted left for the guard bits yet.`
			`;`
			`; If either parameter is an SNaN, return an invalid op exception with a QNaN.`
			`; Otherwise, if either parameter is a QNaN, silently return a QNaN. Otherwise,`
			`; parameter1 is inf. Return an invalid op exception with a QNaN for inf*0, or`
			`; inf for infinf or inf<non-0 finite>.`
			`;`
			`; if (mantissa1<51> == 0 & // parameter1 is an SNaN`
			`; mantissa1 != 0) // ..`
			`; return invalid op exception`
			`; else if (exponent2 == 2047 & // parameter2 is an SNaN`
			`; mantissa2<51> == 0 & // ..`
			`; mantissa2 != 0) // ..`
			`; return invalid op exception`
			`; else if (mantissa1 != 0) // parameter1 is a QNaN`
			`; return QNaN`
			`; else if (exponent2 != 2047) // parameter2 is finite`
			`; if (parameter2 != 0) // inf*<non-0 finite>`
			`; return inf`
			`; else // inf*0`
			`; return invalid op exception`
			`; return inf`
			`; else if (mantissa2 != 0) // parameter2 is a QNaN`
			`; return QNaN`
			`; else // inf*inf`
			`; return inf`

			; exception1 -- parameter1 has exponent1 == 2047 (NaN or inf).
			;
			; Register roles, per the unpack comment at the top of __muld:
			;   r8:r0 = mantissa1 (high:low)    r2:r4 = mantissa2 (high:low)
			;   r9    = exponent1               r1    = exponent2
			;   r5    = exception flags         r3    = scratch in this block
			;
			; Every path leaves through return_invalid, return_QNaN or return_inf.
			; When a NaN result is produced, r8:r0 is first loaded with the mantissa
			; that return_QNaN folds into the result. dSignalBit is defined outside
			; this block (presumably in fpe.asm); the tests below treat a clear bit
			; as "signalling".
			`exception1`
			`ORRS r3, r8, r0 ; if (mantissa1 != 0 &&      -- Z set when parameter1 is inf`
			`BEQ e1_p2_snan_check ; ..                    -- inf: skip the SNaN test on parameter1`
			`TST r8, #dSignalBit ; mantissa1[MSb] == 0)`
			`BEQ return_invalid ; return invalid operation (parameter1 is an SNaN)`

			`e1_p2_snan_check`
			`ADD r3, r1, #1 ; else if (exponent2 == 2047 &&  -- tested as exponent2 + 1 == 2048`
			`CMP r3, #2048 ; ..`
			`BNE e1_p2_not_snan ; ..`
			`ORRS r3, r2, r4 ; mantissa2 != 0 &&`
			`BEQ e1_p2_not_snan ; ..`
			`TST r2, #dSignalBit ; mantissa2[MSb] == 0)`
			`MOVEQ r8, r2 ; copy mantissa2 to mantissa1 (only when parameter2 is an SNaN)`
			`MOVEQ r0, r4 ; ..`
			`BEQ return_invalid ; return invalid operation`

			`e1_p2_not_snan`
			`ORRS r3, r8, r0 ; else if (mantissa1 != 0)   -- parameter1 is a QNaN`
			`BNE return_QNaN ; return QNaN`

			`e1_p1_is_INF`
			`ADD r3, r1, #1 ; else if (exponent2 != 2047)`
			`CMP r3, #2048 ; ..`
			`BEQ e1_p2_INF_NaN ; ..`
			`CMP r1, #0 ; if (parameter2 != 0)            -- exponent2 != 0, or ...`
			`ORREQS r3, r2, r4 ; ..                       -- ... exponent2 == 0 but mantissa2 != 0`
			`BNE return_inf ; return INF`
			`MOV r8, #0 ; else                            -- inf * 0`
			`MOV r0, #0 ; zero out mantissa1 for QNaN`
			`B return_invalid ; return invalid operation`

			`e1_p2_INF_NaN`
			`ORRS r3, r2, r4 ; else if (mantissa2 != 0)`
			`MOV r8, r2 ; copy mantissa2 to mantissa1 (MOV without S keeps the ORRS flags)`
			`MOV r0, r4 ; ..`
			`BNE return_QNaN ; return QNaN`
			`B return_inf ; else`
			`; return INF (inf * inf)`


			`; Exception 2: parameter1 is finite. parameter2 is non-finite (exponent2 ==`
			`; 2047); it's either a NaN or inf. The mantissa has not been shifted left`
			`; for the guard bits yet.`
			`;`
			`; If parameter2 is an SNaN, return an invalid op exception with a QNaN.`
			`; Otherwise, if it's a QNaN, silently return a QNaN. Otherwise it's finite*inf`
			`; so return an invalid op exception with a QNaN for 0*inf, or inf for`
			`; <non-0 finite>*inf.`
			`;`
			`; if (mantissa2 != 0 &`
			`; mantissa2<51> == 0) // parameter2 is an SNaN`
			`; return invalid op exception`
			`; else if (mantissa2 != 0) // parameter2 is a QNaN`
			`; return QNaN`
			`; else if (parameter1 != 0) // parameter1 is non-0 finite`
			`; return inf`
			`; else // it's 0*inf`
			`; return invalid op exception`

			exception2
			    ; Entry: parameter1 is finite, parameter2 has exponent2 == 2047
			    ; (NaN or inf).
			    ;   r8:r0 = mantissa1   r2:r4 = mantissa2   r9 = exponent1
			    ;   r5    = exception flags                 r3 = scratch
			    ; Exits through return_invalid, return_QNaN or return_inf.
			    ;
			    ; return_QNaN builds the NaN from r8:r0, so mantissa2 is copied there
			    ; before EITHER NaN exit. Without the copy on the SNaN path, the
			    ; returned QNaN would carry the finite parameter1's mantissa as its
			    ; payload; exception1 copies mantissa2 in the same situation.
			    ORRS    r3, r2, r4              ; if (mantissa2 == 0)
			    BEQ     e2_p2_is_inf            ;     parameter2 is inf
			    MOV     r8, r2                  ; parameter2 is a NaN: its mantissa
			    MOV     r0, r4                  ;   becomes the result payload
			    TST     r2, #dSignalBit         ; if (mantissa2[MSb] == 0)  -- SNaN
			    BEQ     return_invalid          ;     return invalid operation
			    B       return_QNaN             ; else return QNaN

			e2_p2_is_inf
			    ORRS    r3, r8, r0              ; if (mantissa1 == 0 &&
			    CMPEQ   r9, #0                  ;     exponent1 == 0)       -- 0 * inf
			    MOVEQ   r8, r2                  ;   copy mantissa2 to mantissa1 for QNaN
			    MOVEQ   r0, r4                  ;   ..
			    BEQ     return_invalid          ;   return invalid operation
			    B       return_inf              ; else <non-0 finite> * inf: return INF



			`; Exception 3: parameter1 is 0 or denormal (exponent1 = 0), parameter2 is`
			`; finite.`
			`;`
			`; if (mantissa1 == 0)`
			`; return zero`
			`; else normalize parameter1`

			exception3
			    ; Entry: exponent1 == 0 (parameter1 is zero or denormal), parameter2
			    ; is finite.  r8:r0 = mantissa1, r9 = exponent1, r3 = scratch.
			    ; A zero mantissa returns a signed zero; otherwise mantissa1 is
			    ; shifted left until the 1.0 bit (MSB) is set, then control resumes
			    ; at exception_return3.
			    ORRS    r3, r8, r0              ; if (mantissa1 == 0)
			    BEQ     return_zero             ;     return zero
			p1_norm
			    ADD     r9, r9, #1              ; The first shift only accounts for the
			                                    ; hidden bit denormals lack, so it must
			                                    ; not cost an exponent step: pre-add the
			                                    ; step the loop is about to subtract
			p1_norm_loop
			    ADDS    r0, r0, r0              ; mantissa1 <<= 1: low word, carry out
			    ADC     r8, r8, r8              ;   high word, carry in
			    SUB     r9, r9, #1              ; exponent1 down by 1 per shift
			    CMP     r8, #MSB                ; while (mantissa1 < 1.0)
			    BLT     p1_norm_loop            ; ..
			end_p1_norm
			    B       exception_return3       ; Done


			`; Exception 4: parameter1 is finite and (now) normalized, parameter2 is 0 or`
			`; denormal (exponent2 = 0).`
			`;`
			`; if (mantissa2 == 0)`
			`; return zero`
			`; else normalize parameter2`
			exception4
			    ; Entry: exponent2 == 0 (parameter2 is zero or denormal); parameter1
			    ; is finite and already normalized.  r2:r4 = mantissa2,
			    ; r1 = exponent2, r3 = scratch.
			    ; A zero mantissa returns a signed zero; otherwise mantissa2 is
			    ; shifted left until the 1.0 bit (MSB) is set, then control resumes
			    ; at exception_return4.
			    ORRS    r3, r2, r4              ; if (mantissa2 == 0)
			    BEQ     return_zero             ;     return zero
			p2_norm
			    ADD     r1, r1, #1              ; The first shift only accounts for the
			                                    ; hidden bit denormals lack, so it must
			                                    ; not cost an exponent step: pre-add the
			                                    ; step the loop is about to subtract
			p2_norm_loop
			    ADDS    r4, r4, r4              ; mantissa2 <<= 1: low word, carry out
			    ADC     r2, r2, r2              ;   high word, carry in
			    SUB     r1, r1, #1              ; exponent2 down by 1 per shift
			    CMP     r2, #MSB                ; while (mantissa2 < 1.0)
			    BLT     p2_norm_loop            ; ..
			end_p2_norm
			    B       exception_return4       ; Done



			`; Cause an overflow exception (=> inexact) and return properly signed inf.`
			return_overflow
			    ; Record overflow (which implies inexact) in the exception flags,
			    ; then fall through to produce the infinity.
			    ORR     r5, r5, #(OVF_bit :OR: INX_bit)

			return_inf
			    ; Load an infinity into the unpacked result registers (exponent 2047,
			    ; mantissa 0) and continue at return_value.
			    MOV     r0, #0                  ; mantissa1.low  = 0
			    MOV     r8, #0                  ; mantissa1.high = 0
			    MOV     r9, #0x700              ; exponent1 = 0x7FF (2047), built in two
			    ORR     r9, r9, #0xFF           ;   steps: 0x7FF is not one ARM immediate
			    B       return_value


			`; Return 0.`
			return_zero
			    ; Result is a zero whose sign is bit 31 of r7 (sign1 XOR sign2).
			    MOV     r0, #0                  ; Result.low  = 0
			    AND     r1, r7, #0x80000000     ; Result.high = sign bit only
			    B       return                  ; Done

			`; Cause an invalid operation exception and return a QNaN.`
			return_invalid
			    ; Record an invalid-operation exception, then fall through to build
			    ; the QNaN.
			    ORR     r5, r5, #IVO_bit

			return_QNaN
			    ; Result.high = mantissa1.high with every exponent bit and the top
			    ; mantissa bit forced on (0x7FF80000).  r0 is not written here, so
			    ; Result.low is whatever mantissa1.low holds on arrival.
			    ORR     r1, r8, #0x00F80000     ; low exponent bits + top mantissa bit
			    ORR     r1, r1, #0x7F000000     ; remaining exponent bits
			    B       return

			`END`