635 lines
22 KiB
NASM
635 lines
22 KiB
NASM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;
|
|
;;; Microsoft Research Singularity
|
|
;;;
|
|
;;; Copyright (c) Microsoft Corporation. All rights reserved.
|
|
;;;
|
|
;;; This file contains ARM-specific assembly code.
|
|
;;;
|
|
|
|
GBLL add_s
|
|
|
|
GET veneer_d.asm
|
|
|
|
END
|
|
|
|
|
|
;;;; THE BELOW ROUTINE SHOULD WORK, BUT THE ARM ROUTINES SHOULD BE FASTER.
|
|
|
|
|
|
;
|
|
; Translated to ARM from SH3 FP emulation routines.
|
|
;
|
|
; __addd Double precision floating point addition.
|
|
; Input:
|
|
; r0 - Arg1.low
|
|
; r1 - Arg1.high
|
|
; r2 - Arg2.low
|
|
; r3 - Arg2.high
|
|
; Output:
|
|
; r0 - Result.low
|
|
; r1 - Result.high
|
|
;
|
|
; Note:
|
|
; If any FP exceptions are enabled, this routine may raise an exception.
|
|
;
|
|
;
|
|
; IEEE DOUBLE FORMAT
|
|
;
|
|
; 8 BYTES (LONG WORD * 2)
|
|
; 63 62 52 51 0
|
|
; +-+-----------+----------------------------------------------------+
|
|
; |s| e(11) | m(52) |
|
|
; +-+-----------+----------------------------------------------------+
|
|
; ^ point
|
|
;
|
|
; INFINITY NUMBER : e = 2047 m = 0
|
|
; ZERO : e = 0 m = 0
|
|
; NaN : e = 2047 m != 0
|
|
; DENORMAL NUMBER : e = 0 m != 0
|
|
;
|
|
|
|
|
|
GET fpe.asm
|
|
|
|
Export __addd
|
|
Export __subd
|
|
|
|
IMPORT FPE_Raise
|
|
|
|
AREA |.text|, CODE, READONLY
|
|
|
|
|
|
CARRY_CHECK EQU 0x01000000
|
|
MSB EQU 0x00800000
|
|
NORMAL EQU 0x00100000
|
|
|
|
|
|
; Note: the SEH prolog below must match the SEH prolog for __addd.
|
|
|
|
; __subd - double-precision subtraction entry point.
; Computes Arg1 - Arg2 by toggling the sign bit of Arg2.high and then joining
; the shared addition path at add_in. r5 (the exception-information word used
; by the common code) starts out as _FpSubD.
__subd
|
|
|
|
STMFD sp!, {r0-r10, lr} ; Save off args and non-volatiles and lr
|
|
|
|
MOV r8, r1 ; Load parameter1 as R8 R0
|
|
MOV r4, r2 ; Load parameter2 as R2 R4
|
|
MOV r2, r3 ; ...
|
|
MOV r5, #_FpSubD ; Double subtract, assume no exceptions
|
|
EOR r2, r2, #0x80000000
|
|
; Toggle sign bit on parameter2 so that a - b is computed as a + (-b)
|
|
B add_in ; Then go add
|
|
|
|
|
|
; Note: the SEH prolog below must match the SEH prolog for __subd
|
|
|
|
; __addd - double-precision addition entry point.
; Saves r0-r10 and lr (the saved r0-r3 are the original arguments that the
; exception path later reads back from the stack), moves the operands into
; the working registers (parameter1 = r8:r0, parameter2 = r2:r4), seeds r5
; with _FpAddD and falls through into add_in.
__addd
|
|
|
|
STMFD sp!, {r0-r10, lr} ; Save off args and non-volatiles and lr
|
|
|
|
MOV r8, r1 ; Load parameter1 as R8 R0
|
|
MOV r4, r2 ; Load parameter2 as R2 R4
|
|
MOV r2, r3 ; ...
|
|
MOV r5, #_FpAddD ; Double add, assume no exceptions
|
|
|
|
|
|
add_in
|
|
|
|
; If abs(parameter1) < abs(parameter2) then swap them so that the resulting
|
|
; parameter1 has the larger magnitude. This guarantees that only parameter2
|
|
; might need to be shifted right before adding. Because of denormal numbers,
|
|
; it's not sufficient to compare only the exponents; the entire mantissa must
|
|
; be checked as well.
|
|
;
|
|
; if ((abs(parameter1).hi < abs(parameter2).hi) ||
|
|
; ((abs(parameter1).hi == abs(parameter2).hi) &&
|
|
; (parameter1.lo < parameter2.lo)))
|
|
; swap parameter1 and parameter2
|
|
|
|
MOV r3, r8, LSL #1 ; Extract copies of just the magnitudes (LSL #1 drops the sign bit)
|
|
CMP r3, r2, LSL #1 ; of each parameter
|
|
CMPEQ r0, r4 ; if ((abs(param1).hi < abs(param2).hi)
|
|
; ||
|
|
BHS end_swap ; ((abs(param1).hi == abs(param2).hi)
|
|
; &&
|
|
; (param1.lo < param2.lo)))
|
|
; .. (BHS = unsigned >=, so no swap when param1 is already the larger)
|
|
swap
|
|
MOV r3,r8 ; Swap parameter1 and parameter2 (r3 is the scratch register)
|
|
MOV r8,r2 ; ..
|
|
MOV r2,r3 ; ..
|
|
MOV r3,r0 ; ..
|
|
MOV r0,r4 ; ..
|
|
MOV r4,r3 ; ..
|
|
end_swap
|
|
|
|
; Unpack parameters.
|
|
;
|
|
; R8 R0: mantissa1 R2 R4: mantissa2
|
|
; R9: exponent1 R1: exponent2
|
|
; R10: sign1 R6: sign2
|
|
;
|
|
; R5: Exception flags
|
|
|
|
MOV r9, r8, LSL #1 ; Extract exponent1
|
|
MOV r9, r9, LSR #21 ; ...
|
|
MOV r1, r2, LSL #1 ; Extract exponent2
|
|
MOV r1, r1, LSR #21 ; ...
|
|
MVN r3, #0 ; Set up to extract mantissas
|
|
MOV r10, r8 ; Extract sign1
|
|
MOV r6, r2 ; Extract sign2
|
|
AND r8, r8, r3, LSR #12; Extract mantissa1
|
|
AND r2, r2, r3, LSR #12; Extract mantissa2
|
|
|
|
|
|
; Check for exceptional cases. All NaNs, infinities, and 0's are eliminated.
|
|
; Denormal numbers return here after normalizing them. After these checks,
|
|
; both parameters are normalized numbers.
|
|
;
|
|
; After potentially swapping the parameters above, it's sufficient to test
|
|
; just parameter1 for non-finite values (NaN, inf) to eliminate non-finite
|
|
; values in either parameter. Similarly, it's sufficient to test just
|
|
; parameter2 for the unnormalized numbers (exponent2 = 0; denormals and 0).
|
|
;
|
|
; if (exponent1 == 2047)
|
|
; exception1; parameter1 is nonfinite, parameter2 might be too
|
|
; if (exponent2 == 0)
|
|
; exception2; parameter is 0 or denormal, parameter1 might be too
|
|
|
|
ADD r3, r9, #1 ; if (exponent1==2047)
|
|
CMP r3, #2048 ; ...
|
|
BEQ exception1 ; exception1
|
|
CMP r1, #0 ; if (exponent2==0)
|
|
BEQ exception2 ; exception2
|
|
exception_return2
|
|
|
|
; Shift the mantissas left 3 bits to make room for guard, round and sticky bits
|
|
; (G,R,S). Then set their hidden bits.
|
|
|
|
MOV r8, r8, LSL #3 ; Shift mantissa1 left 3 for (G,R,S)
|
|
ORR r8, r8, r0, LSR #29; ...
|
|
MOV r0, r0, LSL #3 ; ...
|
|
|
|
MOV r2, r2, LSL #3 ; Shift mantissa2 left 3 for (G,R,S)
|
|
ORR r2, r2, r4, LSR #29; ...
|
|
MOV r4, r4, LSL #3 ; ...
|
|
|
|
|
|
ORR r8, r8, #0x00800000 ; Set each mantissa's hidden bit
|
|
ORR r2, r2, #0x00800000 ; ..
|
|
|
|
; Scale parameter2 so that its exponent matches that of parameter1, preparing
|
|
; for the addition. Because of the swap earlier, parameter2 always scales by
|
|
; shifting right (if it shifts at all).
|
|
;
|
|
; shift = exponent1 - exponent2
|
|
; if shift > 55
|
|
; // entire mantissa2 shifts into the sticky bit; just set S
|
|
; else
|
|
; if (shift >= 32)
|
|
; // "shift" by moving high word to low word
|
|
; if (shift != 0)
|
|
; // shift by dynamic shifting
|
|
|
|
; scale - align mantissa2 (r2:r4) to exponent1 by shifting it right by
; shift = exponent1 - exponent2, folding any bits shifted out into the
; sticky bit (bit 0 of r4). On exit r1 no longer holds exponent2.
scale
|
|
SUBS r1, r9, r1 ; shift = exponent1 - exponent2
|
|
BEQ scale_end ; Shift == 0?
|
|
CMP r1, #3 ;**;
|
|
BLE scale_le_3 ;**; 0 < shift <= 3? ..
|
|
CMP r1, #55 ; If shift <= 55 then
|
|
BLE scale_le_55 ; ..
|
|
MOV R2, #0 ; Else (mantissa2,G,R,S) = 1
|
|
MOV R4, #1 ; .. (only the sticky bit survives)
|
|
B scale_end
|
|
|
|
scale_le_3 ;**; No bits are ever lost: the low 3 bits are the still-zero G,R,S
|
|
MOV r4, r4, LSR r1 ; mantissa2 >>= x where 0 < x <= 3
|
|
RSB r3, r1, #32 ; r3 = 32 - shift
|
|
ORR r4, r4, r2, LSL r3 ; Bring the bits leaving the high word into the low word
|
|
MOV r2, r2, LSR r1 ; Shift the high word
|
|
B scale_end
|
|
|
|
scale_le_55 ; Else shift <= 55
|
|
CMP r1, #31 ; If shift < 32
|
|
BLE scale_le_31 ; ..
|
|
CMP r4, #0 ; Else S = mantissa2.l != 0 (flags are consumed by ORRNE below)
|
|
SUB r1, r1, #32 ; (32 fewer bits to shift)
|
|
MOV r4, r2 ; Shift 32 bits by moving
|
|
MOV r2, #0 ; ..
|
|
ORRNE r4, r4, #1 ; Set S if shifted out bits
|
|
|
|
scale_le_31
|
|
CMP r1, #0 ; If shift != 0
|
|
BEQ scale_end ; ..
|
|
RSB r3, r1, #32 ; Get 32 - shift
|
|
MOVS r7, r4, LSL r3 ; Extract low mantissa shifted out (Sticky==NE)
|
|
MOV r7, r2, LSL r3 ; Extract high mantissa shifted into lower (no S suffix: flags kept)
|
|
MOV r2, r2, LSR r1 ; Shift high mantissa into position
|
|
MOV r4, r4, LSR r1 ; Shift low mantissa into position
|
|
ORR r4, r4, r7 ; Insert bits from high mantissa into low
|
|
ORRNE r4, r4, #1 ; Set sticky if shifted out bits (NE still from the MOVS above)
|
|
|
|
scale_end
|
|
|
|
; Add the mantissas.
|
|
;
|
|
; if (sign1 == sign2)
|
|
; result = mantissa1 + mantissa2 // Same signs => addition
|
|
; Scale result right if it carried
|
|
; if (result overflowed)
|
|
; return properly signed inf
|
|
; else if (mantissa1 == mantissa2)
|
|
; return +0 // Equal values => result = +0
|
|
; else
|
|
; result = mantissa1 - mantissa2 // Opposite signs => subtraction
|
|
; Scale result left // High-order bits were lost
|
|
|
|
EORS r7, r10, r6 ; If sign1 != sign2
|
|
BMI mantissa_sub ; do subtract
|
|
ADDS r0, r0, r4 ; Else result = mantissa1 + mantissa2
|
|
ADC r8, r8, r2 ; ..
|
|
CMP r8, #CARRY_CHECK; If the result carried
|
|
BLT end_calc ; ..
|
|
MOVS r8, r8, LSR #1 ; Then scale right one
|
|
MOVS r0, r0, RRX ; ..
|
|
ORRCS r0, r0, #1 ; (fold lost bit into S)
|
|
ADD r9, r9, #1 ; Add 1 to exponent for shift
|
|
ADD r3, r9, #1 ; Add 1 to exponent for compare
|
|
CMP r3, #2048 ; EQ if overflow
|
|
BLT end_calc ; ..
|
|
; Overflowed so
|
|
ORR r5, r5, #OVF_bit :OR: INX_bit
|
|
; set exception flags
|
|
MOV r0, #0 ; and return properly signed inf
|
|
MOV r8, #0 ; ..
|
|
B return_value ; ..
|
|
|
|
; Return +0.
|
|
|
|
; plus_zero - return +0.0 when two operands of equal magnitude and opposite
; sign cancel exactly.
;
; This path branches to `return`, not `return_value`, so that the sign of
; parameter1 (r10) is NOT merged into the result. `return` hands back r0/r1
; as they are, therefore both result words are written here explicitly;
; Result.high must not depend on r1 still holding a zero shift count left
; over from `scale`. r8 is not read by `return` and is restored by the
; epilogue, so it does not need to be cleared.
plus_zero
        MOV     r0, #0                  ; Result.low  = 0
        MOV     r1, #0                  ; Result.high = 0 (sign bit clear => +0)
        B       return                  ; Bypass packing; go to exception check / exit
|
|
|
|
mantissa_sub
|
|
CMP r8, r2 ; Else if mantissa1 = mantissa2
|
|
CMPEQ r0, r4 ; ..
|
|
BEQ plus_zero ; return +0
|
|
man_sub1
|
|
SUBS r0, r0, r4 ; Else result = mantissa1 - mantissa2
|
|
SBC r8, r8, r2 ; ..
|
|
;**; Parameter1 always has the larger magnitude; result is always its sign.
|
|
|
|
|
|
; Normalize since high-order bits are lost when subtracting. Do this in
|
|
; chunks.
|
|
|
|
normalize
|
|
CMP r8, #0 ; If mantissa.h = 0
|
|
BNE norm32_end ; ..
|
|
MOV r8, r0 ; mantissa <<= 32 by moving
|
|
MOV r0, #0 ; ..
|
|
SUB r9, r9, #32 ; exponent -= 32
|
|
norm32_end
|
|
MVN r3, #0 ; If (mantissa.h & 0xffff0000) = 0
|
|
TST r8, r3, LSL #16 ; ..
|
|
BNE norm16_end ;
|
|
MOV r8, r8, LSL #16 ; mantissa <<= 16
|
|
ORR r8, r8, r0, LSR #16
|
|
MOV r0, r0, LSL #16
|
|
SUB r9, r9, #16 ; exponent -= 16
|
|
norm16_end
|
|
CMP r8, #CARRY_CHECK ; If mantissa is not too far left
|
|
BLO overnorm_end ; keep normalizing, otherwise, undo
|
|
|
|
over_norm_loop
|
|
MOVS r8, r8, LSR #1 ; mantissa1 >>= 1
|
|
MOV r0, r0, RRX ; ..
|
|
ADD r9, r9, #1 ; exponent1++
|
|
CMP r8, #CARRY_CHECK ; If mantissa is still too far left
|
|
BHS over_norm_loop ; ..
|
|
B end_norm ; Done
|
|
|
|
overnorm_end
|
|
CMP r8, #MSB ; If mantissa is too far right
|
|
BGE end_norm ; ..
|
|
|
|
norm_loop
|
|
MOVS r0, r0, LSL #1 ; mantissa1 <<= 1
|
|
MOV r8, r8, LSL #1 ; ..
|
|
ORRCS r8, r8, #1 ; ..
|
|
SUB r9, r9, #1 ; exponent1--
|
|
CMP r8, #MSB ; If mantissa is still too far right
|
|
BLT norm_loop ; ..
|
|
|
|
end_norm
|
|
end_calc
|
|
|
|
; Denormalize the result if necessary, with no concern for performance.
|
|
; Addition (and thus subtraction) can never generate less significant bits than
|
|
; those of the original operands. Thus, denormalization never results in lost
|
|
; bits to fold into S.
|
|
|
|
CMP r9, #0 ; If exponent <= 0 (BGT skips this only when exponent > 0)
|
|
BGT end_denormal ; ..
|
|
RSB r9, r9, #0 ; Then shift right (1 - exponent) places: r9 = -exponent
|
|
ADD r9, r9, #1 ; +1 for the non-hidden bit
|
|
denormal_loop
|
|
MOVS r8, r8, LSR #1 ; mantissa >>= 1 (carry out of the high word ...
|
|
MOV r0, r0, RRX ; ... rotates into the top of the low word)
|
|
SUBS r9, r9, #1 ; Count down; r9 ends at 0, which becomes the packed exponent
|
|
BNE denormal_loop ; ..
|
|
end_denormal
|
|
|
|
; Round to nearest. If rounding occurs, set inexact and
|
|
; mantissa += G & ( L | R | S ). If the rounding carries, then renormalize.
|
|
;
|
|
; Addition (and thus subtraction) can never generate less significant bits than
|
|
; those of the original operands. Thus, rounding can never meet either of the
|
|
; IEEE loss of accuracy tests for underflow. Nor can rounding cause MaxDenorm
|
|
; to carry to MinNormal.
|
|
;
|
|
; Test for inexact.
|
|
; Rounding step. The low three bits of r0 are G (0x4), R (0x2) and S (0x1);
; bit 0x8 is L, the least significant bit of the mantissa proper.
TST r0, #0x7 ; If G|R|S (=> rounding required)
|
|
BEQ end_round ; ..
|
|
ORR r5, r5, #INX_bit; result is inexact (can't underflow)
|
|
|
|
; Round to nearest.
|
|
TST r0, #0x4 ; If G &&
|
|
BEQ end_round ; ..
|
|
TST r0, #0xB ; L|R|S
|
|
BEQ end_round ; ..
|
|
ADDS r0, r0, #0x8 ; Then round the mantissa up (add one unit at L)
|
|
ADC r8, r8, #0 ; Propagate the carry into the high word
|
|
|
|
CMP r8, #CARRY_CHECK; If the rounding carried
|
|
BLT end_round ; (fall through when mantissa >= 0x01000000)
|
|
; The mantissa itself is not shifted here; only the exponent is bumped.
; The extra high bit is discarded by the BIC in return_value.
ADD r9, r9, #2 ; Then renormalize: temporarily exponent + 2 so that ...
|
|
CMP r9, #2048 ; If rounding caused overflow (... EQ means new exponent == 2047)
|
|
SUB r9, r9, #1 ; Net effect: exponent += 1 (SUB leaves the flags alone)
|
|
ORREQ r5, r5, #OVF_bit :OR: INX_bit
|
|
; Report overflow (=> inexact)
|
|
end_round
|
|
|
|
; Pack the result back into IEEE format.
|
|
|
|
; return_value - pack the working result into IEEE double layout in r1:r0.
; In:  r8:r0 = mantissa still carrying the 3 G/R/S bits, r9 = biased exponent,
;      r10   = word whose bit 31 is the result sign.
; Out: r0 = Result.low, r1 = Result.high; r10 is reduced to just the sign bit.
return_value
|
|
MOV r0, r0, LSR #3 ; Shift mantissa right 3
|
|
ORR r0, r0, r8, LSL #29 ; .. (low 3 bits of the high word move into the low word)
|
|
MOV r1, r8, LSR #3 ; ..
|
|
BIC r1, r1, #0x0FF00000
|
|
; Mask away the hidden bit and possibly one bit
|
|
; higher if round incremented mantissa.
|
|
; 0xFF<<20 is probably overkill, but safe.
|
|
ORR r1, r1, r9, LSL #20
|
|
; Merge exponent and mantissa
|
|
AND r10, r10, #0x80000000
|
|
ORR r1, r1, r10 ; Merge sign with exponent and mantissa
|
|
|
|
; If any trap enable flags are set corresponding to exception flags set,
|
|
; set the corresponding cause bits and cause a trap.
|
|
;
|
|
; if (exception)
|
|
; call handler
|
|
; extract the possibly updated result
|
|
; return
|
|
|
|
; return - common exit. If any exception cause bit is set in r5, marshal the
; original operands and the default result for FPE_Raise and replace the
; result with whatever it writes back; otherwise fall straight to `done`.
;
; In:  r0 = default Result.low, r1 = default Result.high
;      r5 = exception information
;
; Stack on entry (as left by STMFD sp!, {r0-r10, lr}):
;      0x10(sp) - up: saved registers
;      0xC(sp):  original Arg2.high
;      0x8(sp):  original Arg2.low
;      0x4(sp):  original Arg1.high
;      0x0(sp):  original Arg1.low
;
; NOTE(review): the original code loaded Arg1.low into r3 and Arg1.high into
; r2, which contradicts the register-usage table of this routine (r2 = low,
; r3 = high), the r0 = low / r1 = high convention at entry, and the
; low-word-at-lower-address layout used for Arg2 on the stack. The loads
; below follow the documented layout. FPE_Raise is external to this file, so
; confirm against its declaration.
return
        TST     r5, #FPECause_mask      ; If any exceptions occurred ...
        BEQ     done                    ; No: return the default result

        LDR     r2, [sp, #0x8]          ; Load original Arg2.low
        LDR     r3, [sp, #0xC]          ; Load original Arg2.high
        SUB     sp, sp, #0x8            ; Make room for exception information
        STR     r2, [sp, #0x0]          ; Store original Arg2.low
        STR     r3, [sp, #0x4]          ; Store original Arg2.high
        LDR     r2, [sp, #0x8]          ; Load original Arg1.low  -> r2
        LDR     r3, [sp, #0xC]          ; Load original Arg1.high -> r3
        STR     r0, [sp, #0x8]          ; Store default result.low
        STR     r1, [sp, #0xC]          ; Store default result.high
        MOV     r1, r5                  ; Move exception information
        ADD     r0, sp, #0x10           ; Pointer for return value

        ;; Register usage at the call:
        ;;   r0 - Address for return value = 0x10(sp)
        ;;   r1 - Exception information
        ;;   r2 - Original arg1.low
        ;;   r3 - Original arg1.high
        ;;
        ;; Stack usage at the call:
        ;;   0x14(sp): Return result.high (overwrites the saved Arg2.high slot)
        ;;   0x10(sp): Return result.low  (overwrites the saved Arg2.low slot)
        ;;   0xC(sp):  Default result.high
        ;;   0x8(sp):  Default result.low
        ;;   0x4(sp):  Original arg2.high
        ;;   0x0(sp):  Original arg2.low

        CALL    FPE_Raise               ; Deal with exception information

    IF Thumbing :LAND: :LNOT: Interworking
        CODE16
        bx      pc                      ; switch back to ARM mode
        nop
        CODE32
    ENDIF

        LDR     r0, [sp, #0x10]         ; Load up returned result
        LDR     r1, [sp, #0x14]         ; ...
        ADD     sp, sp, #0x8            ; Restore extra arg passing space
|
|
|
|
|
|
; done - epilogue. r0/r1 already hold the result. Discards the four saved
; argument words (r0-r3 pushed at entry), restores r4-r10 and returns.
done
|
|
ADD sp, sp, #0x10 ; Pop off original args (the saved r0-r3, 4 words)
|
|
IF Interworking :LOR: Thumbing
|
|
LDMIA sp!, {r4-r10, lr}
|
|
BX lr
|
|
ELSE
|
|
LDMIA sp!, {r4-r10, pc}
|
|
ENDIF
|
|
; Restore non-volatiles and return (BX lr when interworking or Thumb, else pop directly into pc)
|
|
|
|
|
|
|
|
;%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
;% Exceptional process %
|
|
;%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
; Exception 1: parameter1 is non-finite (exponent1 == 2047). The mantissa has
|
|
; not been shifted left for the guard bits yet. The choice of ARM SNaN
|
|
; versus QNaN (mantissa<51> = 1 => QNaN) means that abs(<any QNaN>) >
|
|
; abs(<any SNaN>) > abs(<any inf>).
|
|
;
|
|
; exception1:
|
|
; if (mantissa1 == 0)
|
|
; CheckArg2INF(); // Arg1 is an INF. Must check Arg2 for INF.
|
|
; else if (mantissa1[MSb] == 0)
|
|
; SignalInvalid(); // Arg1 is an SNaN so signal invalid and return it.
|
|
; else
|
|
; CheckArg2SNaN(); // Arg1 is a QNaN. Check Arg2 for SNaN.
|
|
;
|
|
; CheckArg2SNaN:
|
|
; if (exponent2 == 2047 &&
|
|
; mantissa2 != 0 &&
|
|
; mantissa2[MSb] == 0)
|
|
; SignalInvalid();
|
|
; else
|
|
; ReturnQNaN();
|
|
;
|
|
; CheckArg2INF:
|
|
; if (exponent2 == 2047 &&
|
|
; mantissa2 == 0)
|
|
; if (sign1 ^ sign2)
|
|
; SignalInvalid(); // Arg1 and Arg2 are opposite INFs.
|
|
; else
|
|
; ReturnINF(); // Arg1 and Arg2 are same signed INFs.
|
|
; else
|
|
; ReturnINF(); // Arg1 is INF. Arg2 is not.
|
|
;
|
|
; SignalInvalid:
|
|
; cause |= INVALID_OPERATION;
|
|
; ReturnQNaN();
|
|
;
|
|
; ReturnQNaN:
|
|
; exponent1 = 2047;
|
|
; mantissa1[MSb] = 1;
|
|
; return();
|
|
;
|
|
; ReturnINF
|
|
; exponent1 = 2047;
|
|
; mantissa1 = 0;
|
|
; return();
|
|
;
|
|
exception1
|
|
ORRS r3, r8, r0 ; if (mantissa1 == 0)
|
|
BEQ CheckArg2INF ; CheckArg2INF
|
|
TST r8, #dSignalBit ; else if (mantissa1[MSb] == 0)
|
|
BEQ SignalInvalid ; SignalInvalid
|
|
; else
|
|
; CheckArg2SNaN
|
|
CheckArg2SNaN
|
|
ADD r3, r1, #1 ; if (exponent2 == 2047 &&
|
|
CMP r3, #2048 ; ..
|
|
BNE ReturnQNaN ; ..
|
|
ORRS r3, r2, r4 ; mantissa2 != 0 &&
|
|
BEQ ReturnQNaN ; ..
|
|
TST r2, #dSignalBit ; mantissa2[MSb] == 0)
|
|
BEQ SignalInvalid ; SignalInvalid
|
|
B ReturnQNaN ; else
|
|
; ReturnQNaN
|
|
CheckArg2INF
|
|
ADD r3, r1, #1 ; if (exponent2 == 2047 &&
|
|
CMP r3, #2048 ; ..
|
|
BNE ReturnINF ; ..
|
|
ORRS r3, r2, r4 ; mantissa2 == 0 &&
|
|
BNE ReturnINF ; ..
|
|
EORS r3, r10, r6 ; if (sign1 ^ sign2)
|
|
BMI SignalInvalid ; SignalInvalid
|
|
; else
|
|
; ReturnINF
|
|
|
|
ReturnINF
|
|
AND r1, r10, #0x80000000 ; Get sign bit
|
|
ORR r1, r1, r9, LSL #20 ; Insert exponent (exponent == 2047)
|
|
B return ; r0 is already 0 so just return
|
|
|
|
SignalInvalid
|
|
ORR r5, r5, #IVO_bit ; Set invalid operation
|
|
|
|
ReturnQNaN
|
|
AND r1, r10, #0x80000000 ; Get sign bit
|
|
ORR r1, r1, r9, LSL #20 ; Insert exponent (exponent == 2047)
|
|
ORR r1, r1, #dSignalBit ; Insert mantissa high bit to ensure QNaN
|
|
ORR r1, r1, r8 ; OR in rest of high mantissa bits
|
|
B return ; r0 already has the low mantissa bits so
|
|
; just return
|
|
|
|
|
|
; Exception 2: parameter1 is finite, parameter2 is not normal (0 or denormal).
|
|
;
|
|
; if (exponent1 == 0) // parameter1 is not normal
|
|
; if (mantissa1 == 0) // parameter1 is 0
|
|
; return properly signed 0
|
|
; else if (mantissa2 != 0) // denormal+denormal
|
|
; go normalize both and add
|
|
; else // denormal+0
|
|
; return parameter1
|
|
; else if (mantissa2 != 0) // parameter2 is denormal
|
|
; go normalize parameter2 and add
|
|
; else // parameter2 is 0
|
|
; return parameter1
|
|
|
|
exception2
|
|
CMP r9, #0 ; if parameter1 is not normal
|
|
BNE p1_normal ; ..
|
|
ORRS r7, r8, r0 ; if parameter1 is 0
|
|
BNE p1_denormal ; ..
|
|
;*** Rounding mode: proper sign is a function of the rounding mode.
|
|
AND r10, r10, r6 ; return properly signed 0
|
|
B return_value ;
|
|
|
|
p1_denormal
|
|
ORRS r7, r2, r4 ; else if parameter2 is denormal
|
|
BNE p1_normalize ; go normalize both and add
|
|
B return_p1 ; else parameter2 is 0
|
|
; return parameter1
|
|
p1_normal ; (parameter2 is denormal or 0)
|
|
ORRS r7, r2, r4 ; else if parameter2 is denormal
|
|
BNE p2_normalize ; go normalize parameter2 and add
|
|
return_p1 ; else parameter2 is 0
|
|
MOV r8, r8, LSL #3 ; return parameter1
|
|
ORR r8, r8, r0, LSR #29
|
|
; ..
|
|
MOV r0, r0, LSL #3 ; ..
|
|
B return_value ; ..
|
|
|
|
; Both parameter1 and parameter2 are denormal. Normalize both then go add.
|
|
|
|
p1_normalize ; Stop when we shift into 1.0 bit
|
|
MOVS r0, r0, LSL #1 ; Account for the hidden mantissa bit
|
|
MOV r8, r8, LSL #1 ; that denormals don't have
|
|
ORRCS r8, r8, #1 ; ..
|
|
CMP r8, #NORMAL ; While mantissa1 < 1.0
|
|
BGE end_p1_norm ; ..
|
|
p1_norm_loop
|
|
MOVS r0, r0, LSL #1 ; Scale mantissa1 up by 1 place
|
|
MOV r8, r8, LSL #1 ; ..
|
|
ORRCS r8, r8, #1 ; ..
|
|
SUB r9, r9, #1 ; and exponent1 down by 1
|
|
CMP r8, #NORMAL ; ..
|
|
BLT p1_norm_loop ; ..
|
|
end_p1_norm
|
|
|
|
; parameter1 is (now) normalized, parameter2 is denormal. Normalize
|
|
; parameter2 then go add.
|
|
|
|
p2_normalize ; Stop when we shift into 1.0 bit
|
|
MOVS r4, r4, LSL #1 ; Account for the hidden mantissa bit
|
|
MOV r2, r2, LSL #1 ; that denormals don't have
|
|
ORRCS r2, r2, #1 ; ..
|
|
CMP r2, #NORMAL ; While mantissa2 < 1.0
|
|
BGE end_p2_norm ; ..
|
|
p2_norm_loop
|
|
MOVS r4, r4, LSL #1 ; Scale mantissa2 up by 1 place
|
|
MOV r2, r2, LSL #1 ; ..
|
|
ORRCS r2, r2, #1 ; ..
|
|
SUB r1, r1, #1 ; and exponent2 down by 1
|
|
CMP r2, #NORMAL ; ..
|
|
BLT p2_norm_loop ; ..
|
|
end_p2_norm
|
|
B exception_return2
|
|
; Done
|
|
|
|
END
|