;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Microsoft Research Singularity
;;;
;;; Copyright (c) Microsoft Corporation.  All rights reserved.
;;;
;;; This file contains ARM-specific assembly code.
;;;

    GBLL    mul_s
    GET     veneer_d.asm
    END

;;;; THE BELOW ROUTINE SHOULD WORK, BUT THE ARM ROUTINES SHOULD BE FASTER.
;;;; NOTE: everything below follows the END directive above; it is kept as a
;;;; reference implementation only.

;
; Translated to ARM from SH3 FP emulation routines.
;
; __muld    Double precision floating point multiplication.
;   Input:
;       r0 - Arg1.low
;       r1 - Arg1.high
;       r2 - Arg2.low
;       r3 - Arg2.high
;   Output:
;       r0 - Result.low
;       r1 - Result.high
;
;   Registers r4-r9 and lr are saved on entry and restored on exit.
;
;   Note:
;       If any FP exceptions are enabled, this routine may raise an exception.
;
;
; IEEE DOUBLE FORMAT
;
;   8 BYTES (LONG WORD * 2)
;   63 62        52 51                                                   0
;   +-+-----------+----------------------------------------------------+
;   |s|   e(11)   |                       m(52)                        |
;   +-+-----------+----------------------------------------------------+
;                 ^ point
;
;   INFINITY NUMBER :  e = 2047   m = 0
;   ZERO            :  e = 0      m = 0
;   NaN             :  e = 2047   m != 0
;   DENORMAL NUMBER :  e = 0      m != 0
;

    GET     fpe.asm

    Export  __muld
    IMPORT  FPE_Raise

    AREA    |.text|, CODE, READONLY

CARRY_CHECK EQU 0x01000000              ; Bit 24 of the high mantissa word
MSB         EQU 0x00100000              ; Hidden (1.0) bit of the high mantissa word

__muld
    STMFD   sp!, {r0-r9, lr}            ; Save off args and non-volatiles and lr
    MOV     r8, r1                      ; Load parameter1 as R8 R0
    MOV     r4, r2                      ; Load parameter2 as R2 R4
    MOV     r2, r3                      ; ...
    MOV     r5, #_FpMulD                ; Double multiply, assume no exceptions

; Unpack parameters.
;
;   R8 R0: mantissa1        R2 R4: mantissa2
;   R9: exponent1           R1: exponent2
;   R7: sign = sign1 XOR sign2 (only bit 31 is meaningful)
;
;   R5: Exception flags

    MOV     r9, r8, LSL #1              ; Extract exponent1
    MOV     r9, r9, LSR #21             ; ...
    MOV     r1, r2, LSL #1              ; Extract exponent2
    MOV     r1, r1, LSR #21             ; ...
    MVN     r3, #0                      ; Set up to extract mantissas
    EOR     r7, r8, r2                  ; Compute sign of result
    AND     r8, r8, r3, LSR #12         ; Extract mantissa1
    AND     r2, r2, r3, LSR #12         ; Extract mantissa2

; Check for exceptional cases.  All NaNs, infinities, and 0's are eliminated.
; Denormal numbers return here after normalizing them.  After these checks,
; both parameters are normalized numbers.
;
;   if (exponent1 == 2047)
;       exception1;     parameter1 is nonfinite
;   if (exponent2 == 2047)
;       exception2;     parameter1 is finite, parameter2 is nonfinite
;   if (exponent1 == 0)
;       exception3;     parameter1 is 0 or denormal, parameter2 is finite
;   if (exponent2 == 0)
;       exception4;     parameter1 is normalized, parameter2 is 0 or denormal

    ADD     r3, r9, #1                  ; if (exponent1==2047)
    CMP     r3, #2048                   ; ..
    BEQ     exception1                  ;   exception1
    ADD     r3, r1, #1                  ; if (exponent2==2047)
    CMP     r3, #2048                   ; ..
    BEQ     exception2                  ;   exception2
    CMP     r9, #0                      ; if (exponent1==0)
    BEQ     exception3                  ;   exception3
exception_return3
    CMP     r1, #0                      ; if (exponent2==0)
    BEQ     exception4                  ;   exception4
exception_return4

; Multiply the 53-bit mantissa1 and mantissa2 to produce a 106-bit product.
;
; Mantissas:
;
;    63          53 51                 32 31                                 0
;    31          21 19                  0 31                                 0
;   +-----------+-+---------------------+-----------------------------------+
;   |<--- 0 --->|1|      m1h, m2h       |             m1l, m2l              |
;   +-----------+-+---------------------+-----------------------------------+
;                 ^ Binary point
;
; Partial product terms:
;
;                                 m1l*m2l.h     m1l*m2l.l
;                 m1h*m2l.h < C + m1h*m2l.l
;               + m1l*m2h.h < C + m1l*m2h.l
; + m1h*m2h.h < C + m1h*m2h.l
;   -----------   -----------   -----------   -----------
;      res3          res2          res1          res0
;
; Intermediate result:
;
;   127                106 104 103    96 95       64 63       32 31        0
;    31                 10 9 8 7       0 31        0 31        0 31        0
;   +----------------------+-+-+--------+----/\/----+----/\/----+----/\/----+
;   |<-------- 0 --------->|?|?|R3: res3|  R8: res2 |  R0: res1 |  R6: res0 |
;   +----------------------+-+-+--------+----/\/----+----/\/----+----/\/----+
;                            ^ Binary point
;
; The carry flag is live from the ADDS below through the final ADC; the
; instructions interleaved with that chain (UMULL, SUB, ADD without the S
; suffix) do not write the flags and must stay that way.

    ADD     r9, r9, r1                  ; Compute exponent of result ...
    UMULL   r6, r1, r0, r4              ; Compute m1l * m2l
                                        ;   r6 = m1l*m2l.l, res0
                                        ;   r1 = m1l*m2l.h
    ORR     r8, r8, #MSB                ; Set mantissa1's hidden bit
    ORR     r2, r2, #MSB                ; Set mantissa2's hidden bit
    UMULL   r4, r3, r8, r4              ; Compute m1h * m2l
                                        ;   r4 = m1h*m2l.l
                                        ;   r3 = m1h*m2l.h
    ADDS    r4, r1, r4                  ; Add 1st 2 terms of res1
    SUB     r9, r9, #0x400              ; ... compute exponent of result
    ADD     r9, r9, #0x1                ; ... compute exponent of result
                                        ;   (net effect: subtract the bias, 1023)
    UMULL   r0, r1, r2, r0              ; Compute m1l * m2h
                                        ;   r0 = m1l*m2h.l
                                        ;   r1 = m1l*m2h.h
    ADCS    r1, r1, r3                  ; Add 1st 2 terms of res2, no carry out
    ADCS    r0, r0, r4                  ; Add 3rd term of res1, no carry in
    UMULL   r8, r3, r2, r8              ; Compute m1h * m2h
                                        ;   r8 = m1h*m2h.l
                                        ;   r3 = m1h*m2h.h
    ADCS    r8, r8, r1                  ; Add 3rd term of res2
    ADC     r3, r3, #0                  ; Add res2's carry to res3

; Shift the intermediate result right 17 bits, and 1 more if the product took
; 2 bits to the left of the binary point.  Fold all dropped bits from the right
; into the sticky bit S.  This leaves the result in standardized form for
; rounding.
;
; Result:
;    63      56 54                    32 31                           3 2  0
;    31      24 22                     0 31                           3 2  0
;   +---------+-+-----------------------+-----------------------------------+
;   |<-- 0 -->|1|          R8           |              R0             L|GRS|
;   +---------+-+-----------------------+-----------------------------------+
;               ^ Binary point

normalize
    CMP     r6, #0                      ; Fold bits we're about to lose into a
    ORRNE   r0, r0, #1                  ;   sticky bit
    MOV     r6, r6, LSR #17             ; Shift intermediate result right 17
    ORR     r6, r6, r0, LSL #15         ; ..
    MOV     r0, r0, LSR #17             ; ..
    ORR     r0, r0, r8, LSL #15         ; ..
    MOV     r8, r8, LSR #17             ; ..
    ORR     r8, r8, r3, LSL #15         ; ..
    TST     r8, #CARRY_CHECK            ; If product has 2 bits to the left of the
    BEQ     end_normalize               ;   binary point
    MOVS    r8, r8, LSR #1              ; Then normalize by scaling right 1
    MOVS    r0, r0, RRX                 ;   more bit
    MOV     r6, r6, RRX                 ; ..
    ADD     r9, r9, #1                  ; ..
end_normalize

; There are still 17 or 18 guard bits on the left of R6 that need to be folded
; into the sticky bit S.  It's safe to check the right ones over again because
; we're only concerned with stickiness.

    CMP     r6, #0                      ; If any guard bits below S are set
    ORRNE   r0, r0, #1                  ;   fold them into S

; Denormalize the result if necessary, with no concern for performance.

    CMP     r9, #0                      ; If exponent <= 0
    BGT     end_denormal                ; ..
    RSB     r9, r9, #0                  ; Then shift right -exponent places
    ADD     r9, r9, #1                  ;   +1 for the non-hidden bit
denormal_loop
    MOVS    r8, r8, LSR #1              ; Shift the mantissa right one place
    MOVS    r0, r0, RRX                 ; ..
    ORRCS   r0, r0, #1                  ; Fold the lost bit into the sticky bit
    SUBS    r9, r9, #1                  ; Count down; exponent ends up 0
    BNE     denormal_loop               ; ..
end_denormal

; Round to nearest.  If rounding occurs, set inexact and
; mantissa += G & ( L | R | S ).  If the rounding carries, then renormalize.

; Test for inexact.

    TST     r0, #0x7                    ; If G|R|S (=> rounding required)
    BEQ     end_round                   ; ..
    ORR     r5, r5, #INX_bit            ;   result is inexact

; Round to nearest.

    TST     r0, #0x4                    ; If G &&
    BEQ     end_round                   ; ..
    TST     r0, #0xB                    ;   L|R|S
    BEQ     end_round                   ; ..
    ADDS    r0, r0, #0x8                ; Then round the mantissa up
    ADC     r8, r8, #0                  ; ..
    CMP     r8, #CARRY_CHECK            ; If the rounding carried
    BLT     no_normal_carry             ;   (mantissa >= 0x01000000)
    MOVS    r8, r8, LSR #1              ; Then renormalize
    MOV     r0, r0, RRX                 ; ..
    ADD     r9, r9, #1                  ; ..
    B       end_round                   ; ..
no_normal_carry
    CMP     r9, #0                      ; Else if (exponent == 0)
    BNE     end_round                   ; ..
    CMP     r8, #CARRY_CHECK>>1         ;   && (mantissa >= 0x00800000)
    MOVGE   r9, #1                      ; Then rounded MaxDenorm to MinNormal
end_round

; Test for overflow.  Do this after rounding in case rounding caused overflow.

    ADD     r3, r9, #1                  ; If (exponent >= 2047)
    CMP     r3, #2048                   ; ..
    BGE     return_overflow             ;   return overflow exception

; Test tininess after rounding.

    TST     r5, #INX_bit                ; If already inexact
    BEQ     end_check_underflow1        ; ..
    CMP     r9, #0                      ;   and if exponent = 0
    ORREQ   r5, r5, #UNF_bit            ;   result has underflowed too
end_check_underflow1

; Pack the result back into IEEE format.

return_value
    MOV     r0, r0, LSR #3              ; Shift mantissa right 3 to remove GRS
    ORR     r0, r0, r8, LSL #29         ; ..
    MOV     r8, r8, LSR #3              ; ..
    MVN     r3, #0                      ; Mask away the hidden bit
    AND     r8, r8, r3, LSR #12         ; ..
    ORR     r1, r8, r9, LSL #20         ; Merge exponent and mantissa
    MOVS    r7, r7                      ; Merge sign with exponent and mantissa
    ORRMI   r1, r1, #0x80000000         ; ..

; If any trap enable flags are set corresponding to exception flags set,
; set the corresponding cause bits and cause a trap.
;
;   if (exception)
;       call handler
;       extract the possibly updated result
;   return

return
    TST     r5, #FPECause_mask          ; If any exceptions occurred ...
    BEQ     done                        ; ..

cause_trap
    ;;
    ;; Register usage:
    ;;   r0 - Default result.low
    ;;   r1 - Default result.high
    ;;   r5 - Exception information
    ;;
    ;; Stack:
    ;;   0x10(sp) - up: Saved registers
    ;;   0xC(sp): Original Arg2.high
    ;;   0x8(sp): Original Arg2.low
    ;;   0x4(sp): Original Arg1.high
    ;;   0x0(sp): Original Arg1.low
    ;;
    LDR     r2, [sp, #0x8]              ; Load original Arg2.low
    LDR     r3, [sp, #0xC]              ; Load original Arg2.high
    SUB     sp, sp, #0x8                ; Make room for exception information
    STR     r2, [sp, #0x0]              ; Store original Arg2.low
    STR     r3, [sp, #0x4]              ; Store original Arg2.high
    ; Arg1 now sits 8 bytes higher because of the SUB above.  Load low into r2
    ; and high into r3 so the registers match the usage documented below.
    LDR     r2, [sp, #0x8]              ; Load original Arg1.low
    LDR     r3, [sp, #0xC]              ; Load original Arg1.high
    STR     r0, [sp, #0x8]              ; Store default result.low
    STR     r1, [sp, #0xC]              ; Store default result.high
    MOV     r1, r5                      ; Move exception information
    ADD     r0, sp, #0x10               ; Pointer for return value

    ;; Register Usage:
    ;;   r0 - Address for return value = 0x10(sp)
    ;;   r1 - Exception information
    ;;   r2 - Original arg1.low
    ;;   r3 - Original arg1.high
    ;;
    ;; Stack Usage:
    ;;   0x14(sp): Return result.high
    ;;   0x10(sp): Return result.low
    ;;   0xC(sp):  Default result.high
    ;;   0x8(sp):  Default result.low
    ;;   0x4(sp):  Original arg2.high
    ;;   0x0(sp):  Original arg2.low

    CALL    FPE_Raise                   ; Deal with exception information

    IF Thumbing :LAND: :LNOT: Interworking
    CODE16
    bx      pc                          ; switch back to ARM mode
    nop
    CODE32
    ENDIF

    LDR     r0, [sp, #0x10]             ; Load up returned result
    LDR     r1, [sp, #0x14]             ; ...
    ADD     sp, sp, #0x8                ; Restore extra arg passing space

done
    ADD     sp, sp, #0x10               ; Pop off original args
    IF Interworking :LOR: Thumbing
    LDMIA   sp!, {r4-r9, lr}            ; Restore non-volatiles and return
    BX      lr
    ELSE
    LDMIA   sp!, {r4-r9, pc}            ; Restore non-volatiles and return
    ENDIF

;%%%%%%%%%%%%%%%%%%%%%%%%%
;% Exceptional process   %
;%%%%%%%%%%%%%%%%%%%%%%%%%

; Exception 1: parameter1 is non-finite (exponent1 == 2047); it's either a
; NaN or inf.  The mantissa has not been shifted left for the guard bits yet.
;
; If either parameter is an SNaN, return an invalid op exception with a QNaN.
; Otherwise, if either parameter is a QNaN, silently return a QNaN.  Otherwise,
; parameter1 is inf.  Return an invalid op exception with a QNaN for inf*0, or
; inf for inf*inf or inf*(non-0 finite).
;
;   if (mantissa1<51> == 0 &            // parameter1 is an SNaN
;       mantissa1 != 0)                 // ..
;       return invalid op exception
;   else if (exponent2 == 2047 &        // parameter2 is an SNaN
;            mantissa2<51> == 0 &       // ..
;            mantissa2 != 0)            // ..
;       return invalid op exception
;   else if (mantissa1 != 0)            // parameter1 is a QNaN
;       return QNaN
;   else if (exponent2 != 2047)         // parameter2 is finite
;       if (parameter2 != 0)            // inf*(non-0 finite)
;           return inf
;       else                            // inf*0
;           return invalid op exception
;   else if (mantissa2 != 0)            // parameter2 is a QNaN
;       return QNaN
;   else                                // inf*inf
;       return inf

exception1
    ORRS    r3, r8, r0                  ; if (mantissa1 != 0 &&
    BEQ     e1_p2_snan_check            ; ..
    TST     r8, #dSignalBit             ;     mantissa1[MSb] == 0)
    BEQ     return_invalid              ;   return invalid operation
e1_p2_snan_check
    ADD     r3, r1, #1                  ; else if (exponent2 == 2047 &&
    CMP     r3, #2048                   ; ..
    BNE     e1_p2_not_snan              ; ..
    ORRS    r3, r2, r4                  ;          mantissa2 != 0 &&
    BEQ     e1_p2_not_snan              ; ..
    TST     r2, #dSignalBit             ;          mantissa2[MSb] == 0)
    MOVEQ   r8, r2                      ;   copy mantissa2 to mantissa1
    MOVEQ   r0, r4                      ;   ..
    BEQ     return_invalid              ;   return invalid operation
e1_p2_not_snan
    ORRS    r3, r8, r0                  ; else if (mantissa1 != 0)
    BNE     return_QNaN                 ;   return QNaN
e1_p1_is_INF
    ADD     r3, r1, #1                  ; else if (exponent2 != 2047)
    CMP     r3, #2048                   ; ..
    BEQ     e1_p2_INF_NaN               ; ..
    CMP     r1, #0                      ;   if (parameter2 != 0)
    ORREQS  r3, r2, r4                  ;   ..
    BNE     return_inf                  ;     return INF
    MOV     r8, #0                      ;   else
    MOV     r0, #0                      ;     zero out mantissa1 for QNaN
    B       return_invalid              ;     return invalid operation
e1_p2_INF_NaN
    ORRS    r3, r2, r4                  ; else if (mantissa2 != 0)
    MOV     r8, r2                      ;   copy mantissa2 to mantissa1
    MOV     r0, r4                      ;   (MOV leaves the ORRS flags intact)
    BNE     return_QNaN                 ;   return QNaN
    B       return_inf                  ; else
                                        ;   return INF

; Exception 2: parameter1 is finite.  parameter2 is non-finite (exponent2 ==
; 2047); it's either a NaN or inf.  The mantissa has not been shifted left
; for the guard bits yet.
;
; If parameter2 is an SNaN, return an invalid op exception with a QNaN.
; Otherwise, if it's a QNaN, silently return a QNaN.  Otherwise it's finite*inf
; so return an invalid op exception with a QNaN for 0*inf, or inf for
; (non-0 finite)*inf.
;
;   if (mantissa2 != 0 &
;       mantissa2<51> == 0)             // parameter2 is an SNaN
;       return invalid op exception
;   else if (mantissa2 != 0)            // parameter2 is a QNaN
;       return QNaN
;   else if (parameter1 != 0)           // parameter1 is non-0 finite
;       return inf
;   else                                // it's 0*inf
;       return invalid op exception

exception2
    ORRS    r3, r2, r4                  ; if (mantissa2 != 0)
    BEQ     e2_p2_is_inf                ;   (else parameter2 is inf)
    ; parameter2 is a NaN.  return_QNaN builds its result from R8 R0, which
    ; still hold the finite parameter1's mantissa here, so copy mantissa2 over
    ; before either NaN exit (SNaN or QNaN) is taken.
    MOV     r8, r2                      ; copy mantissa2 into mantissa1 for QNaN
    MOV     r0, r4                      ; ..
    TST     r2, #dSignalBit             ; if (mantissa2[MSb] == 0)
    BEQ     return_invalid              ;   SNaN: return invalid operation
    B       return_QNaN                 ; else QNaN: return QNaN
e2_p2_is_inf
    ORRS    r3, r8, r0                  ; else if (parameter1 != 0)
    CMPEQ   r9, #0                      ; ..
    MOVEQ   r8, r2                      ;   copy mantissa2 to mantissa1 for QNaN
    MOVEQ   r0, r4                      ;   ..
    BEQ     return_invalid              ;   0*inf: return invalid operation
    B       return_inf                  ;   return INF

; Exception 3: parameter1 is 0 or denormal (exponent1 = 0), parameter2 is
; finite.
;
;   if (mantissa1 == 0)
;       return zero
;   else normalize parameter1

exception3
    ORRS    r3, r8, r0                  ; if (mantissa1 == 0)
    BEQ     return_zero                 ;   return zero
p1_norm                                 ; Normalize parameter1 stop when shift into 1.0 bit
    MOVS    r0, r0, LSL #1              ; Account for the hidden mantissa bit
    MOV     r8, r8, LSL #1              ;   that denormals don't have
    ORRCS   r8, r8, #0x1                ; ..
    CMP     r8, #MSB                    ; While mantissa1 < 1.0
    BGE     end_p1_norm                 ; ..
p1_norm_loop
    MOVS    r0, r0, LSL #1              ; Scale mantissa1 up by 1 place
    MOV     r8, r8, LSL #1              ; ..
    ORRCS   r8, r8, #0x1                ; ..
    SUB     r9, r9, #1                  ;   and exponent1 down by 1
    CMP     r8, #MSB                    ;
    BLT     p1_norm_loop                ;
end_p1_norm
    B       exception_return3           ; Done

; Exception 4: parameter1 is finite and (now) normalized, parameter2 is 0 or
; denormal (exponent2 = 0).
;
;   if (mantissa2 == 0)
;       return zero
;   else normalize parameter2

exception4
    ORRS    r3, r2, r4                  ; if (mantissa2 == 0)
    BEQ     return_zero                 ;   return zero
p2_norm                                 ; Normalize parameter2 stop when shift into 1.0 bit
    MOVS    r4, r4, LSL #1              ; Account for the hidden mantissa bit
    MOV     r2, r2, LSL #1              ;   that denormals don't have
    ORRCS   r2, r2, #0x1                ; ..
    CMP     r2, #MSB                    ; While mantissa2 < 1.0
    BGE     end_p2_norm                 ; ..
p2_norm_loop
    MOVS    r4, r4, LSL #1              ; Scale mantissa2 up by 1 place
    MOV     r2, r2, LSL #1              ; ..
    ORRCS   r2, r2, #0x1                ; ..
    SUB     r1, r1, #1                  ;   and exponent2 down by 1
    CMP     r2, #MSB                    ;
    BLT     p2_norm_loop                ;
end_p2_norm
    B       exception_return4           ; Done

; Cause an overflow exception (=> inexact) and return properly signed inf.

return_overflow
    ORR     r5, r5, #OVF_bit :OR: INX_bit   ; Report overflow (=> inexact)
                                        ; Fall thru to return inf

; Return properly signed inf.  The sign is applied by return_value from R7.

return_inf
    MVN     r9, #0                      ; exponent1 = 2047
    MOV     r9, r9, LSR #21             ; ..
    MOV     r8, #0                      ; mantissa1 = 0
    MOV     r0, #0                      ; ..
    B       return_value

; Return 0.

return_zero
    MOV     r1, r7, LSR #31             ; Apply the sign to zero
    MOV     r1, r1, LSL #31             ; ..
    MOV     r0, #0                      ; Zero low mantissa
    B       return                      ; Done

; Cause an invalid operation exception and return a QNaN.

return_invalid
    ORR     r5, r5, #IVO_bit            ; Report invalid op exception
                                        ; Fall thru to return a QNaN

; Return a QNaN.  The payload is taken from R8 R0 (mantissa1); callers that
; want parameter2's payload copy it into R8 R0 before branching here.

return_QNaN
    ORR     r1, r8, #0x7F000000         ; Return a QNaN
    ORR     r1, r1, #0x00F80000         ; ..
    B       return                      ; ..

    END