; Listing metadata: 928 lines, 35 KiB (labelled "NASM" by the viewer; the content is ARM assembler macro source).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;
|
|
;;; Microsoft Research Singularity
|
|
;;;
|
|
;;; Copyright (c) Microsoft Corporation. All rights reserved.
|
|
;;;
|
|
;;; This file contains ARM-specific assembly code.
|
|
;;;
|
|
|
|
; Assembler source for FPA support code and emulator
|
|
; ==================================================
|
|
; Some useful assembler macros. Also used by "fplib".
|
|
;
|
|
; Copyright (C) Advanced RISC Machines Limited, 1992-7. All rights reserved.
|
|
;
|
|
; RCS Revision: 1
|
|
; Checkin Date: 2007/06/29 02:59:16
|
|
; Revising Author
|
|
|
|
;===========================================================================
|
|
|
|
; Register names used when isolating the base register of a PC-relative or
|
|
; register-relative expression in the macros below. The technique is to set
|
|
; a temporary arithmetic variable Base to :BASE:(expression), then refer to
|
|
; R$Base.
|
|
|
|
; Register aliases R00000000..R0000000F for R0..R15. The macros below set an
; arithmetic variable Base to :BASE:(expression) and then write R$Base; these
; aliases give the resulting names a meaning.
R00000000       RN      R0
R00000001       RN      R1
R00000002       RN      R2
R00000003       RN      R3
R00000004       RN      R4
R00000005       RN      R5
R00000006       RN      R6
R00000007       RN      R7
R00000008       RN      R8
R00000009       RN      R9
R0000000A       RN      R10
R0000000B       RN      R11
R0000000C       RN      R12
R0000000D       RN      R13
R0000000E       RN      R14
R0000000F       RN      R15
|
|
|
|
;===========================================================================
|
|
|
|
; Two general purpose arithmetic variables.
|
|
|
|
; Tempa, Tempa2: global arithmetic variables available as assembly-time scratch.
                GBLA    Tempa
                GBLA    Tempa2
|
|
|
|
;===========================================================================
|
|
|
|
; The following macro is useful for shifting bit fields around when their
|
|
; positions are symbolic constants - which makes it unclear to the author
|
|
; whether LSR or LSL is needed.
|
|
|
|
; BiShift: emit one "$opc" instruction whose last operand is $Rm shifted by
; the net effect of "$rshift then $lshift" (a single LSR or a single LSL).
;   $opc      Mnemonic. LDR.../STR... use the [$Rn,$Rm,shift] addressing form
;             and therefore require $Rn.
;   $Rd       Destination (or transferred) register.
;   $Rn       First operand / base register; null for two-operand instructions.
;   $Rm       Register to be shifted.
;   $rshift   Text that must begin exactly "LSR #".
;   $lshift   Text that must begin exactly "LSL #".
        MACRO
$label  BiShift $opc,$Rd,$Rn,$Rm,$rshift,$lshift
        [ "$lshift":LEFT:5 <> "LSL #"
        ! 4,"Left shift must start exactly 'LSL #'"
        |
          [ "$rshift":LEFT:5 <> "LSR #"
          ! 4,"Right shift must start exactly 'LSR #'"
          |
            ; Strip the 5-character "LSx #" prefixes to leave the two amounts.
            LCLS    left
            LCLS    right
left        SETS    "$lshift":RIGHT:(:LEN:"$lshift" - 5)
right       SETS    "$rshift":RIGHT:(:LEN:"$rshift" - 5)
            [ "$Rn" = ""
              ; Two-operand form: loads and stores are not allowed here.
              ASSERT  (("$opc":LEFT:3) <> "LDR") :LAND: (("$opc":LEFT:3) <> "STR")
              [ ($right) > ($left)
$label        $opc    $Rd,$Rm,LSR #(($right) - ($left))
              |
$label        $opc    $Rd,$Rm,LSL #(($left) - ($right))
              ]
            |
              [ ($right) > ($left)
                ; Net right shift.
                [ (("$opc":LEFT:3) = "LDR") :LOR: (("$opc":LEFT:3) = "STR")
$label          $opc    $Rd,[$Rn,$Rm,LSR #(($right) - ($left))]
                |
$label          $opc    $Rd,$Rn,$Rm,LSR #(($right) - ($left))
                ]
              |
                ; Net left shift (possibly by zero).
                [ (("$opc":LEFT:3) = "LDR") :LOR: (("$opc":LEFT:3) = "STR")
$label          $opc    $Rd,[$Rn,$Rm,LSL #(($left) - ($right))]
                |
$label          $opc    $Rd,$Rn,$Rm,LSL #(($left) - ($right))
                ]
              ]
            ]
          ]
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; The following macro isolates the exponent field from the standard sign/
|
|
; uncommon bit/exponent word, putting it at the top of the destination
|
|
; register.
|
|
|
|
; Exp2Top: place the exponent field of $src in the top EIExp_len bits of $dest.
;   $cond   Optional condition code, applied to every instruction emitted.
;   $s      Optional "S" suffix, applied to the final instruction only.
        MACRO
$label  Exp2Top $dest,$src,$cond,$s
        [ EIExp_pos = 0
$label  MOV$cond$s $dest,$src,LSL #32-EIExp_len         ;Field is at bit 0: one shift
        |
$label  MOV$cond $dest,$src,LSR #EIExp_pos              ;Bring the field down to bit 0,
        MOV$cond$s $dest,$dest,LSL #32-EIExp_len        ; then up to the top
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; The following macros isolate the exponent fields from two standard sign/
|
|
; uncommon bit/exponent words, putting the first one at the top of a
|
|
; destination register. ExpDiff puts the difference at the top of another
|
|
; register and sets the condition codes on it, while ExpComp simply sets the
|
|
; condition codes on the difference.
|
|
|
|
; ExpComp: put the exponent of $src1 at the top of $dest, then set the
; condition codes by comparing it with the (top-aligned) exponent of $src2.
; $tmp is written only when EIExp_pos is non-zero.
        MACRO
$label  ExpComp $dest,$src1,$src2,$tmp
        ASSERT  $dest <> $src1
        ASSERT  $dest <> $src2
        ASSERT  $dest <> $tmp
        ASSERT  $tmp <> $src1
        ASSERT  $tmp <> $src2
        [ EIExp_pos = 0
$label  MOV     $dest,$src1,LSL #32-EIExp_len
        CMP     $dest,$src2,LSL #32-EIExp_len
        |
$label  MOV     $dest,$src1,LSR #EIExp_pos              ;First exponent -> top of $dest
        MOV     $dest,$dest,LSL #32-EIExp_len
        MOV     $tmp,$src2,LSR #EIExp_pos               ;Second exponent -> bottom of $tmp
        CMP     $dest,$tmp,LSL #32-EIExp_len            ;Compare with both top-aligned
        ]
        MEND
|
|
|
|
; ExpDiff: put the exponent of $src1 at the top of $dest, and the difference
; between it and the (top-aligned) exponent of $src2 in $diff, setting the
; condition codes on that difference.
        MACRO
$label  ExpDiff $diff,$dest,$src1,$src2
        ASSERT  $diff <> $dest
        ASSERT  $diff <> $src1
        ASSERT  $diff <> $src2
        ASSERT  $dest <> $src1
        ASSERT  $dest <> $src2
        [ EIExp_pos = 0
$label  MOV     $dest,$src1,LSL #32-EIExp_len
        SUBS    $diff,$dest,$src2,LSL #32-EIExp_len
        |
$label  MOV     $dest,$src1,LSR #EIExp_pos              ;First exponent -> top of $dest
        MOV     $dest,$dest,LSL #32-EIExp_len
        MOV     $diff,$src2,LSR #EIExp_pos              ;$diff briefly holds 2nd exponent
        SUBS    $diff,$dest,$diff,LSL #32-EIExp_len     ;Difference, flags set
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; The following macro performs the standard test for infinities or NaNs on
|
|
; an internal floating point number. It only works on legitimate internal
|
|
; precision numbers - i.e. it produces undefined results if the bit pattern
|
|
; in the internal precision number is an undefined one. The parameters are:
|
|
; $res: On exit, the top bit of this register is set if the number is a
|
|
; NaN or infinity, clear if it isn't;
|
|
; $sue: Register holding sign, uncommon bit and exponent of number to be
|
|
; tested; preserved on exit;
|
|
; $mhi: Register containing high word of mantissa of number to be tested;
|
|
; preserved on exit;
|
|
; $mlo: (Not a parameter of this macro.) The low word of the mantissa is
|
|
; not examined by this test, so the macro never accesses it.
|
|
;
|
|
; In addition, the N flag is set on exit if the number is a NaN or infinity,
|
|
; clear if it isn't.
|
|
;
|
|
; The criterion used for a number to be a NaN or infinity is:
|
|
;
|
|
; Uncommon bit = 1; and
|
|
; Exponent top bit = 1; and
|
|
; Exponent = MAX or units bit = 1.
|
|
;
|
|
; Whether the operand is in fact a NaN or an infinity is then determined by
|
|
; seeing whether the fraction is non-zero or zero.
|
|
|
|
; TNaNInf: NaN-or-infinity test on an internal-format number. On exit the top
; bit of $res (and the N flag) is set if the number is a NaN or infinity.
; $sue and $mhi are only read.
        MACRO
$label  TNaNInf $res,$sue,$mhi
        ASSERT  $res <> $sue
        ASSERT  $res <> $mhi
$label  MOV     $res,$sue,LSL #32-(EIExp_pos+EIExp_len)         ;Exponent top bit -> bit 31
        CMN     $res,#1:SHL:(32-(EIExp_pos+EIExp_len))          ;Exponent = MAX?
        ANDCC   $res,$res,$mhi                                  ;If not, require units bit
        ANDS    $res,$res,$sue,LSL #31-Uncommon_pos             ;Always require uncommon bit
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; The following macro contains the standard code for denormalising a
|
|
; mantissa by a specified amount, producing guard, round and sticky bits in
|
|
; the process. The parameters are:
|
|
; $mhi: Register containing mantissa high word; updated on exit;
|
|
; $mlo: Register containing mantissa low word; updated on exit;
|
|
; $grs: Register that will contain the guard bit (in bit 31), the round
|
|
; bit (in bit 30) and the sticky bit (in whether bits 29:0 are zero
|
|
; or non-zero) on exit;
|
|
; $sh: Register containing the shift amount; corrupt on exit;
|
|
; $t1,$t2: Registers used as temporaries; corrupt on exit.
|
|
; $grs may be null to indicate that the guard, round and sticky information
|
|
; isn't wanted. $mlo can be null to indicate that we only need to
|
|
; denormalise a single word: in this case, $grs must be null.
|
|
; Note that the $grs register may alternatively be interpreted as
|
|
; containing a round bit in bit 31 and a sticky bit in bits 30:0, in cases
|
|
; when there is no need for a guard bit. Also note that $sh may be the same
|
|
; register as either of $grs and $t2; otherwise, the registers must be
|
|
; distinct from each other.
|
|
; Finally, note that branch instructions are used around a 4 instruction
|
|
; sequence and a 5 instruction sequence. This is because statistics show
|
|
; that larger shift amounts are less common than smaller ones in general:
|
|
; thus these instruction sequences are obeyed less than 50% of the time,
|
|
; which makes the code with branches slightly faster.
|
|
|
|
; Denorm: shift the mantissa ($mhi,$mlo) right by the amount in $sh.
;   $mhi,$mlo   Mantissa high/low words, updated. $mlo may be null for a
;               single-word mantissa, in which case $grs must be null too.
;   $grs        If non-null, receives guard (bit 31), round (bit 30) and
;               sticky (bits 29:0 non-zero) information.
;   $sh         Shift amount; $t1,$t2 are temporaries.
; Three variants are generated depending on which operands are null.
        MACRO
$label  Denorm  $mhi,$mlo,$grs,$sh,$t1,$t2
        ASSERT  $mhi <> $sh
        ASSERT  $mhi <> $t1
        ASSERT  $mhi <> $t2
        ASSERT  $sh <> $t1
        ASSERT  $t1 <> $t2
$label  MOV     $t1,$sh,LSR #5                  ;$t1 := whole words to shift by
        BIC     $t2,$sh,$t1,LSL #5              ;$t2 := remaining odd bits
        [ "$mlo" = ""

          ; --- Single-word mantissa ---
          ASSERT  "$grs" = ""
        CMP     $t1,#1                          ;One word or more?
        MOVLO   $mhi,$mhi,LSR $t2               ;No: shift by the odd bits
        MOVHS   $mhi,#0                         ;Yes: nothing is left

        |
          ASSERT  $mlo <> $mhi
          ASSERT  $mlo <> $sh
          ASSERT  $mlo <> $t1
          ASSERT  $mlo <> $t2
          [ "$grs" = ""

            ; --- Two-word mantissa, no guard/round/sticky ---
        CMP     $t1,#1                          ;HI: 2+ words, EQ: 1 word, LO: 0 words
        RSBLS   $t1,$t2,#32                     ;0 or 1 words: do the odd-bit shift
        MOVLS   $mlo,$mlo,LSR $t2
        ORRLS   $mlo,$mlo,$mhi,LSL $t1
        MOVLS   $mhi,$mhi,LSR $t2
        MOVEQ   $mlo,$mhi                       ;Exactly 1 word: high moves to low
        MOVHI   $mlo,#0                         ;2+ words: low is cleared
        MOVHS   $mhi,#0                         ;1+ words: high is cleared

          |

            ; --- Two-word mantissa with guard/round/sticky ---
            ASSERT  $grs <> $mhi
            ASSERT  $grs <> $mlo
            ASSERT  $grs <> $t1
            ASSERT  $grs <> $t2
        CMP     $t1,#2                          ;Flags encode the word count:
        TEQCC   $t1,#0                          ; 3+ CS/NE, 2 CS/EQ, 1 CC/NE, 0 CC/EQ
        RSB     $t1,$t2,#32                     ;Odd-bit shift; flags are untouched
        MOV     $grs,$mlo,LSL $t1
        MOV     $mlo,$mlo,LSR $t2
        ORR     $mlo,$mlo,$mhi,LSL $t1
        MOV     $mhi,$mhi,LSR $t2
        BEQ     %f90                            ;EQ: no 32-bit step needed
        ORRNE   $grs,$grs,$grs,LSL #2           ;32-bit step, folding the old $grs
        ORRNE   $grs,$mlo,$grs,LSR #2           ; into the sticky bits
        MOVNE   $mlo,$mhi
        MOVNE   $mhi,#0
90
        BCC     %f99                            ;CC: no 64-bit step needed
        ORRCS   $grs,$grs,$mlo                  ;64-bit step, folding everything
        ORRCS   $grs,$grs,$grs,LSL #2           ; shifted out into the sticky bits
        ORRCS   $grs,$mhi,$grs,LSR #2
        MOVCS   $mlo,#0
        MOVCS   $mhi,#0
99
          ]
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to separate a 32-bit value in a register into its two 16-bit halves.
|
|
|
|
; Split16: $resh := top 16 bits of $src (right-aligned), $resl := bottom 16
; bits of $src. $resl may be the same register as $src; $resh may not.
        MACRO
$label  Split16 $resh,$resl,$src
        ASSERT  $resh <> $src
$label  MOV     $resh,$src,LSR #16              ;High half, right-aligned
        BIC     $resl,$src,$resh,LSL #16        ;Source with the high half cleared
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to do a (16,16)x32 -> 64 multiplication. Done by breaking it up into
|
|
; four 16x16 multiplications and recombining the pieces. (N.B. The trick
|
|
; described in Knuth section 4.3.3 for reducing the four multiplications to
|
|
; three plus some additions and sign manipulations is not profitable at this
|
|
; size: it only becomes profitable when trying to synthesise a 64x64
|
|
; multiplication out of 32x32 multiplications.)
|
|
; Also allows the flags to be set on the high word of the result and an
|
|
; optional addend to be added into the high word of the result: however,
|
|
; combining these does *not* result in the C flag being set correctly for
|
|
; the carry-out from the notional addition of the addend and the high word.
|
|
; Only the Z and N flags have meaningful values.
|
|
; The operands are:
|
|
; $resh,$resl: Registers that will receive the 64-bit product;
|
|
; $op1h,$op1l: Registers containing the high and low 16 bits of the first
|
|
; 32-bit operand;
|
|
; $op2: Register containing the second 32-bit operand;
|
|
; $add: If present, register containing the addend;
|
|
; $s: "S" to set the condition codes;
|
|
; $t1,$t2,$t3: Three temporary registers required during the calculation.
|
|
; The restrictions on which registers may be the same are complicated and
|
|
; are detailed in the ASSERT statements below.
|
|
|
|
; Mul64: ($resh,$resl) := ($op1h:$op1l) * $op2, built from four 16x16
; multiplications. If $add is non-null it is added into the high word; if $s
; is "S" the flags are set by the final ADC on the high word.
; $t1,$t2,$t3 are temporaries. The ASSERTs below state which registers must
; be distinct.
        MACRO
$label  Mul64   $resh,$resl,$op1h,$op1l,$op2,$add,$s,$t1,$t2,$t3
        ASSERT  $resh <> $resl
        ASSERT  $resl <> $op1h
        ASSERT  $resl <> $t1
        ASSERT  $resl <> $t2
        ASSERT  $resl <> $t3
        ASSERT  $op1h <> $op1l
        ASSERT  $op1h <> $op2
        ASSERT  $op1h <> $t1
        ASSERT  $op1h <> $t2
        ASSERT  $op1h <> $t3
        ASSERT  $op1l <> $op2
        ASSERT  $op1l <> $t1
        ASSERT  $op1l <> $t2
        ASSERT  $op1l <> $t3
        ASSERT  $op2 <> $t1
        ASSERT  $t1 <> $t2
        ASSERT  $t1 <> $t3
        ASSERT  $t2 <> $t3
$label  Split16 $t1,$t2,$op2                    ;$t1 := op2h, $t2 := op2l
        [ "$add" <> ""
        ASSERT  $add <> $op1h
        ASSERT  $add <> $op1l
        ASSERT  $add <> $op2
        ASSERT  $add <> $t1
        ASSERT  $add <> $t2
        MLA     $t3,$op1h,$t1,$add              ;$t3 := op1h*op2h + addend
        |
        MUL     $t3,$op1h,$t1                   ;$t3 := op1h*op2h
        ]
        MUL     $t1,$op1l,$t1                   ;$t1 := op1l*op2h
        MUL     $resl,$t2,$op1l                 ;$resl := op1l*op2l
        ADDS    $resl,$resl,$t1,LSL #16         ;Fold op1l*op2h into ($t3,$resl)
        ADC     $t3,$t3,$t1,LSR #16
        MUL     $t2,$op1h,$t2                   ;$t2 := op1h*op2l
        ADDS    $resl,$resl,$t2,LSL #16         ;Fold op1h*op2l in as well,
        ADC$s   $resh,$t3,$t2,LSR #16           ; giving ($resh,$resl)
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to transfer the destination register of an instruction to a set of
|
|
; registers. Operands are:
|
|
; $type: "FPASC" or "FPE";
|
|
; $dest: The destination register list;
|
|
; $instr: The instruction whose destination is to be transferred;
|
|
; $t: A temporary.
|
|
|
|
; GetDst: load the value of the destination floating point register named by
; the Ds field of $instr into the register list $dest. $t is a temporary.
;   "FPASC": dump four FPA registers to the stack with SFM and pick one out.
;   "FPE":   read the value from the FPE_Regs register image.
        MACRO
$label  GetDst  $type,$dest,$instr,$t
        LCLA    Base
        LCLA    Offset
        ASSERT  ("$type" = "FPASC") :LOR: ("$type" = "FPE")
        [ ("$type" = "FPASC")

$label  TST     $instr,#4:SHL:Ds_pos            ;Register in F0-3 or in F4-7?
        SFMEQFD F0,4,[Rsp]!                     ;Dump the matching group of four;
        SFMNEFD F4,4,[Rsp]!                     ; quicker than selecting just one
        AND     $t,$instr,#3:SHL:Ds_pos         ;Index within the dumped group
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
        BiShift ADD,$t,Rsp,$t,LSR #Ds_pos,LSL #2        ;Address of the value
        LDMIA   $t,$dest                        ;Fetch it
        ADD     Rsp,Rsp,#48                     ;Drop the 4*12-byte dump
        ASSERT  Ds_mask = ((4+3):SHL:Ds_pos)

        |

$label  AND     $t,$instr,#Ds_mask              ;Register number, still in place
          [ :LNOT:FPE4WordsPerReg
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
          ASSERT  Ds_pos <= 27
          ]
          ; Split FPE_Regs into base register and offset.
Base    SETA    :BASE:FPE_Regs
          [ Base = 15
Offset  SETA    FPE_Regs-({PC}+8)
          |
Offset  SETA    :INDEX:FPE_Regs
          ]
          [ FPE4WordsPerReg
        BiShift ADD,$t,R$Base,$t,LSR #Ds_pos,LSL #4
          |
        BiShift ADD,$t,R$Base,$t,LSR #Ds_pos,LSL #2
          ]
          [ Offset <> 0
        ADD     $t,$t,#Offset
          ]
        LDMIA   $t,$dest

        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to transfer the destination register of a non-FLT instruction from a
|
|
; set of registers. Operands are:
|
|
; $type: "FPASC" or "FPE";
|
|
; $source: The source register list;
|
|
; $instr: The instruction whose destination is to be transferred;
|
|
; $t: A temporary.
|
|
; $l: If present, this produces a "long" form of the macro
|
|
|
|
; PutDst: write the value in register list $source to the destination
; floating point register named by the Ds field of $instr (non-FLT case).
;   "FPASC": push $source, then call (LR := return address) the entry of
;            $type._PutDstRoutines selected by Ds, at 8 bytes per entry.
;   "FPE":   store $source into the FPE_Regs register image.
;   $t       Temporary; must differ from $instr.
;   $l       If non-null, the FPASC table offset is added in two parts (low
;            byte, then the rest) instead of as a single immediate.
        MACRO
$label  PutDst  $type,$source,$instr,$t,$l
        ASSERT  $t <> $instr
        LCLA    Base
        LCLA    Offset
        ASSERT  ("$type" = "FPASC") :LOR: ("$type" = "FPE")
        [ ("$type" = "FPASC")

        ALIGN
$label
        STMFD   Rsp!,$source
        AND     $t,$instr,#Ds_mask
10
        BiShift ADD,$t,PC,$t,LSR #Ds_pos,LSL #3 ;PC read here is %b10+8
          [ "$l"=""
        MOV     LR,PC
        ADD     PC,$t,#($type._PutDstRoutines - (%b10+8))
          |
        ADD     $t,$t,#($type._PutDstRoutines - (%b10+8)):AND:&FF
        MOV     LR,PC
        ADD     PC,$t,#($type._PutDstRoutines - (%b10+8)):AND::NOT:&FF
          ]

        |

        ; "$type" = "FPE"
$label  AND     $t,$instr,#Ds_mask              ;Register number, still in place
          [ :LNOT:FPE4WordsPerReg
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
          ASSERT  Ds_pos <= 27
          ]
          ; Split FPE_Regs into base register and offset.
Base    SETA    :BASE:FPE_Regs
          [ Base = 15
Offset  SETA    FPE_Regs-({PC}+8)
          |
Offset  SETA    :INDEX:FPE_Regs
          ]
          [ FPE4WordsPerReg
        BiShift ADD,$t,R$Base,$t,LSR #Ds_pos,LSL #4
          |
        BiShift ADD,$t,R$Base,$t,LSR #Ds_pos,LSL #2
          ]
          [ Offset <> 0
        ADD     $t,$t,#Offset
          ]
        STMIA   $t,$source

        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to transfer the destination register of a FLT instruction from a set
|
|
; of registers. Operands are:
|
|
; $type: "FPASC" or "FPE";
|
|
; $source: The source register list;
|
|
; $instr: The instruction whose destination is to be transferred;
|
|
; $t: A temporary.
|
|
|
|
; PutFDst: as PutDst, but for FLT instructions, whose destination register
; number is taken from the S1 field of $instr rather than the Ds field.
;   "FPASC": push $source, then call the entry of $type._PutDstRoutines
;            selected by S1, at 8 bytes per entry.
;   "FPE":   store $source into the FPE_Regs register image.
;   $t       Temporary; must differ from $instr.
        MACRO
$label  PutFDst $type,$source,$instr,$t
        ASSERT  $t <> $instr
        LCLA    Base
        LCLA    Offset
        ASSERT  ("$type" = "FPASC") :LOR: ("$type" = "FPE")
        [ ("$type" = "FPASC")

        ALIGN
$label
        STMFD   Rsp!,$source
        AND     $t,$instr,#S1_mask
10
        BiShift ADD,$t,PC,$t,LSR #S1_pos,LSL #3 ;PC read here is %b10+8
        MOV     LR,PC
        ADD     PC,$t,#($type._PutDstRoutines - (%b10+8))

        |

        ; "$type" = "FPE"
$label  AND     $t,$instr,#S1_mask              ;Register number, still in place
          [ :LNOT:FPE4WordsPerReg
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
          ASSERT  S1_pos <= 27
          ]
          ; Split FPE_Regs into base register and offset.
Base    SETA    :BASE:FPE_Regs
          [ Base = 15
Offset  SETA    FPE_Regs-({PC}+8)
          |
Offset  SETA    :INDEX:FPE_Regs
          ]
          [ FPE4WordsPerReg
        BiShift ADD,$t,R$Base,$t,LSR #S1_pos,LSL #4
          |
        BiShift ADD,$t,R$Base,$t,LSR #S1_pos,LSL #2
          ]
          [ Offset <> 0
        ADD     $t,$t,#Offset
          ]
        STMIA   $t,$source

        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to get the first source register of an instruction into three
|
|
; registers. Operands are:
|
|
; $type: "FPASC" or "FPE";
|
|
; $dest: The destination register list;
|
|
; $instr: The instruction whose first source is to be transferred;
|
|
; $t: A temporary.
|
|
|
|
; GetS1: load the value of the first source register (S1 field of $instr)
; into the register list $dest. $t is a temporary.
;   "FPASC": dump four FPA registers to the stack with SFM and pick one out.
;   "FPE":   read the value from the FPE_Regs register image.
        MACRO
$label  GetS1   $type,$dest,$instr,$t
        LCLA    Base
        LCLA    Offset
        ASSERT  ("$type" = "FPASC") :LOR: ("$type" = "FPE")
        [ ("$type" = "FPASC")

$label  TST     $instr,#4:SHL:S1_pos            ;Register in F0-3 or in F4-7?
        SFMEQFD F0,4,[Rsp]!                     ;Dump the matching group of four;
        SFMNEFD F4,4,[Rsp]!                     ; quicker than selecting just one
        AND     $t,$instr,#3:SHL:S1_pos         ;Index within the dumped group
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
        BiShift ADD,$t,Rsp,$t,LSR #S1_pos,LSL #2        ;Address of the value
        LDMIA   $t,$dest                        ;Fetch it
        ADD     Rsp,Rsp,#48                     ;Drop the 4*12-byte dump
        ASSERT  S1_mask = ((4+3):SHL:S1_pos)

        |

$label  AND     $t,$instr,#S1_mask              ;Register number, still in place
          [ :LNOT:FPE4WordsPerReg
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
          ASSERT  S1_pos <= 27
          ]
          ; Split FPE_Regs into base register and offset.
Base    SETA    :BASE:FPE_Regs
          [ Base = 15
Offset  SETA    FPE_Regs-({PC}+8)
          |
Offset  SETA    :INDEX:FPE_Regs
          ]
          [ FPE4WordsPerReg
        BiShift ADD,$t,R$Base,$t,LSR #S1_pos,LSL #4
          |
        BiShift ADD,$t,R$Base,$t,LSR #S1_pos,LSL #2
          ]
          [ Offset <> 0
        ADD     $t,$t,#Offset
          ]
        LDMIA   $t,$dest

        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to get the second source register or constant of an instruction into
|
|
; three registers. Operands are:
|
|
; $type: "FPASC" or "FPE";
|
|
; $dest: The destination register list;
|
|
; $instr: The instruction whose second source is to be transferred;
|
|
; $t,$t2: Temporaries.
|
|
|
|
; GetS2: load the second source operand of $instr - a register value or a
; constant-table entry, selected by S2_Ibit - into the register list $dest.
; $t and $t2 are temporaries and must differ.
;   "FPASC": constants come from $type.ConstTable; register values are
;            obtained by dumping four FPA registers to the stack.
;   "FPE":   constants come from FPEConstTable; register values from FPE_Regs.
        MACRO
$label  GetS2   $type,$dest,$instr,$t,$t2
        LCLA    Base
        LCLA    Offset
        ASSERT  ("$type" = "FPASC") :LOR: ("$type" = "FPE")
        ASSERT  S2_Ibit = 1:SHL:(S2_pos+3)
        ASSERT  S2_pos = 0
        ASSERT  $t <> $t2
$label  MOVS    $t2,$instr,LSL #29              ;C := S2_Ibit; N set for F4-7, clear
                                                ; for F0-3; $t2 := register/constant
                                                ; number, left-aligned
        [ ("$type" = "FPASC")

          ; Split the constant table address into base register and offset.
Base    SETA    :BASE:$type.ConstTable
          [ Base = 15
Offset  SETA    $type.ConstTable-({PC}+8)
          |
Offset  SETA    :INDEX:$type.ConstTable
          ]
        ADDCS   $t,R$Base,$t2,LSR #25           ;Constant: form its address
          [ Offset <> 0
        ADDCS   $t,$t,#Offset
          ]
        BCS     %f10
        SFMPLFD F0,4,[Rsp]!                     ;Register: dump its group of four;
        SFMMIFD F4,4,[Rsp]!                     ; quicker than selecting just one
        BIC     $t2,$t2,#TopBit                 ;Index within the dumped group
        ADD     $t,Rsp,$t2,LSR #27              ;Address = Rsp + 4*index
        ADD     $t,$t,$t2,LSR #26               ;            + 8*index
10
        LDMIA   $t,$dest                        ;Fetch register value or constant
        ADDCC   Rsp,Rsp,#48                     ;Register case: drop the dump

        |

          ; Constant case (C set): address within FPEConstTable.
Base    SETA    :BASE:FPEConstTable
          [ Base = 15
Offset  SETA    FPEConstTable-({PC}+8)
          |
Offset  SETA    :INDEX:FPEConstTable
          ]
        ADDCS   $t,R$Base,$t2,LSR #25
          [ Offset <> 0
        ADDCS   $t,$t,#Offset
          ]
          ; Register case (C clear): address within FPE_Regs.
Base    SETA    :BASE:FPE_Regs
          [ Base = 15
Offset  SETA    FPE_Regs-({PC}+8)
          |
Offset  SETA    :INDEX:FPE_Regs
          ]
          [ FPE4WordsPerReg
        ADDCC   $t,R$Base,$t2,LSR #25
          |
        ADDCC   $t,R$Base,$t2,LSR #27
        ADDCC   $t,$t,$t2,LSR #26
          ]
          [ Offset <> 0
        ADDCC   $t,$t,#Offset
          ]
        LDMIA   $t,$dest

        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to get both source operands of an instruction into two groups of
|
|
; three registers. Operands are:
|
|
; $type: "FPASC" or "FPE";
|
|
; $dest1: The destination register list for the first operand;
|
|
; $dest2: The destination register list for the second operand;
|
|
; $instr: The instruction whose second source is to be transferred;
|
|
; $t, $t2: Temporaries.
|
|
|
|
; GetS12: load both source operands of $instr: the first (S1 field) into the
; register list $dest1, the second (register or constant, per S2_Ibit) into
; $dest2. $t and $t2 are temporaries.
;   "FPASC": dump all eight FPA registers to the stack and pick the values
;            out of the dump (constants come from $type.ConstTable).
;   "FPE":   expands to GetS1 followed by GetS2.
; $label is attached to the first instruction generated in either variant.
        MACRO
$label  GetS12  $type,$dest1,$dest2,$instr,$t,$t2
        LCLA    Base
        LCLA    Offset
        ASSERT  ("$type" = "FPASC") :LOR: ("$type" = "FPE")
        [ ("$type" = "FPASC")

$label  SFMFD   F4,4,[Rsp]!                     ;Dump all registers: F4-7 first,
        SFMFD   F0,4,[Rsp]!                     ; then F0-3 below them
        AND     $t,$instr,#S1_mask              ;S1 register number
        ADD     $t,$t,$t,LSL #1                 ;Times 3 words per register
        BiShift ADD,$t,Rsp,$t,LSR #S1_pos,LSL #2        ;Address of the S1 value
        LDMIA   $t,$dest1                       ;Fetch it
        ASSERT  S2_Ibit = 1:SHL:(S2_pos+3)
        ASSERT  S2_pos = 0
        MOVS    $t2,$instr,LSL #29              ;C := S2_Ibit; $t2 := register or
                                                ; constant number, left-aligned
          ; Split the constant table address into base register and offset.
Base    SETA    :BASE:$type.ConstTable
          [ Base = 15
Offset  SETA    $type.ConstTable-({PC}+8)
          |
Offset  SETA    :INDEX:$type.ConstTable
          ]
        ADDCS   $t,R$Base,$t2,LSR #25           ;Constant: form its address
          [ Offset <> 0
        ADDCS   $t,$t,#Offset
          ]
        ADDCC   $t,Rsp,$t2,LSR #27              ;Register: Rsp + 4*number
        ADDCC   $t,$t,$t2,LSR #26               ;               + 8*number
        LDMIA   $t,$dest2
        ADD     Rsp,Rsp,#96                     ;Drop the 8*12-byte dump

        |

$label  GetS1   $type,$dest1,$instr,$t
        GetS2   $type,$dest2,$instr,$t,$t2

        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; The standard macro to return to the caller. Note that care has to be taken
|
|
; here never to leave Rsp pointing above any useful stack contents, in case
|
|
; of a badly-timed interrupt.
|
|
|
|
; Return: the standard return to the caller. Rsp is reset to Rfp before the
; saved registers are reloaded so that it never points above stack contents
; that are still needed.
        MACRO
$label  Return
        [ {CONFIG} = 26
$label  MOV     Rsp,Rfp                 ;Drop everything below the register dump
        ]
        [ {CONFIG} = 32
$label  LDMDB   Rfp,{Rtmp,Rtmp2}        ;Fetch the saved SPSR and CPSR
        MSR     CPSR_all,Rtmp2          ;Restoring the CPSR re-disables interrupts,
        MSR     SPSR_all,Rtmp           ; so the SPSR is never valid while
                                        ; interrupts are enabled
        MOV     Rsp,Rfp                 ;Drop everything below the register dump
        ]
        LDMIA   Rfp,{R0-R14}^           ;Coding rules: no write-back allowed here,
        NOP                             ; and the next instruction needs protecting
        ADD     Rsp,Rsp,#15*4           ;Perform the write-back by hand
        LDMFD   Rsp!,{PC}^              ;Restore R13_svc/R13_und, PC and PSR
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to get the processor mode for the caller, with the 26/32 bit
|
|
; distinction removed. The flags are set on the result value, so Z indicates
|
|
; whether we're in user mode.
|
|
|
|
; GetMode: $res := the caller's processor mode with the 26/32-bit distinction
; removed. The flags are set on the result; the ASSERTs check that the user
; mode values reduce to zero, so Z means "user mode".
        MACRO
$label  GetMode $res
        [ {CONFIG}=32
$label  LDR     $res,[Rfp,#-8]                  ;Saved SPSR value
        ANDS    $res,$res,#Mode_mask-Mode_32not26
        ASSERT  (Mode_USR26:AND::NOT:Mode_32not26) = 0
        ASSERT  (Mode_USR32:AND::NOT:Mode_32not26) = 0
        ]
        [ {CONFIG}=26
$label  LDR     $res,[Rfp,#15*4]                ;Saved LR value
        ANDS    $res,$res,#Mode_mask
        ASSERT  Mode_USR26 = 0
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to insert the right amount of padding between a table-driven branch
|
|
; instruction (e.g. ADD PC,PC,reg,LSL #2) and the in-line branch table that
|
|
; follows it. Made into a macro for documentation purposes, and also just in
|
|
; case the user-visible pipeline depth has to change at some point in the
|
|
; future.
|
|
|
|
; BranchTablePad: emit the one word of padding that separates a table-driven
; branch instruction from the in-line branch table following it.
        MACRO
        BranchTablePad
        DCD     0                       ;Pad word ahead of the branch table
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to re-enable interrupts if "EnableInterrupts" is {TRUE}. Only
|
|
; argument is a temporary register.
|
|
|
|
; InterruptEnable: re-enable interrupts (clear I_bit) if EnableInterrupts is
; {TRUE}; otherwise no code is generated. $t is a temporary register.
; $label is defined in both cases, so a label placed on the invocation stays
; usable when the macro expands to no instructions.
        MACRO
$label  InterruptEnable $t
        [ EnableInterrupts
          [ {CONFIG} = 32
$label  MRS     $t,CPSR_all
        BIC     $t,$t,#I_bit
        MSR     CPSR_all,$t
          ]
          [ {CONFIG} = 26
$label  MOV     $t,PC                   ;PC read as an operand includes the PSR bits
        BIC     $t,$t,#I_bit
        TEQP    PC,$t                   ;Write the PSR bits back
          ]
        NOP
        |
$label                                  ;No code emitted; keep the label defined
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to re-disable interrupts if "EnableInterrupts" is {TRUE}. Only
|
|
; argument is a temporary register.
|
|
|
|
; InterruptDisable: re-disable interrupts (set I_bit) if EnableInterrupts is
; {TRUE}; otherwise no code is generated. $t is a temporary register.
; $label is defined in both cases, so a label placed on the invocation stays
; usable when the macro expands to no instructions.
        MACRO
$label  InterruptDisable $t
        [ EnableInterrupts
          [ {CONFIG} = 32
$label  MRS     $t,CPSR_all
        ORR     $t,$t,#I_bit
        MSR     CPSR_all,$t
          ]
          [ {CONFIG} = 26
$label  MOV     $t,PC                   ;PC read as an operand includes the PSR bits
        ORR     $t,$t,#I_bit
        TEQP    PC,$t                   ;Write the PSR bits back
          ]
        NOP
        |
$label                                  ;No code emitted; keep the label defined
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to do the standard "enter recursive floating point code" processing.
|
|
; The operands are:
|
|
; $freeregs: Number of floating point registers to free up, in the range 1
|
|
; to 8;
|
|
; $extra: Number of bytes of extra space to be left on the stack above
|
|
; the register dump;
|
|
; $addr: Register to contain address of extra space - also used to hold
|
|
; temporary values during macro, so must be present even if no
|
|
; extra space is wanted;
|
|
; $nofpsr: If this operand is non-null, it inhibits the FPSR changes
|
|
; described below.
|
|
; Rsp and Rfpsr are also operands.
|
|
; This code is written quite carefully, to avoid the use of floating point
|
|
; instructions the FPE won't like (e.g. those which use mode-dependent
|
|
; registers). It also disables interrupts (if they were ever enabled) and
|
|
; leaves a record of what floating point registers are on the stack, to make
|
|
; certain core_abort works. Finally, it clears out the exception enable bits
|
|
; and cumulative flags from the real FPSR, since exceptions in recursive
|
|
; code must not go out to the user trap handlers. Note that Rfpsr holds the
|
|
; real FPSR value and will be written back to the real FPSR before control
|
|
; is returned to the user, either via a trap handler or by normal return.
|
|
|
|
; EnterRecursive: standard entry sequence for recursive floating point code.
;   $freeregs  Number of floating point registers to dump (1..8).
;   $extra     Bytes of extra stack space to leave above the register dump.
;   $addr      Receives the address of that extra space; also used as a
;              temporary, so it must always be supplied.
;   $nofpsr    If non-null, the real FPSR is left unchanged.
; Stack layout built here, from Rsp upwards: one record word (Rfpsr shifted
; left 8, ORed with a mask of $freeregs one-bits), the register dump at 12
; bytes per register, then the extra space.
        MACRO
$label  EnterRecursive $freeregs,$extra,$addr,$nofpsr
        ASSERT  ($freeregs) <= 8
        ASSERT  ($freeregs) >= 1
$label  SUB     Rsp,Rsp,#($freeregs)*12+4+($extra)      ;Claim all the space at once
        MOV     $addr,Rfpsr,LSL #8                      ;Build the record word
        ORR     $addr,$addr,#(1:SHL:($freeregs))-1
        STR     $addr,[Rsp]
        InterruptDisable $addr
        [ "$nofpsr" = ""
        ; Write an FPSR with exception enables and cumulative flags cleared.
        BIC     $addr,Rfpsr,#IOE_bit+DZE_bit+OFE_bit+UFE_bit+IXE_bit
        BIC     $addr,$addr,#IOC_bit+DZC_bit+OFC_bit+UFC_bit+IXC_bit
        WFS     $addr
        ]
        ADD     $addr,Rsp,#($freeregs)*12+4             ;$addr -> extra space
        [ ($freeregs) <= 4
        SFM     F0,($freeregs),[$addr,#-12*($freeregs)]
        |
        SFM     F0,4,[$addr,#-12*($freeregs)]
        SFM     F4,($freeregs)-4,[$addr,#-12*($freeregs)+48]
        ]
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to do the standard "exit recursive floating point code" processing.
|
|
; The operands are:
|
|
; $freeregs: Number of floating point registers to recover from the stack,
|
|
; in the range 1 to 8;
|
|
; $extra: Number of bytes of extra space to recover from the stack;
|
|
; $t: A temporary register;
|
|
; $fpres: Floating point register that contains the floating point result
|
|
; - null if no such result;
|
|
; $result: Register list to take 3 word floating point result - null if
|
|
; no such result.
|
|
; Rsp is also an operand.
|
|
|
|
; ExitRecursive: standard exit sequence for recursive floating point code.
;   $freeregs  Number of floating point registers to reload (1..8).
;   $extra     Bytes of extra stack space to release; must be at least 12
;              when $fpres is given, since the result is staged there.
;   $t         Temporary register.
;   $fpres     Floating point register holding the result, or null.
;   $result    Register list to receive the 3-word result, or null.
; $label is attached to the first instruction generated.
        MACRO
$label  ExitRecursive $freeregs,$extra,$t,$fpres,$result
        ASSERT  ($freeregs) <= 8
        ASSERT  ($freeregs) >= 1
$label  ADD     $t,Rsp,#($freeregs)*12+4                ;$t -> just above the dump
        [ "$fpres" <> ""
        ASSERT  ($extra) >= 12
        SFM     $fpres,1,[$t]                           ;Stage the result in memory,
        LDMIA   $t,$result                              ; then load it as 3 words
        ]
        [ ($freeregs) <= 4
        LFM     F0,($freeregs),[$t,#-12*($freeregs)]
        |
        LFM     F0,4,[$t,#-12*($freeregs)]
        LFM     F4,($freeregs)-4,[$t,#-12*($freeregs)+48]
        ]
        ADD     Rsp,Rsp,#($freeregs)*12+4+($extra)      ;Release the whole frame
        InterruptEnable $t
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macro to determine whether an exception was precise or imprecise. Implicit
|
|
; operands are Rfpsr (for the SO bit and to determine whether it is a
|
|
; hardware or software context) and Rins (to determine what type of
|
|
; instruction this is). Explicit operands are:
|
|
; $dst: Set to zero if precise, non-zero if imprecise;
|
|
; $t: A temporary register;
|
|
; $tbl_fpa: The address of a bit table used to determine whether FPA
|
|
; instructions are capable of producing imprecise exceptions.
|
|
;
|
|
; The full set of conditions for the operation to be imprecise are:
|
|
;
|
|
; * The FPA hardware is being used;
|
|
; * The SO bit is clear in the FPSR;
|
|
; * The instruction is a CPDO or CPRT;
|
|
; * The instruction is capable of delivering an imprecise exception (e.g.
|
|
; not a purely software-implemented instruction, a FIX or a compare);
|
|
;
|
|
; Testing this last condition is complicated: it is done via the bit table
|
|
; mentioned above.
|
|
;
|
|
; The optimisation that the whole test is unnecessary for FPE-only code is
|
|
; not performed here: it is instead done in the main assembly source and
|
|
; this macro is never used.
|
|
|
|
; TestImp: $dst := zero if the exception was precise, non-zero if imprecise.
;   $t         Temporary register.
;   $tbl_fpa   Address of the bit table saying which FPA instructions can
;              produce imprecise exceptions.
; Rins and Rfpsr are implicit inputs and must differ from $dst and $t.
        MACRO
$label  TestImp $dst,$t,$tbl_fpa
        ASSERT  $dst <> Rins
        ASSERT  $dst <> Rfpsr
        ASSERT  $t <> Rins
        ASSERT  $t <> Rfpsr
        ASSERT  $t <> $dst
$label
        ; Point at the table...
        ADR     $t,$tbl_fpa
        ; ...and fetch the entry for this instruction.
        AND     $dst,Rins,#RTnotDO_bit
        BiShift LDR,$dst,$t,$dst,LSR #RTnotDO_pos,LSL #2        ;Word chosen by RTnotDO
        TST     Rins,#Op2_mask
        MOVNE   $dst,$dst,LSR #16               ;Top halfword if the Op2 field is non-zero
        AND     $t,Rins,#Op1_mask
        MOV     $t,$t,LSR #Op1_pos              ;$t := Op1 field value
        ; Combine with the remaining conditions.
        MOV     $dst,$dst,LSR $t                ;Bit 0 is the result so far
        AND     $dst,$dst,Rfpsr,LSR #31         ;Keep only bit 0, ANDed with
                                                ; (hardware in use)
        BIC     $dst,$dst,Rfpsr,LSR #SO_pos     ;AND with (SO bit clear)
        AND     $dst,$dst,Rins,LSR #RTDOnotDT_pos       ;AND with (CPRT or CPDO)
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; Macros used for byte arrays.
|
|
|
|
; ByteArrayCount: index of the current byte array. It is used to build the
; ByteArray_<n> and ByteArrayEnd_<n> label names and is advanced by BytesEnd.
                GBLA    ByteArrayCount
ByteArrayCount  SETA    0
|
|
|
|
; BytesStart: open a byte array. Aligns, then defines $label (if given) and
; the label ByteArray_<ByteArrayCount> at the same address.
        MACRO
$label  BytesStart
        ALIGN
$label
ByteArray_$ByteArrayCount
        MEND
|
|
|
|
; BytesEnd: close a byte array. Aligns, defines $label (if given) and the
; label ByteArrayEnd_<ByteArrayCount> at the same address, then increments
; ByteArrayCount ready for the next array.
        MACRO
$label  BytesEnd
        ALIGN
$label
ByteArrayEnd_$ByteArrayCount
ByteArrayCount  SETA    ByteArrayCount+1
        MEND
|
|
|
|
;===========================================================================
|
|
|
|
; End of the macro definitions.
        END
|