;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Microsoft Research Singularity
;;;
;;; Copyright (c) Microsoft Corporation. All rights reserved.
;;;
;;; This file contains ARM-specific assembly code.
;;;

;**********************************************************************
; void *
; memmove( void *dest, const void *src, size_t count );
;
; memmove() copies 'count' bytes from the source buffer to the
; destination buffer and returns a pointer to the destination
; buffer. memmove() guarantees that overlapping buffers will
; be copied successfully.
;
; Register usage:
;   R0  (dest)   destination pointer; the original value is pushed in
;                the prolog and popped on exit as the return value
;   R1  (source) source pointer
;   R2  (count)  number of bytes remaining
;   R3, R4, R5, R12, lr  scratch (R4, R5 and lr are saved in the prolog)
;
; Strategy:
;   source >= dest (unsigned)  : copy forwards  (HEAD_TO_TAIL)
;   source <  dest (unsigned)  : copy backwards (TAILTOHEAD)
; In both directions the destination is first brought to word
; alignment with byte moves; then one of four loops is chosen from the
; low two bits of the source pointer (word aligned, halfword aligned,
; off by one, off by three).  The misaligned loops read whole words
; from the source and shift/merge them into aligned destination words.
;
; NOTE: the size checks use signed conditions (BLT/BGE) on 'count',
; so counts of 2GB or more are not handled by this routine.
;
;**********************************************************************
;
; $$$ NOTE $$$: These routines use the LDRH opcode which is not
; supported on ARM 3 or 3T architectures. Hence, these routines
; assume ARM 4 or later architectures.
;
;**********************************************************************

        OPT     2       ; disable listing
        INCLUDE kxarm.inc
        OPT     1       ; reenable listing

dest    RN      R0
source  RN      R1
count   RN      R2
temp1   RN      R3
temp2   RN      R4
temp3   RN      R5
temp4   RN      R12

        IF Thumbing
        THUMBAREA
        ENDIF

        NESTED_ENTRY memmove

        ROUT

        IF Thumbing
        ; Switch from Thumb mode to ARM mode
        DCW     0x4778                          ; bx pc
        DCW     0x46C0                          ; nop
        ENDIF

        STMDB   sp!, {dest,temp2,temp3,lr}      ; save registers

        PROLOG_END

        ;if source comes before destination copy from tail to head.
        ;Pointers are addresses, so the comparison must be unsigned:
        ;a signed compare picks the wrong direction when the two
        ;buffers lie on opposite sides of 0x80000000.
        CMP     source, dest
        BHS     HEAD_TO_TAIL                    ; source >= dest: forward copy

;**********************************************************************
; Copy from tail to head to avoid source overwrite because the source
; precedes the destination
;**********************************************************************
TAILTOHEAD
        ;Move pointers to the tails
        ADD     source, source, count
        ADD     dest, dest, count

        CMP     count, #8                       ;if < 8 bytes, byte moves
        BLT     TTH_BYTEMOVE4

        ;check if destination is word aligned, if not then align it
        ANDS    temp1, dest, #3                 ; 2-3 cycles
        BEQ     TTH_CHKSRC_ALIGN

        ;
        ;read 1 to 3 bytes until the destination is word aligned, then
        ;see if the source is word aligned, if it is then go back to
        ;word length moves, else continue on with single byte moves
        ;
TTH_ATTEMPTALIGN
        LDRB    temp2, [source, #-1]!           ; 8 cycles/1-3 bytes
        CMP     temp1, #2
        STRB    temp2, [dest, #-1]!
        LDRGEB  temp3, [source, #-1]!
        SUB     count, count, temp1
        LDRGTB  temp2, [source, #-1]!
        STRGEB  temp3, [dest, #-1]!
        STRGTB  temp2, [dest, #-1]!

        ; Check if source is word aligned, if not check for halfword
        ; alignment.
TTH_CHKSRC_ALIGN
        TST     source, #1                      ; 3-7 cycles
        BNE     TTH_UNALIGNED                   ; Unaligned moves
        TST     source, #2
        BNE     TTH_HWORD_ALIGNED               ; Half Word aligned

        ;
        ; Word aligned source and destination.
        ; Move blocks of 32 bytes until we have less than 32 bytes left,
        ; then divide moves in half down to less than 4 then jump to byte
        ; moves.
        ; NOTE: Because of the overhead of pushing registers for 32 byte
        ; moves it is actually more efficient to use 16 byte moves for
        ; blocks of less than 128 bytes.
        ;
TTH_REALIGNED
        SUBS    count, count, #32               ; 2-3 cycles
        BLT     TTH_BLK16

TTH_BLK32                                       ; 20 cycles/32 bytes
        LDMDB   source!, {temp1,temp2,temp3,lr}
        STMDB   dest!, {temp1,temp2,temp3,lr}
        LDMDB   source!, {temp1,temp2,temp3,lr}
        SUBS    count, count, #32
        STMDB   dest!, {temp1,temp2,temp3,lr}
        BGE     TTH_BLK32

TTH_BLK16                                       ; 10 cycles/16 bytes
        ADDS    count, count, #16
        LDMGEDB source!, {temp1, temp2, temp3, lr}
        SUBGE   count, count, #16
        STMGEDB dest!, {temp1, temp2, temp3, lr}

TTH_BLK8                                        ; 6 cycles / 8 bytes
        ADDS    count, count, #8
        LDMGEDB source!, {temp1, temp2}
        SUBGE   count, count, #8
        STMGEDB dest!, {temp1, temp2}

TTH_BLK4                                        ; 6-9 cycles/4 bytes
        ADDS    count, count, #4
        LDRGE   temp1, [source, #-4]!
        STRGE   temp1, [dest, #-4]!

TTH_WORD_BYTES
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     TTH_WORD_EXIT
        LDRB    temp1, [source, #-1]!           ; 4-7 cycles/1 byte
        CMP     count, #2
        STRB    temp1, [dest, #-1]!
        LDRGEB  temp2, [source, #-1]!
        LDRGTB  temp3, [source, #-1]!           ; 8 cycles/1-2 bytes
        STRGEB  temp2, [dest, #-1]!
        STRGTB  temp3, [dest, #-1]!

TTH_WORD_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

        ;
        ; Source and Destination are half word aligned.
        ; For blocks < 96 bytes it's actually more efficient to jump to
        ; the 8 byte copy than take the hit for setup time on 32 byte copy.
        ;
        ; lr carries the two already-read source bytes in its upper
        ; halfword; each loaded word is split and merged with it.
        ;
TTH_HWORD_ALIGNED
        LDRH    lr, [source, #-2]!              ; 4-5 cycles
        SUBS    count, count, #32
        MOV     lr, lr, LSL #16
        BLT     TTH_HWORD8_TST

TTH_HWORD32                                     ; 35 cycles/32 bytes
        LDMDB   source!, {temp1,temp2,temp3,temp4}
        SUBS    count, count, #32
        ORR     lr, lr, temp4, LSR #16
        MOV     temp4, temp4, LSL #16
        ORR     temp4, temp4, temp3, LSR #16
        MOV     temp3, temp3, LSL #16
        ORR     temp3, temp3, temp2, LSR #16
        MOV     temp2, temp2, LSL #16
        ORR     temp2, temp2, temp1, LSR #16
        STMDB   dest!, {temp3,temp4,lr}         ; Store bytes 21-32
        STR     temp2, [dest, #-4]!             ; Store bytes 17-20
        MOV     lr, temp1, LSL #16
        LDR     temp4, [source, #-4]!
        LDMDB   source!, {temp1,temp2,temp3}
        ORR     lr, lr, temp4, LSR #16
        MOV     temp4, temp4, LSL #16
        ORR     temp4, temp4, temp3, LSR #16
        MOV     temp3, temp3, LSL #16
        ORR     temp3, temp3, temp2, LSR #16
        MOV     temp2, temp2, LSL #16
        ORR     temp2, temp2, temp1, LSR #16
        STMDB   dest!, {temp3,temp4,lr}         ; Store bytes 5-16
        STR     temp2, [dest, #-4]!             ; Store bytes 1-4
        MOV     lr, temp1, LSL #16
        BGE     TTH_HWORD32

TTH_HWORD8_TST
        ADDS    count, count, #24
        BLT     TTH_HWORD4

TTH_HWORD8                                      ; 11 cycles/8 bytes
        LDMDB   source!, {temp2, temp3}
        SUBS    count, count, #8
        ORR     lr, lr, temp3, LSR #16
        MOV     temp3, temp3, LSL #16
        ORR     temp3, temp3, temp2, LSR #16
        STR     lr, [dest, #-4]!
        STR     temp3, [dest, #-4]!
        MOV     lr, temp2, LSL #16
        BGE     TTH_HWORD8

TTH_HWORD4                                      ; 3-12 cycles/4 bytes
        ADDS    count, count, #4
        BLT     TTH_HWORD_BYTES
        LDR     temp1, [source, #-4]!
        ORR     lr, lr, temp1, LSR #16
        STR     lr, [dest, #-4]!
        MOV     lr, temp1, LSL #16

TTH_HWORD_BYTES
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     TTH_HWORD_EXIT
        MOV     lr, lr, LSR #16                 ; 11 cycles/1-3 bytes
        CMP     count, #2
        MOVLT   lr, lr, LSR #8
        STRLTB  lr, [dest, #-1]!
        LDRGTB  temp1, [source, #-1]!
        STRGEH  lr, [dest, #-2]!
        STRGTB  temp1, [dest, #-1]!

TTH_HWORD_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

TTH_UNALIGNED
        TST     source, #2
        BEQ     TTH_OFFONE

        ;
        ; 3 Byte difference between word and source.
        ;
        ; lr carries the three already-read source bytes in its upper
        ; 24 bits.
        ;
TTH_OFFTHREE
        LDRB    temp3, [source, #-1]!           ; 5-6 cycles
        LDRH    lr, [source, #-2]!
        SUBS    count, count, #32
        ORR     lr, lr, temp3, LSL #16
        MOV     lr, lr, LSL #8
        BLT     TTH_OFFTHREE8_TST

TTH_OFFTHREE32                                  ; 35 cycles/32 bytes
        LDMDB   source!, {temp1,temp2,temp3,temp4}
        SUBS    count, count, #32
        ORR     lr, lr, temp4, LSR #24
        MOV     temp4, temp4, LSL #8
        ORR     temp4, temp4, temp3, LSR #24
        MOV     temp3, temp3, LSL #8
        ORR     temp3, temp3, temp2, LSR #24
        MOV     temp2, temp2, LSL #8
        ORR     temp2, temp2, temp1, LSR #24
        STMDB   dest!, {temp3,temp4,lr}         ; Store bytes 21-32
        STR     temp2, [dest, #-4]!             ; Store bytes 17-20
        MOV     lr, temp1, LSL #8
        LDR     temp4, [source, #-4]!
        LDMDB   source!, {temp1,temp2,temp3}
        ORR     lr, lr, temp4, LSR #24
        MOV     temp4, temp4, LSL #8
        ORR     temp4, temp4, temp3, LSR #24
        MOV     temp3, temp3, LSL #8
        ORR     temp3, temp3, temp2, LSR #24
        MOV     temp2, temp2, LSL #8
        ORR     temp2, temp2, temp1, LSR #24
        STMDB   dest!, {temp3,temp4,lr}         ; Store bytes 5-16
        STR     temp2, [dest, #-4]!             ; Store bytes 1-4
        MOV     lr, temp1, LSL #8
        BGE     TTH_OFFTHREE32

TTH_OFFTHREE8_TST
        ADDS    count, count, #24
        BLT     TTH_OFFTHREE4

TTH_OFFTHREE8                                   ; 11 cycles/8 bytes
        LDMDB   source!, {temp1, temp2}
        SUBS    count, count, #8
        ORR     lr, lr, temp2, LSR #24
        MOV     temp2, temp2, LSL #8
        ORR     temp2, temp2, temp1, LSR #24
        STR     lr, [dest, #-4]!                ; Store bytes 5-8
        STR     temp2, [dest, #-4]!             ; Store bytes 1-4
        MOV     lr, temp1, LSL #8
        BGE     TTH_OFFTHREE8

TTH_OFFTHREE4                                   ; 3-11 cycles/4 bytes
        ADDS    count, count, #4
        BLT     TTH_OFFTHREE_BYTES
        LDR     temp3, [source, #-4]!
        ORR     lr, lr, temp3, LSR #24
        STR     lr, [dest, #-4]!
        MOV     lr, temp3, LSL #8

TTH_OFFTHREE_BYTES
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     TTH_OFFTHREE_EXIT
        MOV     lr, lr, LSR #8                  ; 11 cycles/1-3 bytes
        CMP     count, #2
        MOVLT   temp1, lr, LSR #16
        STRLTB  temp1, [dest, #-1]!
        MOVGE   temp1, lr, LSR #8
        STRGEH  temp1, [dest, #-2]!
        STRGTB  lr, [dest, #-1]!

TTH_OFFTHREE_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

        ;
        ; One Byte difference between word and source.
        ;
        ; lr carries the one already-read source byte in its top byte.
        ;
TTH_OFFONE
        LDRB    lr, [source, #-1]!
        SUBS    count, count, #32               ; 2-3 cycles
        MOV     lr, lr, LSL #24
        BLT     TTH_OFFONE8_TST

TTH_OFFONE32                                    ; 35 cycles/32 bytes
        LDMDB   source!, {temp1,temp2,temp3,temp4}
        SUBS    count, count, #32               ; avoid result delay
        ORR     lr, lr, temp4, LSR #8
        MOV     temp4, temp4, LSL #24
        ORR     temp4, temp4, temp3, LSR #8
        MOV     temp3, temp3, LSL #24
        ORR     temp3, temp3, temp2, LSR #8
        MOV     temp2, temp2, LSL #24
        ORR     temp2, temp2, temp1, LSR #8
        STMDB   dest!, {temp3,temp4,lr}         ; Store bytes 21-32
        STR     temp2, [dest, #-4]!             ; Store bytes 17-20
        MOV     lr, temp1, LSL #24
        LDR     temp4, [source, #-4]!
        LDMDB   source!, {temp1,temp2,temp3}
        ORR     lr, lr, temp4, LSR #8
        MOV     temp4, temp4, LSL #24
        ORR     temp4, temp4, temp3, LSR #8
        MOV     temp3, temp3, LSL #24
        ORR     temp3, temp3, temp2, LSR #8
        MOV     temp2, temp2, LSL #24
        ORR     temp2, temp2, temp1, LSR #8
        STMDB   dest!, {temp3,temp4,lr}         ; Store bytes 5-16
        STR     temp2, [dest, #-4]!             ; Store bytes 1-4
        MOV     lr, temp1, LSL #24
        BGE     TTH_OFFONE32

TTH_OFFONE8_TST
        ADDS    count, count, #24
        BLT     TTH_OFFONE4

TTH_OFFONE8                                     ; 11 cycles/8 bytes
        LDMDB   source!, {temp2, temp3}
        SUBS    count, count, #8
        ORR     lr, lr, temp3, LSR #8
        MOV     temp3, temp3, LSL #24
        STR     lr, [dest, #-4]!
        ORR     temp3, temp3, temp2, LSR #8
        STR     temp3, [dest, #-4]!
        MOV     lr, temp2, LSL #24
        BGE     TTH_OFFONE8

TTH_OFFONE4                                     ; 8-10 cycles/4 bytes
        ADDS    count, count, #4
        BLT     TTH_OFFONE_BYTES
        LDR     temp3, [source, #-4]!
        ORR     lr, lr, temp3, LSR #8
        STR     lr, [dest, #-4]!
        MOV     lr, temp3, LSL #24

TTH_OFFONE_BYTES                                ; 13 cycles/1-3 bytes
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     TTH_OFFONE_EXIT
        MOV     lr, lr, LSR #24
        CMP     count, #2
        STRB    lr, [dest, #-1]!
        BLT     TTH_OFFONE_EXIT
        LDRGEB  temp1, [source, #-1]!
        LDRGTB  temp2, [source, #-1]!
        STRGEB  temp1, [dest, #-1]!
        STRGTB  temp2, [dest, #-1]!

TTH_OFFONE_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

TTH_BYTEMOVE4                                   ; 12 cycles/4 bytes
        CMP     count, #4
        BLT     TTH_LAST3
        LDRB    temp1, [source, #-1]!
        LDRB    temp2, [source, #-1]!
        LDRB    temp3, [source, #-1]!
        LDRB    lr, [source, #-1]!
        SUB     count, count, #4
        STRB    temp1, [dest, #-1]!
        STRB    temp2, [dest, #-1]!
        STRB    temp3, [dest, #-1]!
        STRB    lr, [dest, #-1]!

        ; Move the last 0-3 bytes
TTH_LAST3
        CMP     count, #0                       ; 2 or 5 cycles
        BEQ     TTH_BYTEMOVE_EXIT

        ;
        ;single byte moves
        ;
TTH_BYTEMOVE                                    ; 11 cycles/1-3 bytes
        LDRB    temp1, [source, #-1]!
        CMP     count, #2
        STRB    temp1, [dest, #-1]!
        BLT     TTH_BYTEMOVE_EXIT
        LDRGEB  temp2, [source, #-1]!
        LDRGTB  temp3, [source, #-1]!
        STRGEB  temp2, [dest, #-1]!
        STRGTB  temp3, [dest, #-1]!

TTH_BYTEMOVE_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

;**********************************************************************
; Copy from head to tail to avoid source overwrite because the source
; follows (or equals) the destination
;**********************************************************************
HEAD_TO_TAIL
        ;if LT 8 bytes store them and exit
        CMP     count, #8                       ; 2-3 cycles
        BLT     BYTEMOVE4

        ;Check alignment of parameters
        ANDS    temp1, dest, #3                 ; 2-3 cycles
        BEQ     SRCALIGN

        ; destination is at least 1 byte misaligned
        ; Read and write (4 - alignment) bytes to align destination.
        RSB     temp1, temp1, #4                ; 9 cycles
        LDRB    temp2, [source], #1
        CMP     temp1, #2
        STRB    temp2, [dest], #1
        LDRGEB  temp3, [source], #1             ; >= 2 == at least 2 bytes
        LDRGTB  temp2, [source], #1             ; > 2 == 3 bytes unaligned
        SUB     count, count, temp1
        STRGEB  temp3, [dest], #1
        STRGTB  temp2, [dest], #1

SRCALIGN                                        ; 3 - 7 cycles
        TST     source, #1                      ; save alignment of src
        BNE     UNALIGNED                       ; src 3 byte unaligned.
        TST     source, #2
        BNE     HWORDMOVE                       ; src and dst are hword aligned

        ;
        ;word aligned source and destination, move blocks of 32 bytes
        ;until we have less than 32 bytes left, then divide moves in
        ;half down to less than 4, where we will move the last 3 or less
        ;bytes
        ;
WORDMOVE
        SUBS    count, count, #32               ; 2-3 cycles
        BLT     BLK16

BLK32                                           ; 20 cycles/32 bytes
        LDMIA   source!, {temp1,temp2,temp3,lr}
        STMIA   dest!, {temp1,temp2,temp3,lr}
        LDMIA   source!, {temp1,temp2,temp3,lr}
        SUBS    count, count, #32
        STMIA   dest!, {temp1,temp2,temp3,lr}
        BGE     BLK32

BLK16                                           ; 11-4 cycles/16 bytes
        ADDS    count, count, #16
        LDMGEIA source!, {temp1, temp2, temp3, lr}
        STMGEIA dest!, {temp1, temp2, temp3, lr}
        BEQ     WORD_BYTES_EXIT
        SUBGTS  count, count, #16

BLK8                                            ; 6 cycles/8 bytes
        ADDS    count, count, #8
        LDMGEIA source!, {temp1, temp2}
        SUBGE   count, count, #8
        STMGEIA dest!, {temp1, temp2}

BLK4
        ADDS    count, count, #4                ; 6-9 cycles/4 bytes
        LDRGE   temp1, [source], #4
        STRGE   temp1, [dest], #4

WORD_BYTES
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     WORD_BYTES_EXIT                 ; On zero, Return to caller
        LDR     temp1, [source], #4             ; 10 cycles/1-3 bytes
        CMP     count, #2
        STRGEH  temp1, [dest], #2
        STRLTB  temp1, [dest], #1
        MOVGT   temp1, temp1, LSR #16
        STRGTB  temp1, [dest], #1

WORD_BYTES_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

        ;
        ; half word align source and destination
        ;
        ; temp1 carries the two already-read source bytes in its lower
        ; halfword.
        ;
HWORDMOVE                                       ; 2-3 cycles
        LDRH    temp1, [source], #2
        SUBS    count, count, #32
        BLT     HWORD8_TST

HWORD32                                         ; 35 cycles/32 bytes
        LDMIA   source!, {temp2,temp3,temp4,lr}
        ORR     temp1, temp1, temp2, LSL #16
        MOV     temp2, temp2, LSR #16
        ORR     temp2, temp2, temp3, LSL #16
        MOV     temp3, temp3, LSR #16
        ORR     temp3, temp3, temp4, LSL #16
        MOV     temp4, temp4, LSR #16
        ORR     temp4, temp4, lr, LSL #16
        STMIA   dest!, {temp1,temp2,temp3,temp4} ; Store bytes 1-16
        MOV     temp1, lr, LSR #16
        LDMIA   source!, {temp2,temp3,temp4,lr}
        ORR     temp1, temp1, temp2, LSL #16
        MOV     temp2, temp2, LSR #16
        ORR     temp2, temp2, temp3, LSL #16
        MOV     temp3, temp3, LSR #16
        ORR     temp3, temp3, temp4, LSL #16
        MOV     temp4, temp4, LSR #16
        ORR     temp4, temp4, lr, LSL #16
        STMIA   dest!, {temp1,temp2,temp3,temp4} ; Store bytes 17-32
        SUBS    count, count, #32
        MOV     temp1, lr, LSR #16
        BGE     HWORD32

HWORD8_TST
        ADDS    count, count, #24
        BLT     HWORD4

HWORD8                                          ; 11 cycles/8 bytes
        LDMIA   source!, {temp2,temp3}
        ORR     temp1, temp1, temp2, LSL #16
        MOV     temp2, temp2, LSR #16
        ORR     temp2, temp2, temp3, LSL #16
        STMIA   dest!, {temp1, temp2}
        SUBS    count, count, #8
        MOV     temp1, temp3, LSR #16
        BGE     HWORD8

HWORD4                                          ; 3-7 cycles/4 bytes
        ADDS    count, count, #4
        BLT     HWORD_BYTES
        LDR     temp2, [source], #4
        ORR     temp1, temp1, temp2, LSL #16
        STR     temp1, [dest], #4
        MOV     temp1, temp2, LSR #16

HWORD_BYTES                                     ; 5-11 cycles/1-3 bytes
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     HWORD_BYTES_EXIT                ; On zero, Return to caller
        CMP     count, #2
        STRLTB  temp1, [dest], #1
        LDRGTB  temp2, [source], #1
        STRGEH  temp1, [dest], #2
        STRGTB  temp2, [dest], #1

HWORD_BYTES_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}
        ENDIF

        ;
        ; Unaligned Moves
        ;
UNALIGNED
        TST     source, #2
        BEQ     UNALIGNED1

        ; Source is three bytes past word alignment: one byte read brings
        ; it to a word boundary.  temp1 carries that byte in its low byte.
UNALIGNED3                                      ; 3-4 cycles
        LDRB    temp1, [source], #1
        SUBS    count, count, #32
        BLT     OFFTHREE8_TST

OFFTHREE32                                      ; 35 cycles/32 bytes
        LDMIA   source!, {temp2,temp3,temp4,lr}
        ORR     temp1, temp1, temp2, LSL #8
        MOV     temp2, temp2, LSR #24
        ORR     temp2, temp2, temp3, LSL #8
        MOV     temp3, temp3, LSR #24
        ORR     temp3, temp3, temp4, LSL #8
        MOV     temp4, temp4, LSR #24
        ORR     temp4, temp4, lr, LSL #8
        STMIA   dest!, {temp1,temp2,temp3,temp4} ; Store bytes 1-16
        MOV     temp1, lr, LSR #24
        LDMIA   source!, {temp2,temp3,temp4,lr}
        ORR     temp1, temp1, temp2, LSL #8
        MOV     temp2, temp2, LSR #24
        ORR     temp2, temp2, temp3, LSL #8
        MOV     temp3, temp3, LSR #24
        ORR     temp3, temp3, temp4, LSL #8
        MOV     temp4, temp4, LSR #24
        ORR     temp4, temp4, lr, LSL #8
        STMIA   dest!, {temp1,temp2,temp3,temp4} ; Store bytes 17-32
        SUBS    count, count, #32
        MOV     temp1, lr, LSR #24
        BGE     OFFTHREE32

OFFTHREE8_TST
        ADDS    count, count, #24
        BLT     OFFTHREE4

OFFTHREE8                                       ; 11 cycles/8 bytes
        LDMIA   source!, {temp2,temp3}
        ORR     temp1, temp1, temp2, LSL #8
        MOV     temp2, temp2, LSR #24
        ORR     temp2, temp2, temp3, LSL #8
        STMIA   dest!, {temp1, temp2}
        SUBS    count, count, #8
        MOV     temp1, temp3, LSR #24
        BGE     OFFTHREE8

OFFTHREE4                                       ; 3-7 cycles/4 bytes
        ADDS    count, count, #4
        BLT     OFFTHREE_BYTES
        LDR     temp2, [source], #4
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [dest], #4
        MOV     temp1, temp2, LSR #24

OFFTHREE_BYTES                                  ; 5-12 cycles/ 1-3 bytes
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     OFFTHREE_EXIT                   ; On zero, Return to caller
        CMP     count, #2
        LDRGEH  temp2, [source], #2
        STRB    temp1, [dest], #1
        STRGEB  temp2, [dest], #1
        MOVGT   temp2, temp2, LSR #8
        STRGTB  temp2, [dest], #1

OFFTHREE_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}   ; Return to caller
        ENDIF

        ;
        ; Source is one byte from word alignment.
        ; Read a byte & half word then multiple words and a byte. Then
        ; shift and ORR them into consecutive words for STM writes
        ;
UNALIGNED1                                      ; 5-6 cycles
        LDRB    temp1, [source], #1
        LDRH    temp2, [source], #2
        SUBS    count, count, #32
        ORR     temp1, temp1, temp2, LSL #8
        BLT     OFFONE8_TST

OFFONE32                                        ; 35 cycles/32 bytes
        LDMIA   source!, {temp2, temp3, temp4, lr}
        ORR     temp1, temp1, temp2, LSL #24
        MOV     temp2, temp2, LSR #8
        ORR     temp2, temp2, temp3, LSL #24
        MOV     temp3, temp3, LSR #8
        ORR     temp3, temp3, temp4, LSL #24
        MOV     temp4, temp4, LSR #8
        ORR     temp4, temp4, lr, LSL #24
        STMIA   dest!, {temp1,temp2,temp3,temp4} ; Store bytes 1-16
        MOV     temp1, lr, LSR #8
        LDMIA   source!, {temp2,temp3,temp4,lr}
        ORR     temp1, temp1, temp2, LSL #24
        MOV     temp2, temp2, LSR #8
        ORR     temp2, temp2, temp3, LSL #24
        MOV     temp3, temp3, LSR #8
        ORR     temp3, temp3, temp4, LSL #24
        MOV     temp4, temp4, LSR #8
        ORR     temp4, temp4, lr, LSL #24
        STMIA   dest!, {temp1,temp2,temp3,temp4} ; Store bytes 17-32
        SUBS    count, count, #32
        MOV     temp1, lr, LSR #8
        BGE     OFFONE32

OFFONE8_TST
        ADDS    count, count, #24
        BLT     OFFONE4

OFFONE8                                         ; 11 cycles/8 bytes
        LDMIA   source!, {temp2,temp3}
        ORR     temp1, temp1, temp2, LSL #24
        MOV     temp2, temp2, LSR #8
        ORR     temp2, temp2, temp3, LSL #24
        STMIA   dest!, {temp1,temp2}
        SUBS    count, count, #8
        MOV     temp1, temp3, LSR #8
        BGE     OFFONE8

OFFONE4                                         ; 3-9 cycles/4 bytes
        ADDS    count, count, #4
        BLT     OFFONE_BYTES
        LDR     temp2, [source], #4
        ORR     temp1, temp1, temp2, LSL #24
        STR     temp1, [dest], #4
        BEQ     OFFONE_EXIT
        MOV     temp1, temp2, LSR #8

OFFONE_BYTES                                    ; 11 cycles/1-3 bytes
        ADDLTS  count, count, #4                ; count = bytes left (0-3)
        BEQ     OFFONE_EXIT
        CMP     count, #2
        STRLTB  temp1, [dest], #1
        STRGEH  temp1, [dest], #2
        MOVGT   temp1, temp1, LSR #16
        STRGTB  temp1, [dest], #1

OFFONE_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}   ; Return to caller
        ENDIF

BYTEMOVE4                                       ; 12 cycles/4 bytes
        CMP     count, #4
        BLT     MMOVEXIT
        LDRB    temp1, [source], #1
        SUB     count, count, #4
        LDRB    temp2, [source], #1
        LDRB    temp3, [source], #1
        LDRB    lr, [source], #1
        STRB    temp1, [dest], #1
        STRB    temp2, [dest], #1
        STRB    temp3, [dest], #1
        STRB    lr, [dest], #1

MMOVEXIT                                        ; 2-5 cycles
        CMP     count, #0
        IF Interworking :LOR: Thumbing
        LDMEQIA sp!, {dest, temp2, temp3, lr}
        BXEQ    lr
        ELSE
        LDMEQIA sp!, {dest, temp2, temp3, pc}   ; On zero, Return to caller
        ENDIF

        ;
        ; Store last 3 or so bytes and exit
        ;
BYTEMOVE                                        ; 4-7 cycles/1 byte
        LDRB    temp1, [source], #1
        CMP     count, #2
        STRB    temp1, [dest], #1
        BLT     BYTEMOVE_EXIT
        LDRGEB  temp2, [source], #1             ; 8 cycles/1-2 bytes
        LDRGTB  temp3, [source], #1
        STRGEB  temp2, [dest], #1
        STRGTB  temp3, [dest], #1

BYTEMOVE_EXIT
        IF Interworking :LOR: Thumbing
        LDMIA   sp!, {dest, temp2, temp3, lr}
        BX      lr
        ELSE
        LDMIA   sp!, {dest, temp2, temp3, pc}   ; Return to caller
        ENDIF

        ENTRY_END memmove

        END