/* Monday, August 5, 2013 */

/* IE_tclib.S */

/*
 *****************************************************************
 *name:         tclib.S
 *author:       Samuel Igwe
 *date:         08/04/2013
 *description:  my thumb2 library. asm optimized generic routines
 *
 *notes:        from ARM technical reference manual
 *              instructions are conditional
 *              eq/ne (eq = 0, ne = not 0)
 *              ge/gt/le/lt
 *              memory addressing formats ldr(s)/str
 *              pre-index with writeback   ldr r0, [r1,#4]!
 *                              data = mem[base+offset] 
 *                      base addr reg= base+offset
 *              pre-index                  ldr r0, [r1,#4]
 *                              data = mem[base+offset]
 *                      base addr reg= not updated
 *              post-index                 ldr r0, [r1],#4
 *                              data = mem[base]
 *                      base addr reg= base+offset 
 *
 *              #4 above could be replaced by another register
 *****************************************************************
 */





.thumb
.text
.syntax unified

/*
 *****************************************************************
 *description:  loads the stack pointer with SRAM_TOP and returns
 *inputs:       none
 *outputs:      sp = SRAM_TOP
 *notes:        SRAM_TOP evaluates to 0x20000000 + 20 KiB
 *              (0x20005000).
 *              NOTE(review): presumably the end of on-chip SRAM
 *              for the target part - confirm against the linker
 *              script / datasheet.
 *              sp is replaced outright, so anything the caller
 *              had on the old stack is no longer reachable
 *              through sp.
 *****************************************************************
 */
.align  2
.global tclib_cinit
.thumb_func
.equ    SRAM_TOP, (0x20000000 + (20*1024))      /*0x20005000*/
tclib_cinit:
ldr     sp, =SRAM_TOP                           /*sp = SRAM_TOP via the ldr= pseudo-instruction*/
mov     pc, lr                                  /*return to the caller*/



/*
 *****************************************************************
 *description:  software divider (repeated subtraction). meant to
 *              be called from asm because it hands back two
 *              results in r0 and r1
 *inputs:       r0 = dividend (compared as a signed value)
 *              r1 = divisor  (compared as a signed value)
 *outputs:      r0 = quotient
 *              r1 = remainder
 *special case: if the divisor is <= 0, or the dividend is already
 *              smaller than the divisor, nothing is subtracted:
 *              r0 = 0 and r1 = dividend.
 *              the divisor <= 0 guard replaces what used to be an
 *              endless loop (divisor 0) or a run until signed
 *              overflow (negative divisor)
 *preserves:    r2
 *clobbers:     flags
 *cost:         one loop pass per unit of quotient, so large
 *              quotients are slow
 *****************************************************************
 */
        .align  2
        .global tclib_softdiv
        .thumb_func
tclib_softdiv:
        push    {r2, lr}

        mov     r2, r0                  /*r2 = running remainder*/
        mov     r0, #0                  /*r0 = quotient so far*/

        cmp     r1, #0                  /*a divisor <= 0 can never be*/
        ble     .Ltclib_softdiv_done    /*subtracted down: give up now*/

.Ltclib_softdiv_step:
        cmp     r2, r1                  /*remainder < divisor ?*/
        blt     .Ltclib_softdiv_done    /*yes: division finished*/
        sub     r2, r2, r1              /*take one divisor away*/
        add     r0, r0, #1              /*and count it*/
        b       .Ltclib_softdiv_step

.Ltclib_softdiv_done:
        mov     r1, r2                  /*r1 = remainder*/
        pop     {r2, pc}                /*restore r2 and return*/





/*
 *****************************************************************
 *description:  try once to acquire a byte-sized semaphore.
 *              does not spin: the caller retries on failure
 *input:        r0 = ptrWord, pointer to the semaphore byte
 *              (0 = free, anything else = taken)
 *output:       r0 = 1 if the semaphore was free and is now taken
 *              r0 = 0 if it was already taken (any non-zero
 *                     value) or the exclusive store failed
 *preserves:    r1, r2
 *clobbers:     flags
 *int
 *tclib_acquire_semaphore(unsigned char *ptrWord)
 *
 *notes:        a dmb follows the store so that accesses made
 *              inside the protected region are not observed
 *              before the semaphore is seen as taken
 *****************************************************************
 */
        .align  2
        .global tclib_acquire_semaphore
        .thumb_func
tclib_acquire_semaphore:
        push    {r1-r2}

        mov     r1, #1                  /*value meaning "taken"*/
        mov     r2, r0                  /*r2 = ptrWord, r0 becomes result*/

        ldrexb  r0, [r2]                /*exclusive read of current value*/
        cmp     r0, #0
        bne     .Ltclib_acquire_busy    /*non-zero: somebody holds it*/

        strexb  r0, r1, [r2]            /*r0 = 0 stored, 1 not stored*/
        eor     r0, r0, #1              /*flip to 1 = success, 0 = failure*/
        dmb                             /*order later accesses after the acquire*/
        b       .Ltclib_acquire_exit

.Ltclib_acquire_busy:
        mov     r0, #0                  /*report failure for ANY non-zero value*/

.Ltclib_acquire_exit:
        clrex                           /*drop any exclusive reservation left open*/

        pop     {r1-r2}
        bx      lr





/*
 *****************************************************************
 *description:  release a byte-sized semaphore by writing 0 to it
 *input:        r0 = ptrWord, pointer to the semaphore byte
 *              (0 = free, anything else = taken)
 *output:       r0 = 1 if the semaphore was taken and is now free
 *              r0 = 0 if it was already free or the exclusive
 *                     store failed (caller may retry)
 *preserves:    r1, r2
 *clobbers:     flags
 *int
 *tclib_release_semaphore(unsigned char *ptrWord)
 *
 *notes:        a dmb is issued first so that accesses made
 *              inside the protected region complete before the
 *              semaphore is seen as free
 *****************************************************************
 */
        .align  2
        .global tclib_release_semaphore
        .thumb_func
tclib_release_semaphore:
        push    {r1-r2}

        mov     r1, #0                  /*value meaning "free"*/
        mov     r2, r0                  /*r2 = ptrWord, r0 becomes result*/

        dmb                             /*finish protected accesses first*/

        ldrexb  r0, [r2]                /*exclusive read of current value*/
        cmp     r0, #0
        beq     .Ltclib_release_exit    /*already free: r0 is 0 = failure*/

        strexb  r0, r1, [r2]            /*r0 = 0 stored, 1 not stored*/
        eor     r0, r0, #1              /*flip to 1 = success, 0 = failure*/

.Ltclib_release_exit:
        clrex                           /*drop any exclusive reservation left open*/

        pop     {r1-r2}
        bx      lr





/*
 *****************************************************************
 *description:  fill a buffer with one byte value
 *input:        r0 = ptrString, start of the buffer
 *              r1 = wdValue,   fill value (only the low 8 bits
 *                              are used)
 *              r2 = wdSize,    number of bytes; <= 0 writes
 *                              nothing
 *output:       none. r0-r3 are restored on exit, so r0 still
 *              holds ptrString
 *clobbers:     flags
 *void
 *tclib_memset(unsigned char *ptrString, int wdValue, int wdSize)
 *{
 *while ((wdSize--) >0)
 *       *(ptrString++) = (char)wdValue;
 *}
 *
 *method:       byte stores until the pointer is word aligned,
 *              then word stores, then byte stores for the last
 *              0..3 bytes
 *****************************************************************
 */
        .align  2
        .global tclib_memset
        .thumb_func
tclib_memset:
        push    {r0-r3, lr}

        cmp     r2, #0                  /*size <= 0: nothing to write*/
        ble     .Ltclib_memset_done

        and     r1, r1, #0xff           /*(char)wdValue: drop upper bits*/
        orr     r1, r1, r1, lsl #8      /*byte  -> halfword*/
        orr     r1, r1, r1, lsl #16     /*halfword -> word*/

.Ltclib_memset_head:                    /*bytes until word aligned*/
        tst     r0, #0x3
        beq     .Ltclib_memset_words
        strb    r1, [r0], #1
        subs    r2, r2, #1
        bne     .Ltclib_memset_head
        b       .Ltclib_memset_done     /*ran out before aligning*/

.Ltclib_memset_words:                   /*r0 is word aligned here*/
        cmp     r2, #4
        blt     .Ltclib_memset_tail
        str     r1, [r0], #4
        sub     r2, r2, #4
        b       .Ltclib_memset_words

.Ltclib_memset_tail:                    /*0..3 bytes remain*/
        cmp     r2, #0
        ble     .Ltclib_memset_done
.Ltclib_memset_tail_byte:
        strb    r1, [r0], #1
        subs    r2, r2, #1
        bne     .Ltclib_memset_tail_byte

.Ltclib_memset_done:
        pop     {r0-r3, pc}             /*restore and return*/




/*
 *****************************************************************
 *description:  copy wdSize bytes from source to destination
 *input:        r0 = ptrDstStr, destination buffer
 *              r1 = ptrSrcStr, source buffer
 *              r2 = wdSize,    number of bytes; <= 0 copies
 *                              nothing
 *output:       none. r0-r3 are restored on exit
 *clobbers:     flags
 *void
 *tclib_memcpy(unsigned char *ptrDstStr,\
 *              unsigned char *ptrSrcStr, \
 *              int wdSize)
 *{
 *while ((wdSize--) >0)
 *       *(ptrDstStr++) = *(ptrSrcStr++);
 *}
 *
 *method:       word copies while both pointers are word aligned
 *              and at least 4 bytes remain, byte copies otherwise.
 *              buffers are walked upwards only, so an overlapping
 *              destination above the source is not handled
 *****************************************************************
 */
        .align  2
        .global tclib_memcpy
        .thumb_func
tclib_memcpy:
        push    {r0-r3, lr}

        cmp     r2, #0                  /*size <= 0: nothing to copy*/
        ble     .Ltclib_memcpy_done

        orr     r3, r0, r1              /*low bits set if either pointer*/
        tst     r3, #0x3                /*is not word aligned*/
        bne     .Ltclib_memcpy_bytes

.Ltclib_memcpy_words:                   /*both pointers word aligned*/
        cmp     r2, #4
        blt     .Ltclib_memcpy_tail
        ldr     r3, [r1], #4            /*get from src*/
        str     r3, [r0], #4            /*put in   dst*/
        sub     r2, r2, #4
        b       .Ltclib_memcpy_words

.Ltclib_memcpy_tail:                    /*0..3 bytes remain*/
        cmp     r2, #0
        beq     .Ltclib_memcpy_done

.Ltclib_memcpy_bytes:                   /*r2 > 0 on every entry*/
        ldrb    r3, [r1], #1            /*get from src*/
        strb    r3, [r0], #1            /*put in   dst*/
        subs    r2, r2, #1
        bne     .Ltclib_memcpy_bytes

.Ltclib_memcpy_done:
        pop     {r0-r3, pc}             /*restore and return*/





/*
 *****************************************************************
 *description:  ascii to integer. the string is read as
 *              HEXADECIMAL digits (0-9, a-f, A-F), no prefix,
 *              no sign
 *input:        r0 = ptrAscStr, NUL terminated string
 *output:       r0 = value
 *              r0 = 0 for an empty string or if any character
 *                   is not a hex digit
 *preserves:    r1-r4
 *clobbers:     flags
 *calls:        tclib_strlen
 *int
 *tclib_atoi(unsigned char *ptrAscStr)
 *{
 *unsigned char     *ptrTemp;
 *int wdValue,wdWeight,wdTemp;
 *
 *wdValue =0;
 *wdWeight=1;
 *ptrTemp =ptrAscStr + strlen(ptrAscStr);
 *
 *while ((--ptrTemp) >= ptrAscStr)
 *       {
 *       wdTemp = *ptrTemp;
 *       if (wdTemp >= '0' && wdTemp <= '9')
 *               wdTemp-= '0';
 *       else if (wdTemp >= 'a' && wdTemp <= 'f')
 *               wdTemp = wdTemp - 'a' + 10;
 *       else if (wdTemp >= 'A' && wdTemp <= 'F')
 *               wdTemp = wdTemp - 'A' + 10;
 *       else
 *               return 0;
 *
 *       wdValue  += wdTemp * wdWeight;
 *       wdWeight<<= 4;
 *       }
 *
 *return wdValue;
 *}
 *
 *registers:    r0 = index of the digit being converted
 *              r1 = string base address
 *              r2 = wdValue
 *              r3 = wdWeight (16^position)
 *              r4 = string base across the call, then the
 *                   current character / digit
 *notes:        digits beyond the eighth (from the right) are
 *              multiplied by a weight that has shifted out to 0,
 *              so they do not contribute to the result.
 *              the frame is popped only on the exit path; the
 *              previous unconditional pop after tclib_strlen
 *              discarded the string pointer and unbalanced the
 *              stack
 *****************************************************************
 */
        .align  2
        .global tclib_atoi
        .thumb_func
tclib_atoi:
        push    {r1-r4, lr}

        mov     r4, r0                  /*keep base across the call*/
        bl      tclib_strlen            /*r0 = number of characters*/
        mov     r1, r4                  /*r1 = string base*/

        mov     r2, #0                  /*wdValue  = 0*/
        mov     r3, #1                  /*wdWeight = 1*/

.Ltclib_atoi_next:
        subs    r0, r0, #1              /*step to next digit, right to left*/
        bmi     .Ltclib_atoi_exit       /*below index 0: all digits done*/

        ldrb    r4, [r1, r0]            /*r4 = ptrAscStr[index]*/

        cmp     r4, #'0'
        blt     .Ltclib_atoi_bad        /*below '0': not a hex digit*/
        cmp     r4, #'9'
        bgt     .Ltclib_atoi_letter
        sub     r4, r4, #'0'            /*'0'..'9' -> 0..9*/
        b       .Ltclib_atoi_accumulate

.Ltclib_atoi_letter:
        orr     r4, r4, #0x20           /*fold 'A'-'F' onto 'a'-'f'; no*/
                                        /*other character lands in a-f*/
        cmp     r4, #'a'
        blt     .Ltclib_atoi_bad
        cmp     r4, #'f'
        bgt     .Ltclib_atoi_bad
        sub     r4, r4, #87             /*'a'..'f' -> 10..15 ('a' - 10)*/

.Ltclib_atoi_accumulate:
        mla     r2, r4, r3, r2          /*wdValue += digit * wdWeight*/
        lsl     r3, r3, #4              /*wdWeight *= 16*/
        b       .Ltclib_atoi_next

.Ltclib_atoi_bad:
        mov     r2, #0                  /*invalid character: result is 0*/

.Ltclib_atoi_exit:
        mov     r0, r2                  /*return wdValue*/
        pop     {r1-r4, pc}             /*restore and return*/
        




/*
 *****************************************************************
 *description:  integer to ascii. writes the value as upper-case
 *              HEXADECIMAL digits followed by a NUL
 *input:        r0 = wdValue
 *              r1 = ptrAscStr, output buffer
 *output:       none. r0-r4 are restored on exit
 *clobbers:     flags
 *void
 *tclib_itoa(int wdValue, unsigned char *ptrAscStr)
 *
 *digit count:  chosen from the value treated as unsigned
 *                      value < 0x100      -> 2 digits
 *                      value < 0x10000    -> 4 digits
 *                      value < 0x1000000  -> 6 digits
 *                      anything else      -> 8 digits
 *              (negative values therefore always give 8 digits)
 *              leading zeros are kept, e.g. 0x12345 -> "012345"
 *buffer size:  up to 9 bytes are written (8 digits + NUL)
 *
 *registers:    r2 = digits still to write
 *              r3 = scratch (NUL, then current digit)
 *              r4 = write pointer, moving towards ptrAscStr
 *****************************************************************
 */
        .align  2
        .global tclib_itoa
        .thumb_func
tclib_itoa:
        push    {r0-r4, lr}

        mov     r2, #8                  /*widest case first, then narrow*/
        cmp     r0, #0x1000000          /*value < (1<<24) ?*/
        it      lo
        movlo   r2, #6
        cmp     r0, #0x10000            /*value < (1<<16) ?*/
        it      lo
        movlo   r2, #4
        cmp     r0, #0x100              /*value < (1<<8) ?*/
        it      lo
        movlo   r2, #2

        add     r4, r1, r2              /*r4 -> slot just past the digits*/
        mov     r3, #0
        strb    r3, [r4], #-1           /*terminate, step to last digit*/

.Ltclib_itoa_digit:
        subs    r2, r2, #1              /*one digit fewer to go*/
        bmi     .Ltclib_itoa_exit       /*none left*/

        and     r3, r0, #0x0f           /*lowest nibble*/
        lsr     r0, r0, #4              /*expose the next nibble*/

        cmp     r3, #10
        ite     lt
        addlt   r3, r3, #'0'            /*0..9   -> '0'..'9'*/
        addge   r3, r3, #55             /*10..15 -> 'A'..'F' ('A' - 10)*/

        strb    r3, [r4], #-1           /*store, move one place left*/
        b       .Ltclib_itoa_digit

.Ltclib_itoa_exit:
        pop     {r0-r4, lr}
        mov     pc, lr





/*
 *****************************************************************
 *description:  word swap. reverses the order of the four bytes
 *              of a 32-bit value
 *input:        r0 = wdValue
 *output:       r0 = value with its bytes reversed,
 *              e.g. 0x11223344 -> 0x44332211
 *int
 *tclib_swap(int wdValue)
 *{
 *unsigned int dwTemp;
 *
 *dwTemp = 0;
 *dwTemp|= (wdValue << 24) & 0xff000000;
 *dwTemp|= (wdValue << 8 ) & 0x00ff0000;
 *dwTemp|= (wdValue >> 8 ) & 0x0000ff00;
 *dwTemp|= (wdValue >> 24) & 0x000000ff;
 *
 *return dwTemp;
 *}
 *
 *notes:        a single rev instruction does the whole job; no
 *              other register and no stack is used
 *****************************************************************
 */
        .align  2
        .global tclib_swap
        .thumb_func
tclib_swap:
        rev     r0, r0                  /*reverse byte order in place*/
        mov     pc, lr                  /*return*/





/*
 *****************************************************************
 *description:  byte swap. exchanges the two low bytes of the
 *              value; the upper 16 bits of the result are 0
 *input:        r0 = wdValue
 *output:       r0 = byte swapped low halfword,
 *              e.g. 0x00001234 -> 0x00003412
 *                   0xAABBCCDD -> 0x0000DDCC
 *int
 *tclib_bswap(int wdValue)
 *{
 *unsigned int dwTemp;
 *
 *dwTemp = 0;
 *dwTemp|= (wdValue << 8 ) & 0xff00;
 *dwTemp|= (wdValue >> 8 ) & 0x0ff;
 *
 *return dwTemp;
 *}
 *
 *notes:        the previous rev + lsl #16 sequence left the
 *              swapped bytes in the UPPER halfword and zero in
 *              the lower one, which does not match the C
 *              reference above
 *****************************************************************
 */
        .align  2
        .global tclib_bswap
        .thumb_func
tclib_bswap:
        rev16   r0, r0                  /*swap bytes inside each halfword*/
        uxth    r0, r0                  /*keep the low halfword, clear the rest*/
        bx      lr                      /*return*/
 




/*
 *****************************************************************
 *description:  string length computation function
 *input:        r0 = ptrStrSrc, NUL terminated string
 *output:       r0 = length of the string in bytes, not counting
 *                   the terminating NUL
 *preserves:    r1, r2
 *clobbers:     flags
 *int
 *tclib_strlen(unsigned char *ptrStrSrc)
 *{
 *unsigned int wdCount;
 *for (wdCount=0; ptrStrSrc[wdCount]!=NULL; wdCount++)
 *      ;
 *
 *return wdCount;
 *}
 *
 *registers:    r1 = read pointer
 *              r2 = byte just read
 *****************************************************************
 */
        .align  2
        .global tclib_strlen
        .thumb_func
tclib_strlen:
        push    {r1-r2, lr}

        mov     r1, r0                  /*r1 walks, r0 remembers the start*/

.Ltclib_strlen_scan:
        ldrb    r2, [r1], #1            /*read a byte, step past it*/
        cmp     r2, #0
        bne     .Ltclib_strlen_scan     /*keep going until the NUL*/

        sub     r0, r1, r0              /*bytes consumed, NUL included*/
        sub     r0, r0, #1              /*leave the NUL out of the count*/

        pop     {r1-r2, lr}
        mov     pc, lr





/*
 *****************************************************************
 *description:  string comparison
 *input:        r0 = first  NUL terminated string
 *              r1 = second NUL terminated string
 *output:       r0 = 0 if both strings are identical up to and
 *                     including their NUL
 *              r0 = index of the first differing character
 *                   otherwise
 *preserves:    r1-r4
 *clobbers:     flags
 *int
 *tclib_strcmp(unsigned char *ptrStrSrc,\
 *             unsigned char *ptrStrDst,\
 *             int wdCount)
 *
 *notes:        wdCount is NOT read: r2 is overwritten on entry,
 *              and the comparison always runs to the first
 *              difference or the common NUL.
 *              a difference at index 0 also returns 0, so that
 *              case cannot be told apart from equal strings
 *
 *registers:    r0 = number of characters matched so far
 *              r1 = read pointer into the second string
 *              r2 = read pointer into the first string
 *              r3 = character from the second string
 *              r4 = character from the first string
 *****************************************************************
 */
        .align  2
        .global tclib_strcmp
        .thumb_func
tclib_strcmp:
        push    {r1-r4, lr}

        mov     r2, r0                  /*r2 walks the first string*/
        mov     r0, #0                  /*nothing matched yet*/

.Ltclib_strcmp_next:
        ldrb    r3, [r1], #1            /*next char of second string*/
        ldrb    r4, [r2], #1            /*next char of first string*/

        cmp     r3, r4
        bne     .Ltclib_strcmp_exit     /*differ: r0 is the failing index*/

        add     r0, r0, #1              /*one more character matched*/
        cmp     r3, #0                  /*chars are equal, so one test*/
        bne     .Ltclib_strcmp_next     /*covers both: not the NUL yet*/

        mov     r0, #0                  /*reached the common NUL: equal*/

.Ltclib_strcmp_exit:
        pop     {r1-r4, lr}
        mov     pc, lr





.end

No comments:

Post a Comment