PowerPC bn_div_words routine rewrite

David Ho DavidHo at nanometrics.ca
Thu Jun 30 01:50:36 EST 2005


Hi all,

This is a rewrite of the bn_div_words routine for the PowerPC arch, tested
on a MPC8xx processor.
I initially thought there is maybe a small mistake in the code that
requires a one-liner change but it turns out I have to redo the routine.
I guess this routine is not called very often as I see that most other
routines are hand-crafted, whereas this routine is compiled from a C
function that apparently has not gone through a whole lot of testing.

I wrote a C function to confirm correctness of the code.

unsigned long div_words (unsigned long h,
                         unsigned long l,
                         unsigned long d)
{
  unsigned long i_h; /* intermediate dividend */
  unsigned long i_q; /* quotient of i_h/d */
  unsigned long i_r; /* remainder of i_h/d */

  unsigned long i_cntr;
  unsigned long i_carry;

  unsigned long ret_q; /* return quotient */

  /* cannot divide by zero */
  if (d == 0) return 0xffffffff;

  /* do simple 32-bit divide */
  if (h == 0) return l/d;

  i_q = h/d;
  i_r = h - (i_q*d);
  ret_q = i_q;

  i_cntr = 32;

  while (i_cntr--)
  {
    i_carry = (l & 0x80000000) ? 1:0;
    l = l << 1;

    i_h = (i_r << 1) | i_carry;
    i_q = i_h/d;
    i_r = i_h - (i_q*d);

    ret_q = (ret_q << 1) | i_q;
  }

  return ret_q;
}


Then I handcrafted the routine in PPC assembly.
The result is a 26 line assembly that is easy to understand and predictable
as opposed to a 81liner that I am still trying to decipher...
If anyone is interested in incorporating this routine to the openssl code
I'll be happy to assist.
At this point I think I will be taking a bit of a break from this 3 day
debugging/fixing marathon.

Regards,
David Ho


#
#       Handcrafted version of bn_div_words
#
#       r3 = h
#       r4 = l
#       r5 = d

        cmplwi  0,r5,0                  # compare r5 and 0
        bc      BO_IF_NOT,CR0_EQ,.Lppcasm_div1  # proceed if d!=0
        li      r3,-1                   # d=0 return -1
        bclr    BO_ALWAYS,CR0_LT
.Lppcasm_div1:
        cmplwi  0,r3,0                  # compare r3 and 0
        bc      BO_IF_NOT,CR0_EQ,.Lppcasm_div2  # proceed if h != 0
        divwu   r3,r4,r5                # ret_q = l/d
        bclr    BO_ALWAYS,CR0_LT        # return result in r3
.Lppcasm_div2:
        divwu   r9,r3,r5                # i_q = h/d
        mullw   r10,r9,r5               # i_r = h - (i_q*d)
        subf    r10,r10,r3
        mr      r3,r9                   # req_q = i_q
.Lppcasm_set_ctr:
        li      r12,32                  # ctr = bitsizeof(d)
        mtctr   r12
.Lppcasm_div_loop:
        addc    r4,r4,r4                # l = l << 1 -> i_carry
        adde    r11,r10,r10             # i_h = (i_r << 1) | i_carry
        divwu   r9,r11,r5               # i_q = i_h/d
        mullw   r10,r9,r5               # i_r = i_h - (i_q*d)
        subf    r10,r10,r11
        add     r3,r3,r3                # ret_q = ret_q << 1 | i_q
        add     r3,r3,r9
        bc      BO_dCTR_NZERO,CR0_EQ,.Lppcasm_div_loop
.Lppc_div_end:
        bclr    BO_ALWAYS,CR0_LT        # return result in r3
        .long   0x00000000




More information about the Linuxppc-embedded mailing list