Current egcs, binutils and kernel (fwd)
Gabriel Paubert
paubert at iram.es
Wed Apr 21 03:45:28 EST 1999
On Tue, 20 Apr 1999, Geert Uytterhoeven wrote:
>
> ---------- Forwarded message ----------
> Date: Tue, 20 Apr 1999 13:15:41 +0200
> From: Reinhard Nissl <rnissl at gmx.de>
> To: Geert Uytterhoeven <Geert.Uytterhoeven at cs.kuleuven.ac.be>
> Cc: "linux-apus at sunsite.auc.dk" <linux-apus at sunsite.auc.dk>
> Subject: Re: Current egcs, binutils and kernel
>
> Hi,
>
> Geert Uytterhoeven wrote:
>
> > On Wed, 14 Apr 1999, Reinhard Nissl wrote:
> > > has anyone had success in compiling (egcs-1.1.2 and binutils-2.9.1.0.23)
> > > the current APUS kernel with support for network block devices (nbd.c)?
> > >
> > > I get an undefined reference to __lshrdi3 from nbd_ioctl(), which looks
> > > like a compiler / binutils bug.
> >
> > Hence a __lshrdi3() routine needs to be added to arch/ppc/kernel/misc.S.
>
> I had a look into misc.S and found similar routines (__ashrdi3) there. Then I
> searched in the egcs-1.1.2 sources for files, where such functions are
> referenced. I found definitions in egcs-1.1.2/gcc/config/rs6000/rs6000.md but
> they are not native ppc assembler instructions. As I'm not that much used to
> *.md files and ppc assembly code, I'm currently not able to define the missing
> function in misc.S myself.
>
> I checked the kernel source diffs from version 2.2.4 to 2.2.6 for lshrdi3 and
> had only success for arch=sparc. So, is there anybody who can add the missing
> function to misc.S for arch=ppc?
I'd suggest the following patch. Note that the current versions of the
long long shifts will not work when the shift count is > 32. There is an
appendix in all good PPC manuals on how to do multiple precision shifts
and I've followed it (apart from reordering the instructions for better
superscalar issue/execution and completion; all the code should flow
perfectly through 2 pipes) with one exception: the arithmetic right
shift is one instruction longer but is branchless (conditional clear
of a register using a shift whose amount is computed by an rlwinm
instruction).
I've also fixed a few other oddities in the code:
- atomic_dec_and_test uses cntlzw the way God intended to evaluate
`(x==0) ? 1 : 0' without any branch
- the abs function is also branchless now (it would nevertheless be better
to use the __builtin_abs function of GCC)
Greetings,
Gabriel.
--- linux-2.2.6/arch/ppc/kernel/misc.S Thu Mar 11 05:30:32 1999
+++ linux/arch/ppc/kernel/misc.S Tue Apr 20 20:14:03 1999
@@ -228,10 +228,8 @@
subi r5,r5,1 /* Perform 'add' operation */
stwcx. r5,0,r3 /* Update with new value */
bne- 10b /* Retry if "reservation" (i.e. lock) lost */
- cmpi 0,r5,0 /* Return 'true' IFF 0 */
- li r3,1
- beqlr
- li r3,0
+ cntlzw r3,r5 /* Return 'true' IFF 0 */
+ srwi r3,r3,5 /* But do it the clever way */
blr
_GLOBAL(atomic_clear_mask)
10: lwarx r5,0,r4
@@ -355,38 +353,59 @@
blr
/*
- * Extended precision shifts
+ * Extended precision shifts.
+ *
+ * Updated to be valid for shift counts from 0 to 63 inclusive.
+ * -- Gabriel
*
* R3/R4 has 64 bit value
* R5 has shift count
* result in R3/R4
*
- * ashrdi3: XXXYYY/ZZZAAA -> SSSXXX/YYYZZZ
- * ashldi3: XXXYYY/ZZZAAA -> YYYZZZ/AAA000
+ * ashrdi3: arithmetic right shift (sign propagation)
+ * lshrdi3: logical right shift
+ * ashldi3: left shift
*/
_GLOBAL(__ashrdi3)
- li r6,32
- sub r6,r6,r5
- slw r7,r3,r6 /* isolate YYY */
- srw r4,r4,r5 /* isolate ZZZ */
- or r4,r4,r7 /* YYYZZZ */
- sraw r3,r3,r5 /* SSSXXX */
+ subfic r6,r5,32
+ srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count
+ addi r7,r5,32 # could be xori, or addi with -32
+ slw r6,r3,r6 # t1 = count > 32 ? 0 : MSW << (32-count)
+ rlwinm r8,r7,0,32 # t3 = (count < 32) ? 32 : 0
+ sraw r7,r3,r7 # t2 = MSW >> (count-32)
+ or r4,r4,r6 # LSW |= t1
+ slw r7,r7,r8 # t2 = (count < 32) ? 0 : t2
+ sraw r3,r3,r5 # MSW = MSW >> count
+ or r4,r4,r7 # LSW |= t2
blr
-
+
_GLOBAL(__ashldi3)
- li r6,32
- sub r6,r6,r5
- srw r7,r4,r6 /* isolate ZZZ */
- slw r4,r4,r5 /* AAA000 */
- slw r3,r3,r5 /* YYY--- */
- or r3,r3,r7 /* YYYZZZ */
+ subfic r6,r5,32
+ slw r3,r3,r5 # MSW = count > 31 ? 0 : MSW << count
+ addi r7,r5,32 # could be xori, or addi with -32
+ srw r6,r4,r6 # t1 = count > 32 ? 0 : LSW >> (32-count)
+ slw r7,r4,r7 # t2 = count < 32 ? 0 : LSW << (count-32)
+ or r3,r3,r6 # MSW |= t1
+ slw r4,r4,r5 # LSW = LSW << count
+ or r3,r3,r7 # MSW |= t2
+ blr
+
+_GLOBAL(__lshrdi3)
+ subfic r6,r5,32
+ srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count
+ addi r7,r5,32 # could be xori, or addi with -32
+ slw r6,r3,r6 # t1 = count > 32 ? 0 : MSW << (32-count)
+ srw r7,r3,r7 # t2 = count < 32 ? 0 : MSW >> (count-32)
+ or r4,r4,r6 # LSW |= t1
+ srw r3,r3,r5 # MSW = MSW >> count
+ or r4,r4,r7 # LSW |= t2
blr
_GLOBAL(abs)
- cmpi 0,r3,0
- bge 10f
- neg r3,r3
-10: blr
+ srawi r4,r3,31
+ xor r3,r3,r4
+ sub r3,r3,r4
+ blr
_GLOBAL(_get_SP)
mr r3,r1 /* Close enough */
[[ This message was sent via the linuxppc-dev mailing list. Replies are ]]
[[ not forced back to the list, so be sure to Cc linuxppc-dev if your ]]
[[ reply is of general interest. Please check http://lists.linuxppc.org/ ]]
[[ and http://www.linuxppc.org/ for useful information before posting. ]]
More information about the Linuxppc-dev
mailing list