RFC: Reducing the number of non volatile GPRs in the ppc64 kernel
Anton Blanchard
anton at samba.org
Mon Aug 10 14:52:28 AEST 2015
Hi Bill, Segher,
> I agree with Segher. We already know we have opportunities to do a
> better job with shrink-wrapping (pushing this kind of useless
> activity down past early exits), so having examples of code to look
> at to improve this would be useful.
I'll look out for specific examples. I noticed this one today when
analysing malloc(8). It is an instruction trace of _int_malloc().
The overall function is pretty huge, which I assume is what leads gcc to use
so many non-volatile registers. Perhaps in this case we should separate out the
slow path into another function marked noinline.
This is just an upstream glibc build, but I'll send the preprocessed
source off-list.
Anton
--
0x410d538 mflr r0
0x410d53c li r9,-65
0x410d540 std r14,-144(r1) # 0x0000000fff00efe0
0x410d544 std r15,-136(r1) # 0x0000000fff00efe8
0x410d548 cmpld cr7,r4,r9
0x410d54c std r16,-128(r1) # 0x0000000fff00eff0
0x410d550 std r17,-120(r1) # 0x0000000fff00eff8
0x410d554 std r18,-112(r1) # 0x0000000fff00f000
0x410d558 std r19,-104(r1) # 0x0000000fff00f008
0x410d55c std r20,-96(r1) # 0x0000000fff00f010
0x410d560 std r21,-88(r1) # 0x0000000fff00f018
0x410d564 std r22,-80(r1) # 0x0000000fff00f020
0x410d568 std r23,-72(r1) # 0x0000000fff00f028
0x410d56c std r0,16(r1) # 0x0000000fff00f080
0x410d570 std r24,-64(r1) # 0x0000000fff00f030
0x410d574 std r25,-56(r1) # 0x0000000fff00f038
0x410d578 std r26,-48(r1) # 0x0000000fff00f040
0x410d57c std r27,-40(r1) # 0x0000000fff00f048
0x410d580 std r28,-32(r1) # 0x0000000fff00f050
0x410d584 std r29,-24(r1) # 0x0000000fff00f058
0x410d588 std r30,-16(r1) # 0x0000000fff00f060
0x410d58c std r31,-8(r1) # 0x0000000fff00f068
0x410d590 stdu r1,-224(r1) # 0x0000000fff00ef90
0x410d594 bgt cr7,0x410dda4
0x410d598 addi r9,r4,23
0x410d59c li r16,32
0x410d5a0 cmpldi cr7,r9,31
0x410d5a4 bgt cr7,0x410d700
0x410d5a8 cmpdi cr7,r3,0
0x410d5ac mr r14,r3
0x410d5b0 mr r30,r4
0x410d5b4 beq cr7,0x410ddc0
0x410d5b8 nop
0x410d5bc ld r9,-19136(r2) # 0x0000000004222840
0x410d5c0 rlwinm r29,r16,28,4,31
0x410d5c4 cmpld cr7,r16,r9
0x410d5c8 bgt cr7,0x410d650
0x410d5cc addi r6,r29,-2
0x410d5d0 clrldi r9,r6,32
0x410d5d4 rldicr r10,r9,3,60
0x410d5d8 addi r7,r9,1
0x410d5dc add r10,r3,r10
0x410d5e0 rldicr r7,r7,3,60
0x410d5e4 add r7,r3,r7
0x410d5e8 ld r9,8(r10) # 0x0000000004220ce0
0x410d5ec cmpdi cr7,r9,0
0x410d5f0 beq cr7,0x410d650
0x410d5f4 ld r10,16(r9) # 0x0000000010030010
0x410d5f8 ldarx r15,0,r7,1 # 0x0000000004220ce0
0x410d5fc cmpd r15,r9
0x410d600 bne 0x410d60c
0x410d604 stdcx. r10,0,r7 # 0x0000000004220ce0
0x410d608 bne- 0x410d5f8
0x410d60c isync
0x410d610 cmpld cr7,r15,r9
0x410d614 bne cr7,0x410d648
0x410d618 b 0x410da40
0x410da40 ld r9,8(r15) # 0x0000000010030008
0x410da44 rlwinm r9,r9,28,4,31
0x410da48 addi r9,r9,-2
0x410da4c cmplw cr7,r9,r6
0x410da50 bne cr7,0x410de08
0x410da54 nop
0x410da58 addi r31,r15,16
0x410da5c lwa r9,-19080(r2) # 0x0000000004222878
0x410da60 cmpdi cr7,r9,0
0x410da64 bne cr7,0x410d6e4
0x410da68 addi r1,r1,224
0x410da6c mr r3,r31
0x410da70 ld r0,16(r1) # 0x0000000fff00f080
0x410da74 ld r14,-144(r1) # 0x0000000fff00efe0
0x410da78 ld r15,-136(r1) # 0x0000000fff00efe8
0x410da7c ld r16,-128(r1) # 0x0000000fff00eff0
0x410da80 ld r17,-120(r1) # 0x0000000fff00eff8
0x410da84 ld r18,-112(r1) # 0x0000000fff00f000
0x410da88 ld r19,-104(r1) # 0x0000000fff00f008
0x410da8c ld r20,-96(r1) # 0x0000000fff00f010
0x410da90 ld r21,-88(r1) # 0x0000000fff00f018
0x410da94 ld r22,-80(r1) # 0x0000000fff00f020
0x410da98 ld r23,-72(r1) # 0x0000000fff00f028
0x410da9c ld r24,-64(r1) # 0x0000000fff00f030
0x410daa0 mtlr r0
0x410daa4 ld r25,-56(r1) # 0x0000000fff00f038
0x410daa8 ld r26,-48(r1) # 0x0000000fff00f040
0x410daac ld r27,-40(r1) # 0x0000000fff00f048
0x410dab0 ld r28,-32(r1) # 0x0000000fff00f050
0x410dab4 ld r29,-24(r1) # 0x0000000fff00f058
0x410dab8 ld r30,-16(r1) # 0x0000000fff00f060
0x410dabc ld r31,-8(r1) # 0x0000000fff00f068
0x410dac0 blr
More information about the Linuxppc-dev
mailing list