gcc 4.6.3 miscompile on ppc32 (was Re: Regression in kernel 4.12-rc1 for Powerpc 32 - bisected to commit 3448890c32c3)

Al Viro viro at ZenIV.linux.org.uk
Mon Jun 26 06:53:24 AEST 2017


On Sun, Jun 25, 2017 at 12:14:04PM +0100, Al Viro wrote:
> On Sun, Jun 25, 2017 at 10:53:58AM +0100, Al Viro wrote:
> > On Sat, Jun 24, 2017 at 12:29:23PM -0500, Larry Finger wrote:
> > 
> > > I made a breakthrough. If I turn off inline copy to/from users for 32-bit
> > > ppc with the following patch, then the system boots:
> > 
> > OK...  So it's 4.6.3 miscompiling something - it is hardware-independent,
> > reproduced in qemu.  I'd like to get a more self-contained example of
> > the miscompile, though; should be done by tonight...
> 
> OK, it's the call in rw_copy_check_uvector(); with INLINE_COPY_FROM_USER
> it's miscompiled by 4.6.3.  I hadn't looked through the generated code
> yet; will do that after I grab some sleep.

Confirmed.  It manages to bugger the loop immediately after the (successful)
copying of iovec array in rw_copy_check_uvector(); both with and without
INLINE_COPY_FROM_USER it has (just before the call of copy_from_user()) r27
set to nr_segs * sizeof(struct iovec).  The call is made, we check that it
has succeeded and that's when it hits the fan: without INLINE_COPY_FROM_USER
we have (interleaved with unrelated insns)
        addi 27,27,-8
        srwi 27,27,3
        addi 27,27,1
        mtctr 27
Weird, but manages to pass nr_segs to mtctr.  _With_ INLINE_COPY_FROM_USER we
get this:
        lis 9,0x2000
        mtctr 9
In other words, the loop will try to go through 0x20000000 (536870912)
iterations - lis puts 0x2000 into the upper halfword of r9.  No idea where
that number has come from, but it sure as hell is wrong.  That's where those
-EINVAL, etc. are coming from - we run into something negative in iov[seg].len,
after having run out of on-stack iovec array.

	Assembler generated out of rw_copy_check_uvector() with and without
INLINE_COPY_FROM_USER is attached; it's a definite miscompile.  Neither 4.4.5
nor 6.3.0 use mtctr/bdnz for that loop.

	The bottom line is, the ppc cross-toolchain on kernel.org happens to be
the version that miscompiles rw_copy_check_uvector() with INLINE_COPY_FROM_USER
and hell knows what else.  That said, I would rather have ppc32 drop the
INLINE_COPY_{TO,FROM}_USER anyway; that won't fix any other places where
the same 4.6.3 bug hits, but I seriously suspect that it will end up being
faster even on non^Wless buggy gcc versions.  Could powerpc folks check
what removing those two defines from arch/powerpc/include/asm/uaccess.h
does to performance?  If there's no slowdown, I would strongly recommend just
removing those as in the patch Larry has posted upthread.

	Fixing whatever it is in gcc 4.6.3 that triggers that behaviour is
IMO pointless - it might make sense to switch kernel.org cross-toolchain to
something more recent, but that's it.
-------------- next part --------------
	# rw_copy_check_uvector as emitted by gcc 4.6.3 for ppc32 -- this is the
	# listing that contains the bad "lis 9,0x2000 / mtctr 9" loop setup (see
	# .L498 below).  The copy from user space is expanded inline here: a range
	# check against current->thread.fs.seg followed by a direct call to
	# __copy_tofrom_user.
	#
	# Register roles, taken from the compiler's own operand annotations:
	#   r30 = type          r31 = uvector       r29 = ret_pointer
	#   r28 = iov (starts as fast_pointer, replaced by the __kmalloc result)
	#   r27 = nr_segs, later n = nr_segs << 3
	#   r6  = fast_segs on entry (reused as a constant from .L498 on)
	#   r0  = ret, moved to r3 at .L495
	.globl rw_copy_check_uvector
	.type	rw_copy_check_uvector, @function
rw_copy_check_uvector:
.LFB2683:
	.loc 1 773 0
	# Prologue: 32-byte frame, r27..r31 saved at 12(r1), LR saved at 36(r1).
	stwu 1,-32(1)	 #,,
.LCFI142:
	mflr 0	 #,
.LCFI143:
	stmw 27,12(1)	 #,
.LCFI144:
	.loc 1 783 0
	# Record form: also sets cr0 for the nr_segs == 0 test at the beq- below.
	mr. 27,5	 # nr_segs, nr_segs
	.loc 1 773 0
	mr 30,3	 # type, type
	stw 0,36(1)	 #,
.LCFI145:
	.loc 1 773 0
	mr 31,4	 # uvector, uvector
	mr 29,8	 # ret_pointer, ret_pointer
	.loc 1 776 0
	mr 28,7	 # iov, fast_pointer
	.loc 1 784 0
	li 0,0	 # ret,
	.loc 1 783 0
	# nr_segs == 0: exit with ret = 0.
	beq- 0,.L495	 #
	.loc 1 792 0
	cmplwi 7,27,1024	 #, tmp160, nr_segs
	.loc 1 793 0
	li 0,-22	 # ret,
	.loc 1 792 0
	# nr_segs > 1024 (unsigned): exit with ret = -22.
	bgt- 7,.L495	 #
	.loc 1 796 0
	# nr_segs <= fast_segs (unsigned): keep iov = fast_pointer, skip __kmalloc.
	cmplw 7,27,6	 # fast_segs, tmp161, nr_segs
	ble- 7,.L496	 #
.LBB1538:
.LBB1539:
	.file 21 "./include/linux/slab.h"
	.loc 21 495 0
	# __kmalloc(nr_segs << 3, 0x1400c0)
	lis 4,0x140	 # tmp190,
	slwi 3,27,3	 #, nr_segs,
	ori 4,4,192	 #,, tmp190,
	bl __kmalloc	 #
.LBE1539:
.LBE1538:
	.loc 1 799 0
	li 0,-12	 # ret,
	.loc 1 798 0
	# iov = __kmalloc result; a zero result exits with ret = -12.
	mr. 28,3	 # iov,
	beq- 0,.L495	 #
.L496:
.LBB1540:
.LBB1541:
.LBB1542:
.LBB1543:
	.loc 19 113 0
	lwz 0,1128(2)	 # current.192_185->thread.fs.seg, D.39493
.LBE1543:
.LBE1542:
.LBE1541:
.LBE1540:
	.loc 1 803 0
	# From here on r27 holds n = nr_segs << 3 (the byte count to copy).
	slwi 27,27,3	 # n, nr_segs,
.LBB1549:
.LBB1548:
.LBB1547:
.LBB1546:
	mr 5,27	 # n, n
	.loc 19 113 0
	# Inline range check: fail if uvector > seg, or n - 1 > seg - uvector
	# (both unsigned compares).
	cmplw 7,31,0	 # D.39493, tmp165, uvector
	bgt- 7,.L497	 #
	addi 9,27,-1	 # tmp166, n,
	subf 0,31,0	 # tmp167, uvector, D.39493
	cmplw 7,9,0	 # tmp167, tmp168, tmp166
	bgt- 7,.L497	 #
.LBB1544:
.LBB1545:
	.file 22 "./arch/powerpc/include/asm/uaccess.h"
	.loc 22 305 0
	# __copy_tofrom_user(iov, uvector, n) -- r5 was already loaded with n above.
	mr 3,28	 #, iov
	mr 4,31	 #, uvector
	bl __copy_tofrom_user	 #
.LBE1545:
.LBE1544:
	.loc 19 115 0
	# The call's return value overwrites r5 (annotated as n); zero takes the
	# predicted-taken branch to the loop setup at .L498.
	mr. 5,3	 # n,
	beq+ 0,.L498	 #
.L497:
	.loc 19 116 0
	# Failure path.  r5 is still n if a range check failed, otherwise the
	# non-zero return value of __copy_tofrom_user.  Calls
	# memset(iov + (r27 - r5), 0, r5), then exits via .L510 (ret = -14).
	subf 3,5,27	 # tmp170, n, n
	li 4,0	 #,
	add 3,28,3	 #, iov, tmp170
	bl memset	 #
	b .L510	 #
.L498:
.LBE1546:
.LBE1547:
.LBE1548:
.LBE1549:
.LBB1550:
	.loc 1 833 0
	# MISCOMPILE: the loop trip count is a constant.  lis places 0x2000 in the
	# upper halfword, so r9 = 0x20000000, and that is what mtctr loads into CTR.
	# r27 (n = nr_segs << 3) is not used to derive the count.
	# NOTE(review): 0x20000000 is what the ((n - 8) >> 3) + 1 sequence of the
	# other listing yields for n == 0 -- presumably the zero result of the copy
	# was substituted for n; confirm against the compiler dumps.
	lis 9,0x2000	 #,
	.loc 1 828 0
	# cr6 = signed compare of type with 0; tested inside the loop.
	cmpwi 6,30,0	 #, tmp186, type
	.loc 1 833 0
	lis 6,0x7fff	 # tmp189,
	mtctr 9	 # tmp188,
	.loc 1 829 0
	mr 5,2	 # current.121, current
	# r8 = byte offset into iov[], stepped by 8 per iteration; r0 = running ret.
	li 8,0	 # ivtmp.533,
	li 0,0	 # ret,
	.loc 1 833 0
	# r6 = 0x7ffff000, the cap applied to the running total below.
	ori 6,6,61440	 #, tmp187, tmp189,
.L501:
	.loc 1 819 0
	# lwzux: r10 = word at iov + r8 (buf), and r11 is updated to iov + r8.
	mr 11,28	 # D.40168, iov
	lwzux 10,11,8	 # MEM[base: iov_4, index: ivtmp.533_176, offset: 0B], buf
	.loc 1 820 0
	lwz 9,4(11)	 # MEM[base: D.40168_211, offset: 4B], len
	.loc 1 824 0
	# cr7 = signed compare of len with 0; negative len exits with ret = -22.
	cmpwi 7,9,0	 #, tmp175, len
	blt- 7,.L508	 #
	.loc 1 828 0
	# type < 0: skip the buf/len range check.
	blt- 6,.L499	 #
	.loc 1 829 0
	# Range check on this segment: buf > seg fails; len == 0 passes (cr7 is
	# still the len-vs-0 compare); otherwise len - 1 > seg - buf fails.
	lwz 7,1128(5)	 # current.121_33->thread.fs.seg, D.36573
	cmplw 1,10,7	 # D.36573, tmp177, buf
	bgt- 1,.L510	 #
	.loc 1 829 0 is_stmt 0 discriminator 1
	beq- 7,.L499	 #
	.loc 1 829 0 discriminator 4
	addi 4,9,-1	 # tmp179, len,
	subf 10,10,7	 # tmp180, buf, D.36573
	cmplw 7,4,10	 # tmp180, tmp181, tmp179
	bgt- 7,.L510	 #
.L499:
	.loc 1 833 0 is_stmt 1
	# r10 = 0x7ffff000 - ret.  If len exceeds it (signed), clamp len to r10
	# and store the clamped value back into the iov entry.
	subf 10,0,6	 # len, ret, tmp187
	cmpw 7,9,10	 # len, tmp183, len
	ble- 7,.L500	 #
	.loc 1 835 0
	stw 10,4(11)	 # MEM[base: D.40168_211, offset: 4B], len
	mr 9,10	 # len, len
.L500:
	.loc 1 837 0
	# ret += len; advance to the next 8-byte iov entry.
	add 0,0,9	 # ret, ret, len
	addi 8,8,8	 # ivtmp.533, ivtmp.533,
.LBE1550:
	.loc 1 818 0
	# Decrement CTR and loop while it is non-zero.
	bdnz .L501	 #
	b .L495	 #
.L508:
.LBB1551:
	.loc 1 825 0
	li 0,-22	 # ret,
	b .L495	 #
.L510:
	.loc 1 830 0
	li 0,-14	 # ret,
.L495:
.LBE1551:
	.loc 1 842 0
	# Common exit: *ret_pointer = iov, r3 = ret.  r11 = r1 + 32 is set up
	# before the tail branch to _restgpr_27_x -- presumably the out-of-line
	# restore of r27..r31 plus return; that routine is not shown here.
	addi 11,1,32	 #,,
	.loc 1 840 0
	stw 28,0(29)	 # *ret_pointer_53(D), iov
	.loc 1 842 0
	mr 3,0	 #, ret
	b _restgpr_27_x	 #
.LFE2683:
	.size	rw_copy_check_uvector,.-rw_copy_check_uvector
-------------- next part --------------
	# rw_copy_check_uvector as emitted by gcc 4.6.3 for ppc32 -- this is the
	# listing that calls the out-of-line _copy_from_user and derives the loop
	# trip count from r27 (addi/srwi/addi before mtctr).
	#
	# Register roles, taken from the compiler's own operand annotations:
	#   r31 = type          r30 = uvector       r29 = ret_pointer
	#   r28 = iov (starts as fast_pointer, replaced by the __kmalloc result)
	#   r27 = nr_segs, later n = nr_segs << 3, later the loop trip count
	#   r6  = fast_segs on entry (reused as a constant before .L499)
	#   r0  = ret, moved to r3 at .L495
	.globl rw_copy_check_uvector
	.type	rw_copy_check_uvector, @function
rw_copy_check_uvector:
.LFB2683:
	.loc 1 773 0
	# Prologue: 32-byte frame, r27..r31 saved at 12(r1), LR saved at 36(r1).
	stwu 1,-32(1)	 #,,
.LCFI142:
	mflr 0	 #,
.LCFI143:
	stmw 27,12(1)	 #,
.LCFI144:
	.loc 1 783 0
	# Record form: also sets cr0 for the nr_segs == 0 test at the beq- below.
	mr. 27,5	 # nr_segs, nr_segs
	.loc 1 773 0
	mr 31,3	 # type, type
	stw 0,36(1)	 #,
.LCFI145:
	.loc 1 773 0
	mr 30,4	 # uvector, uvector
	mr 29,8	 # ret_pointer, ret_pointer
	.loc 1 776 0
	mr 28,7	 # iov, fast_pointer
	.loc 1 784 0
	li 0,0	 # ret,
	.loc 1 783 0
	# nr_segs == 0: exit with ret = 0.
	beq- 0,.L495	 #
	.loc 1 792 0
	cmplwi 7,27,1024	 #, tmp151, nr_segs
	.loc 1 793 0
	li 0,-22	 # ret,
	.loc 1 792 0
	# nr_segs > 1024 (unsigned): exit with ret = -22.
	bgt- 7,.L495	 #
	.loc 1 796 0
	# nr_segs <= fast_segs (unsigned): keep iov = fast_pointer, skip __kmalloc.
	cmplw 7,27,6	 # fast_segs, tmp152, nr_segs
	ble- 7,.L496	 #
.LBB1516:
.LBB1517:
	.file 21 "./include/linux/slab.h"
	.loc 21 495 0
	# __kmalloc(nr_segs << 3, 0x1400c0)
	lis 4,0x140	 # tmp175,
	slwi 3,27,3	 #, nr_segs,
	ori 4,4,192	 #,, tmp175,
	bl __kmalloc	 #
.LBE1517:
.LBE1516:
	.loc 1 799 0
	li 0,-12	 # ret,
	.loc 1 798 0
	# iov = __kmalloc result; a zero result exits with ret = -12.
	mr. 28,3	 # iov,
	beq- 0,.L495	 #
.L496:
	.loc 1 803 0
	# From here on r27 holds n = nr_segs << 3 (the byte count to copy).
	slwi 27,27,3	 # n, nr_segs,
.LBB1518:
.LBB1519:
	.loc 19 153 0
	# _copy_from_user(iov, uvector, n)
	mr 3,28	 #, iov
	mr 4,30	 #, uvector
	mr 5,27	 #, n
	bl _copy_from_user	 #
.LBE1519:
.LBE1518:
	.loc 1 804 0
	li 0,-14	 # ret,
	.loc 1 803 0
	# Non-zero return from _copy_from_user: exit with ret = -14.
	cmpwi 7,3,0	 #, tmp156,
	bne- 7,.L495	 #
.LBB1520:
	.loc 1 833 0
	# Trip count: r27 = ((n - 8) >> 3) + 1, built by the addi/srwi/addi
	# interleaved below.  With n = nr_segs << 3 and nr_segs >= 1 (the zero
	# case exited earlier) this equals nr_segs.
	addi 27,27,-8	 # tmp172, n,
	.loc 1 828 0
	# cr6 = signed compare of type with 0; tested inside the loop.
	cmpwi 6,31,0	 #, tmp168, type
	.loc 1 833 0
	srwi 27,27,3	 # tmp173, tmp172,
	lis 6,0x7fff	 # tmp174,
	addi 27,27,1	 #, tmp173,
	.loc 1 829 0
	mr 5,2	 # current.121, current
	.loc 1 833 0
	mtctr 27	 # tmp170,
	.loc 1 829 0
	# r8 = byte offset into iov[], stepped by 8 per iteration; r0 = running ret.
	li 8,0	 # ivtmp.528,
	li 0,0	 # ret,
	.loc 1 833 0
	# r6 = 0x7ffff000, the cap applied to the running total below.
	ori 6,6,61440	 #, tmp169, tmp174,
.L499:
	.loc 1 819 0
	# lwzux: r10 = word at iov + r8 (buf), and r11 is updated to iov + r8.
	mr 11,28	 # D.40034, iov
	lwzux 10,11,8	 # MEM[base: iov_4, index: ivtmp.528_176, offset: 0B], buf
	.loc 1 820 0
	lwz 9,4(11)	 # MEM[base: D.40034_183, offset: 4B], len
	.loc 1 824 0
	# cr7 = signed compare of len with 0; negative len exits with ret = -22.
	cmpwi 7,9,0	 #, tmp157, len
	blt- 7,.L505	 #
	.loc 1 828 0
	# type < 0: skip the buf/len range check.
	blt- 6,.L497	 #
	.loc 1 829 0
	# Range check on this segment: buf > seg fails; len == 0 passes (cr7 is
	# still the len-vs-0 compare); otherwise len - 1 > seg - buf fails.
	lwz 7,1128(5)	 # current.121_33->thread.fs.seg, D.36573
	cmplw 1,10,7	 # D.36573, tmp159, buf
	bgt- 1,.L507	 #
	.loc 1 829 0 is_stmt 0 discriminator 1
	beq- 7,.L497	 #
	.loc 1 829 0 discriminator 4
	addi 4,9,-1	 # tmp161, len,
	subf 10,10,7	 # tmp162, buf, D.36573
	cmplw 7,4,10	 # tmp162, tmp163, tmp161
	bgt- 7,.L507	 #
.L497:
	.loc 1 833 0 is_stmt 1
	# r10 = 0x7ffff000 - ret.  If len exceeds it (signed), clamp len to r10
	# and store the clamped value back into the iov entry.
	subf 10,0,6	 # len, ret, tmp169
	cmpw 7,9,10	 # len, tmp165, len
	ble- 7,.L498	 #
	.loc 1 835 0
	stw 10,4(11)	 # MEM[base: D.40034_183, offset: 4B], len
	mr 9,10	 # len, len
.L498:
	.loc 1 837 0
	# ret += len; advance to the next 8-byte iov entry.
	add 0,0,9	 # ret, ret, len
	addi 8,8,8	 # ivtmp.528, ivtmp.528,
.LBE1520:
	.loc 1 818 0
	# Decrement CTR and loop while it is non-zero.
	bdnz .L499	 #
	b .L495	 #
.L505:
.LBB1521:
	.loc 1 825 0
	li 0,-22	 # ret,
	b .L495	 #
.L507:
	.loc 1 830 0
	li 0,-14	 # ret,
.L495:
.LBE1521:
	.loc 1 842 0
	# Common exit: *ret_pointer = iov, r3 = ret.  r11 = r1 + 32 is set up
	# before the tail branch to _restgpr_27_x -- presumably the out-of-line
	# restore of r27..r31 plus return; that routine is not shown here.
	addi 11,1,32	 #,,
	.loc 1 840 0
	stw 28,0(29)	 # *ret_pointer_53(D), iov
	.loc 1 842 0
	mr 3,0	 #, ret
	b _restgpr_27_x	 #
.LFE2683:
	.size	rw_copy_check_uvector,.-rw_copy_check_uvector


More information about the Linuxppc-dev mailing list