[PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()

Christophe Leroy christophe.leroy at csgroup.eu
Sat Mar 16 20:50:15 AEDT 2024



Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
> 
> 
> Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
>> The existing patching alias page setup and teardown sections can be
>> simplified to make use of the new open_patch_window() abstraction.
>>
>> This eliminates the _mm variants of the helpers, consumers no longer
>> need to check mm_patch_enabled(), and consumers no longer need to worry
>> about synchronization and flushing beyond the changes they make in the
>> patching window.
> 
> With this patch, the time needed to activate or de-activate function 
> tracer is approx 10% longer on powerpc 8xx.

See below difference of patch_instruction() before and after your patch, 
both for 4k pages and 16k pages:

16k pages, before your patch:

00000278 <patch_instruction>:
  278:	48 00 00 84 	nop
  27c:	7c e0 00 a6 	mfmsr   r7
  280:	7c 51 13 a6 	mtspr   81,r2
  284:	3d 00 00 00 	lis     r8,0
			286: R_PPC_ADDR16_HA	.data
  288:	39 08 00 00 	addi    r8,r8,0
			28a: R_PPC_ADDR16_LO	.data
  28c:	7c 69 1b 78 	mr      r9,r3
  290:	3d 29 40 00 	addis   r9,r9,16384
  294:	81 48 00 08 	lwz     r10,8(r8)
  298:	55 29 00 22 	clrrwi  r9,r9,14
  29c:	81 08 00 04 	lwz     r8,4(r8)
  2a0:	61 29 01 2d 	ori     r9,r9,301
  2a4:	55 06 00 22 	clrrwi  r6,r8,14
  2a8:	91 2a 00 00 	stw     r9,0(r10)
  2ac:	91 2a 00 04 	stw     r9,4(r10)
  2b0:	91 2a 00 08 	stw     r9,8(r10)
  2b4:	91 2a 00 0c 	stw     r9,12(r10)
  2b8:	50 68 04 be 	rlwimi  r8,r3,0,18,31
  2bc:	90 88 00 00 	stw     r4,0(r8)
  2c0:	7c 00 40 6c 	dcbst   0,r8
  2c4:	7c 00 04 ac 	hwsync
  2c8:	7c 00 1f ac 	icbi    0,r3
  2cc:	7c 00 04 ac 	hwsync
  2d0:	4c 00 01 2c 	isync
  2d4:	38 60 00 00 	li      r3,0
  2d8:	39 20 00 00 	li      r9,0
  2dc:	91 2a 00 00 	stw     r9,0(r10)
  2e0:	91 2a 00 04 	stw     r9,4(r10)
  2e4:	91 2a 00 08 	stw     r9,8(r10)
  2e8:	91 2a 00 0c 	stw     r9,12(r10)
  2ec:	7c 00 32 64 	tlbie   r6,r0
  2f0:	7c 00 04 ac 	hwsync
  2f4:	7c e0 01 24 	mtmsr   r7
  2f8:	4e 80 00 20 	blr

16k pages, after your patch. Now we have a stack frame for the call to 
close_patch_window(). And the branch in close_patch_window() is 
unexpected as patch_instruction() works on single pages.

0000024c <close_patch_window>:
  24c:	81 23 00 04 	lwz     r9,4(r3)
  250:	39 40 00 00 	li      r10,0
  254:	91 49 00 00 	stw     r10,0(r9)
  258:	91 49 00 04 	stw     r10,4(r9)
  25c:	91 49 00 08 	stw     r10,8(r9)
  260:	91 49 00 0c 	stw     r10,12(r9)
  264:	81 23 00 00 	lwz     r9,0(r3)
  268:	55 2a 00 22 	clrrwi  r10,r9,14
  26c:	39 29 40 00 	addi    r9,r9,16384
  270:	7d 2a 48 50 	subf    r9,r10,r9
  274:	28 09 40 00 	cmplwi  r9,16384
  278:	41 81 00 10 	bgt     288 <close_patch_window+0x3c>
  27c:	7c 00 52 64 	tlbie   r10,r0
  280:	7c 00 04 ac 	hwsync
  284:	4e 80 00 20 	blr
  288:	7c 00 04 ac 	hwsync
  28c:	7c 00 02 e4 	tlbia
  290:	4c 00 01 2c 	isync
  294:	4e 80 00 20 	blr

000002c4 <patch_instruction>:
  2c4:	94 21 ff d0 	stwu    r1,-48(r1)
  2c8:	93 c1 00 28 	stw     r30,40(r1)
  2cc:	48 00 00 ac 	nop
  2d0:	7c 08 02 a6 	mflr    r0
  2d4:	90 01 00 34 	stw     r0,52(r1)
  2d8:	93 e1 00 2c 	stw     r31,44(r1)
  2dc:	7f e0 00 a6 	mfmsr   r31
  2e0:	7c 51 13 a6 	mtspr   81,r2
  2e4:	3d 40 00 00 	lis     r10,0
			2e6: R_PPC_ADDR16_HA	.data
  2e8:	39 4a 00 00 	addi    r10,r10,0
			2ea: R_PPC_ADDR16_LO	.data
  2ec:	7c 69 1b 78 	mr      r9,r3
  2f0:	3d 29 40 00 	addis   r9,r9,16384
  2f4:	81 0a 00 08 	lwz     r8,8(r10)
  2f8:	80 ca 00 04 	lwz     r6,4(r10)
  2fc:	55 29 00 22 	clrrwi  r9,r9,14
  300:	61 29 01 2d 	ori     r9,r9,301
  304:	38 e0 00 00 	li      r7,0
  308:	54 6a 04 be 	clrlwi  r10,r3,18
  30c:	91 28 00 00 	stw     r9,0(r8)
  310:	91 28 00 04 	stw     r9,4(r8)
  314:	91 28 00 08 	stw     r9,8(r8)
  318:	91 28 00 0c 	stw     r9,12(r8)
  31c:	91 01 00 0c 	stw     r8,12(r1)
  320:	90 c1 00 08 	stw     r6,8(r1)
  324:	7d 4a 32 14 	add     r10,r10,r6
  328:	90 e1 00 10 	stw     r7,16(r1)
  32c:	90 e1 00 14 	stw     r7,20(r1)
  330:	90 e1 00 18 	stw     r7,24(r1)
  334:	90 8a 00 00 	stw     r4,0(r10)
  338:	7c 00 50 6c 	dcbst   0,r10
  33c:	7c 00 04 ac 	hwsync
  340:	7c 00 1f ac 	icbi    0,r3
  344:	7c 00 04 ac 	hwsync
  348:	4c 00 01 2c 	isync
  34c:	3b c0 00 00 	li      r30,0
  350:	38 61 00 08 	addi    r3,r1,8
  354:	4b ff fe f9 	bl      24c <close_patch_window>
  358:	7f e0 01 24 	mtmsr   r31
  35c:	80 01 00 34 	lwz     r0,52(r1)
  360:	83 e1 00 2c 	lwz     r31,44(r1)
  364:	7c 08 03 a6 	mtlr    r0
  368:	7f c3 f3 78 	mr      r3,r30
  36c:	83 c1 00 28 	lwz     r30,40(r1)
  370:	38 21 00 30 	addi    r1,r1,48
  374:	4e 80 00 20 	blr

4k pages before your patch:

00000268 <patch_instruction>:
  268:	48 00 00 6c 	nop
  26c:	7c e0 00 a6 	mfmsr   r7
  270:	7c 51 13 a6 	mtspr   81,r2
  274:	3d 40 00 00 	lis     r10,0
			276: R_PPC_ADDR16_HA	.data
  278:	39 4a 00 00 	addi    r10,r10,0
			27a: R_PPC_ADDR16_LO	.data
  27c:	7c 69 1b 78 	mr      r9,r3
  280:	3d 29 40 00 	addis   r9,r9,16384
  284:	81 0a 00 08 	lwz     r8,8(r10)
  288:	55 29 00 26 	clrrwi  r9,r9,12
  28c:	81 4a 00 04 	lwz     r10,4(r10)
  290:	61 29 01 25 	ori     r9,r9,293
  294:	91 28 00 00 	stw     r9,0(r8)
  298:	55 49 00 26 	clrrwi  r9,r10,12
  29c:	50 6a 05 3e 	rlwimi  r10,r3,0,20,31
  2a0:	90 8a 00 00 	stw     r4,0(r10)
  2a4:	7c 00 50 6c 	dcbst   0,r10
  2a8:	7c 00 04 ac 	hwsync
  2ac:	7c 00 1f ac 	icbi    0,r3
  2b0:	7c 00 04 ac 	hwsync
  2b4:	4c 00 01 2c 	isync
  2b8:	38 60 00 00 	li      r3,0
  2bc:	39 40 00 00 	li      r10,0
  2c0:	91 48 00 00 	stw     r10,0(r8)
  2c4:	7c 00 4a 64 	tlbie   r9,r0
  2c8:	7c 00 04 ac 	hwsync
  2cc:	7c e0 01 24 	mtmsr   r7
  2d0:	4e 80 00 20 	blr

4k pages after your patch. Here close_patch_window() is inlined but the 
test at the end is crazy, we've been patching one instruction with the 
assumption it is properly aligned, so why is there a branch with a tlbia 
whereas only one page needs to be invalidated ?:

00000268 <patch_instruction>:
  268:	48 00 00 84 	nop
  26c:	7c c0 00 a6 	mfmsr   r6
  270:	7c 51 13 a6 	mtspr   81,r2
  274:	3d 40 00 00 	lis     r10,0
			276: R_PPC_ADDR16_HA	.data
  278:	39 4a 00 00 	addi    r10,r10,0
			27a: R_PPC_ADDR16_LO	.data
  27c:	7c 69 1b 78 	mr      r9,r3
  280:	3d 29 40 00 	addis   r9,r9,16384
  284:	80 ea 00 08 	lwz     r7,8(r10)
  288:	55 29 00 26 	clrrwi  r9,r9,12
  28c:	81 4a 00 04 	lwz     r10,4(r10)
  290:	61 29 01 25 	ori     r9,r9,293
  294:	54 68 05 3e 	clrlwi  r8,r3,20
  298:	91 27 00 00 	stw     r9,0(r7)
  29c:	7d 08 52 14 	add     r8,r8,r10
  2a0:	90 88 00 00 	stw     r4,0(r8)
  2a4:	7c 00 40 6c 	dcbst   0,r8
  2a8:	7c 00 04 ac 	hwsync
  2ac:	7c 00 1f ac 	icbi    0,r3
  2b0:	7c 00 04 ac 	hwsync
  2b4:	4c 00 01 2c 	isync
  2b8:	38 60 00 00 	li      r3,0
  2bc:	55 49 00 26 	clrrwi  r9,r10,12
  2c0:	39 4a 10 00 	addi    r10,r10,4096
  2c4:	7d 49 50 50 	subf    r10,r9,r10
  2c8:	28 0a 10 00 	cmplwi  r10,4096
  2cc:	39 40 00 00 	li      r10,0
  2d0:	91 47 00 00 	stw     r10,0(r7)
  2d4:	40 81 00 38 	ble     30c <patch_instruction+0xa4>
  2d8:	7c 00 04 ac 	hwsync
  2dc:	7c 00 02 e4 	tlbia
  2e0:	4c 00 01 2c 	isync
  2e4:	7c c0 01 24 	mtmsr   r6
  2e8:	4e 80 00 20 	blr
...
  30c:	7c 00 4a 64 	tlbie   r9,r0
  310:	7c 00 04 ac 	hwsync
  314:	7c c0 01 24 	mtmsr   r6
  318:	4e 80 00 20 	blr


More information about the Linuxppc-dev mailing list