[PATCH -V5 06/13] arch/powerpc: Increase the slice range to 64TB
Aneesh Kumar K.V
aneesh.kumar at linux.vnet.ibm.com
Wed Aug 1 17:43:37 EST 2012
Paul Mackerras <paulus at samba.org> writes:
> On Mon, Jul 30, 2012 at 04:52:12PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar at linux.vnet.ibm.com>
>>
>> This patch makes the high psizes mask as an unsigned char array
>> so that we can have more than 16TB. Currently we support upto
>> 64TB
>
> Comments below...
>
>> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
>> index b9ee79ce..c355af6 100644
>> --- a/arch/powerpc/mm/slb_low.S
>> +++ b/arch/powerpc/mm/slb_low.S
>> @@ -108,17 +108,34 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
>> * between 4k and 64k standard page size
>> */
>> #ifdef CONFIG_PPC_MM_SLICES
>> + /* r10 have esid */
>> cmpldi r10,16
>> -
>> - /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
>> - ld r9,PACALOWSLICESPSIZE(r13)
>> - sldi r11,r10,2
>> + /* below SLICE_LOW_TOP */
>> blt 5f
>> - ld r9,PACAHIGHSLICEPSIZE(r13)
>> - srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
>> - andi. r11,r11,0x3c
>> -
>> -5: /* Extract the psize and multiply to get an array offset */
>> + /*
>> + * Handle hpsizes,
>> + * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
>> + * We use r10 here, later we restore it to esid.
>> + * Can we use other register instead of r10 ?
>
> Only r9, r10 and r11 are available here, and you're using them all.
> Restoring r10 with one integer instruction is going to be quicker than
> saving and restoring another register to/from memory.
>
>> + */
>> + srdi r10,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT) /* index */
>> + srdi r11,r10,1 /* r11 is array index */
>> + addi r9,r11,PACAHIGHSLICEPSIZE
>> + lbzx r9,r9,r13 /* r9 is hpsizes[r11] */
>> + sldi r11,r11,1
>> + subf r11,r11,r10 /* mask_index = index - (array_index << 1) */
>> + srdi r10,r3,28 /* restore r10 with esid */
>> + b 6f
>
> How about (untested):
>
> srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
> addi r9,r11,PACAHIGHSLICEPSIZE
> lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
> /* r11 = (r10 >> 12) & 1, i.e. grab lowest bit of 1T ESID */
> rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
> b 6f
>
nice, I missed the assembly part when you asked to update the c code
in previous review. This change also bring it closer to the c code.
> Note that I swapped the RA and RB arguments for the lbzx. Our recent
> processors process indexed mode instructions more quickly if the value
> in RB is small.
>
>> static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
>> {
>> + unsigned char *hpsizes;
>> + int index, mask_index;
>> struct slice_mask ret = { 0, 0 };
>> unsigned long i;
>> - u64 psizes;
>> + u64 lpsizes;
>>
>> - psizes = mm->context.low_slices_psize;
>> + lpsizes = mm->context.low_slices_psize;
>> for (i = 0; i < SLICE_NUM_LOW; i++)
>> - if (((psizes >> (i * 4)) & 0xf) == psize)
>> + if (((lpsizes >> (i * 4)) & 0xf) == psize)
>> ret.low_slices |= 1u << i;
>>
>> - psizes = mm->context.high_slices_psize;
>> - for (i = 0; i < SLICE_NUM_HIGH; i++)
>> - if (((psizes >> (i * 4)) & 0xf) == psize)
>> + hpsizes = mm->context.high_slices_psize;
>> + for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> + mask_index = i & 0x1;
>> + index = i >> 1;
>> + if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
>> ret.high_slices |= 1u << i;
>
> This needs to be 1ul not 1u, since we are creating a 64-bit mask.
>
>> static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
>> {
>> + int index, mask_index;
>> /* Write the new slice psize bits */
>> - u64 lpsizes, hpsizes;
>> + unsigned char *hpsizes;
>> + u64 lpsizes;
>> unsigned long i, flags;
>>
>> slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
>> @@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
>> lpsizes = (lpsizes & ~(0xful << (i * 4))) |
>> (((unsigned long)psize) << (i * 4));
>>
>> + /* Assign the value back */
>> + mm->context.low_slices_psize = lpsizes;
>> +
>> hpsizes = mm->context.high_slices_psize;
>> - for (i = 0; i < SLICE_NUM_HIGH; i++)
>> + for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> + mask_index = i & 0x1;
>> + index = i >> 1;
>> if (mask.high_slices & (1u << i))
>
> Again, 1ul now. Check all the other places where we manipulate a
> slice mask to see if there are any other instances of 1u that need to
> be changed.
I ended up with this.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0136040..b4e996a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
*(p++) = '-';
*(p++) = ' ';
for (i = 0; i < SLICE_NUM_HIGH; i++)
- *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+ *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
*(p++) = 0;
printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
}
if ((start + len) > SLICE_LOW_TOP)
- ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
- - (1u << GET_HIGH_SLICE_INDEX(start));
+ ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
+ - (1ul << GET_HIGH_SLICE_INDEX(start));
return ret;
}
@@ -135,7 +135,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
for (i = 0; i < SLICE_NUM_HIGH; i++)
if (!slice_high_has_vma(mm, i))
- ret.high_slices |= 1u << i;
+ ret.high_slices |= 1ul << i;
return ret;
}
@@ -158,7 +158,7 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
mask_index = i & 0x1;
index = i >> 1;
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- ret.high_slices |= 1u << i;
+ ret.high_slices |= 1ul << i;
}
return ret;
@@ -215,7 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
for (i = 0; i < SLICE_NUM_HIGH; i++) {
mask_index = i & 0x1;
index = i >> 1;
- if (mask.high_slices & (1u << i))
+ if (mask.high_slices & (1ul << i))
hpsizes[index] = (hpsizes[index] &
~(0xf << (mask_index * 4))) |
(((unsigned long)psize) << (mask_index * 4));
-aneesh
More information about the Linuxppc-dev
mailing list