[PATCH v2] powerpc/npu: Cleanup MMIO ATSD flushing
Alistair Popple
alistair at popple.id.au
Wed Feb 7 14:14:47 AEDT 2018
On Tuesday, 16 January 2018 3:15:05 PM AEDT Alistair Popple wrote:
> Thanks Balbir, one question below. I have no way of testing this at present but
> it looks ok to me. Thanks!
The below are more future optimisations once we can test. So in the meantime:
Acked-by: Alistair Popple <alistair at popple.id.au>
> On Thursday, 14 December 2017 12:10:08 PM AEDT Balbir Singh wrote:
> > While reviewing the code I found that the flush assumes all
> > pages are of mmu_linear_psize, which is not correct. The patch
> > uses find_linux_pte to find the right page size and uses that
> > for launching the ATSD invalidation. A new helper is added
> > to abstract the invalidation from the various notifiers.
> >
> > The patch also cleans up a bit by removing AP size from PID
> > flushes.
> >
> > Signed-off-by: Balbir Singh <bsingharora at gmail.com>
> > ---
> >
> > Changelog:
> > Refactor the handling of return values from find_linux_pte
> > as suggested by Aneesh
> >
> > arch/powerpc/platforms/powernv/npu-dma.c | 55 ++++++++++++++++++++++----------
> > 1 file changed, 38 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> > index f6cbc1a71472..e8caa3e2019d 100644
> > --- a/arch/powerpc/platforms/powernv/npu-dma.c
> > +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> > @@ -17,6 +17,7 @@
> > #include <linux/pci.h>
> > #include <linux/memblock.h>
> > #include <linux/iommu.h>
> > +#include <linux/huge_mm.h>
> >
> > #include <asm/tlb.h>
> > #include <asm/powernv.h>
> > @@ -27,6 +28,7 @@
> > #include <asm/pnv-pci.h>
> > #include <asm/msi_bitmap.h>
> > #include <asm/opal.h>
> > +#include <asm/pte-walk.h>
> >
> > #include "powernv.h"
> > #include "pci.h"
> > @@ -460,9 +462,6 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
> > /* PRS set to process-scoped */
> > launch |= PPC_BIT(13);
> >
> > - /* AP */
> > - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
> >
> > /* PID */
> > launch |= pid << PPC_BITLSHIFT(38);
> >
> > @@ -474,7 +473,8 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
> > }
> >
> > static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> > - unsigned long pid, bool flush)
> > + unsigned long pid, bool flush,
> > + unsigned int shift)
> > {
> > unsigned long launch;
> >
> > @@ -485,9 +485,8 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> > launch |= PPC_BIT(13);
> >
> > /* AP */
> > - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
> > + launch |= (u64) mmu_get_ap(shift) << PPC_BITLSHIFT(17);
> >
> > - /* PID */
> > launch |= pid << PPC_BITLSHIFT(38);
> >
> > /* No flush */
> > @@ -504,7 +503,8 @@ struct mmio_atsd_reg {
> > };
> >
> > static void mmio_invalidate_wait(
> > - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
> > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush,
> > + unsigned int shift)
> > {
> > struct npu *npu;
> > int i, reg;
> > @@ -537,7 +537,8 @@ static void mmio_invalidate_wait(
> > * the value of va.
> > */
> > static void mmio_invalidate(struct npu_context *npu_context, int va,
> > - unsigned long address, bool flush)
> > + unsigned long address, bool flush,
> > + unsigned int shift)
> > {
> > int i, j;
> > struct npu *npu;
> > @@ -572,7 +573,7 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
> > if (va)
> > mmio_atsd_reg[i].reg =
> > mmio_invalidate_va(npu, address, pid,
> > - flush);
> > + flush, shift);
> > else
> > mmio_atsd_reg[i].reg =
> > mmio_invalidate_pid(npu, pid, flush);
> > @@ -585,10 +586,32 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
> > }
> > }
> >
> > - mmio_invalidate_wait(mmio_atsd_reg, flush);
> > + mmio_invalidate_wait(mmio_atsd_reg, flush, shift);
> > if (flush)
> > /* Wait for the flush to complete */
> > - mmio_invalidate_wait(mmio_atsd_reg, false);
> > + mmio_invalidate_wait(mmio_atsd_reg, false, shift);
> > +}
> > +
> > +static void pnv_npu2_invalidate_helper(struct npu_context *npu_context,
> > + struct mm_struct *mm, unsigned long start,
> > + unsigned long end, bool flush)
> > +{
> > + unsigned long address;
> > + unsigned int hshift = 0, shift;
> > +
> > + address = start;
> > + do {
> > + local_irq_disable();
> > + find_linux_pte(mm->pgd, address, NULL, &hshift);
> > + if (hshift)
> > + shift = hshift;
> > + else
> > + shift = PAGE_SHIFT;
>
> Looks better, thanks!
>
> Also in future we might be able to futher optimise this as I don't think the
> shift needs to match the actual page size as we are directly issuing ATSDs to
> the GPU. ie. we could bump shift to cover the whole of (start, end) or to
> invalidate larger chunks at a time - I don't think we need to limit it to
> PAGE_SHIFT.
>
> > + mmio_invalidate(npu_context, address > 0, address, flush,
> > + shift);
>
> If address == 0 we end up invalidating the entire PID rather than just the page
> at address 0. Probably not a big issue though as I'm guessing we wouldn't
> actually see invalidations for the page at 0 very often?
>
> > + local_irq_enable();
> > + address += (1ull << shift);
> > + } while (address < end);
> > }
> >
> > static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> > @@ -604,7 +627,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> > * There should be no more translation requests for this PID, but we
> > * need to ensure any entries for it are removed from the TLB.
> > */
> > - mmio_invalidate(npu_context, 0, 0, true);
> > + pnv_npu2_invalidate_helper(npu_context, mm, 0, PAGE_SIZE, true);
> > }
> >
> > static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> > @@ -614,7 +637,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> > {
> > struct npu_context *npu_context = mn_to_npu_context(mn);
> >
> > - mmio_invalidate(npu_context, 1, address, true);
> > + pnv_npu2_invalidate_helper(npu_context, mm, address, address, true);
> > }
> >
> > static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> > @@ -622,13 +645,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> > unsigned long start, unsigned long end)
> > {
> > struct npu_context *npu_context = mn_to_npu_context(mn);
> > - unsigned long address;
> >
> > - for (address = start; address < end; address += PAGE_SIZE)
> > - mmio_invalidate(npu_context, 1, address, false);
> > + pnv_npu2_invalidate_helper(npu_context, mm, start, end, false);
> >
> > /* Do the flush only on the final addess == end */
> > - mmio_invalidate(npu_context, 1, address, true);
> > + pnv_npu2_invalidate_helper(npu_context, mm, end, end, true);
> > }
> >
> > static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
> >
>
>
More information about the Linuxppc-dev
mailing list