[Cbe-oss-dev] PATCH [2/7] class 0 and 1 rework [Updated]

Luke Browning lukebr at linux.vnet.ibm.com
Fri Nov 2 01:50:01 EST 2007


Re-work spu exception handling so that the spu does not have to
be loaded. This should improve the concurrency of spu scheduling,
leading to greater spu utilization when spus are overcommitted.

From: Arnd Bergmann <arnd.bergman at de.ibm.com>
Signed-off-by: Luke Browning <lukebr at linux.vnet.ibm.com>

---

Index: linux-2.6.22/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- linux-2.6.22.orig/arch/powerpc/platforms/cell/spu_base.c
+++ linux-2.6.22/arch/powerpc/platforms/cell/spu_base.c
@@ -114,27 +114,6 @@ void spu_associate_mm(struct spu *spu, s
 }
 EXPORT_SYMBOL_GPL(spu_associate_mm);
 
-static int __spu_trap_invalid_dma(struct spu *spu)
-{
-	pr_debug("%s\n", __FUNCTION__);
-	spu->dma_callback(spu, SPE_EVENT_INVALID_DMA);
-	return 0;
-}
-
-static int __spu_trap_dma_align(struct spu *spu)
-{
-	pr_debug("%s\n", __FUNCTION__);
-	spu->dma_callback(spu, SPE_EVENT_DMA_ALIGNMENT);
-	return 0;
-}
-
-static int __spu_trap_error(struct spu *spu)
-{
-	pr_debug("%s\n", __FUNCTION__);
-	spu->dma_callback(spu, SPE_EVENT_SPE_ERROR);
-	return 0;
-}
-
 static void spu_restart_dma(struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
@@ -225,10 +204,12 @@ static int __spu_trap_data_map(struct sp
 		return 1;
 	}
 
+	spu->class_0_pending = 0;
 	spu->dar = ea;
 	spu->dsisr = dsisr;
-	mb();
+
 	spu->stop_callback(spu);
+
 	return 0;
 }
 
@@ -246,6 +227,8 @@ spu_irq_class_0(int irq, void *data)
 
 	spin_lock(&spu->register_lock);
 	spu->class_0_pending |= stat;
+	spu->dsisr = spu_mfc_dsisr_get(spu);
+	spu->dar = spu_mfc_dar_get(spu);
 	spin_unlock(&spu->register_lock);
 
 	spu->stop_callback(spu);
@@ -255,32 +238,6 @@ spu_irq_class_0(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-int
-spu_irq_class_0_bottom(struct spu *spu)
-{
-	unsigned long flags;
-	unsigned long stat;
-
-	spin_lock_irqsave(&spu->register_lock, flags);
-	stat = spu->class_0_pending;
-	spu->class_0_pending = 0;
-
-	if (stat & 1) /* invalid DMA alignment */
-		__spu_trap_dma_align(spu);
-
-	if (stat & 2) /* invalid MFC DMA */
-		__spu_trap_invalid_dma(spu);
-
-	if (stat & 4) /* error on SPU */
-		__spu_trap_error(spu);
-
-	spu_int_stat_clear(spu, 0, stat);
-	spin_unlock_irqrestore(&spu->register_lock, flags);
-
-	return (stat & 0x7) ? -EIO : 0;
-}
-EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom);
-
 static irqreturn_t
 spu_irq_class_1(int irq, void *data)
 {
Index: linux-2.6.22/arch/powerpc/platforms/cell/spufs/fault.c
===================================================================
--- linux-2.6.22.orig/arch/powerpc/platforms/cell/spufs/fault.c
+++ linux-2.6.22/arch/powerpc/platforms/cell/spufs/fault.c
@@ -136,11 +136,22 @@ static void spufs_handle_dma_error(struc
 	}
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
+int spu_handle_class0(struct spu_context *ctx)
 {
-	spufs_handle_dma_error(spu->ctx, spu->dar, type);
+	unsigned long stat = ctx->csa.class_0_pending;
+	unsigned long dar = ctx->csa.dar;
+
+	if (stat & 1) /* invalid DMA alignment */
+		spufs_handle_dma_error(ctx, dar, SPE_EVENT_DMA_ALIGNMENT);
+
+	if (stat & 2) /* invalid MFC DMA */
+		spufs_handle_dma_error(ctx, dar, SPE_EVENT_INVALID_DMA);
+
+	if (stat & 4) /* error on SPU */
+		spufs_handle_dma_error(ctx, dar, SPE_EVENT_SPE_ERROR);
+
+	return (stat & 0x7) ? -EIO : 0;
 }
-EXPORT_SYMBOL_GPL(spufs_dma_callback);
 
 /*
  * bottom half handler for page faults, we can't do this from
@@ -151,7 +162,7 @@ EXPORT_SYMBOL_GPL(spufs_dma_callback);
  * TODO: try calling hash_page from the interrupt handler first
  *       in order to speed up the easy case.
  */
-int spufs_handle_class1(struct spu_context *ctx)
+int spu_handle_class1(struct spu_context *ctx)
 {
 	u64 ea, dsisr, access;
 	unsigned long flags;
@@ -167,16 +178,8 @@ int spufs_handle_class1(struct spu_conte
 	 * in time, we can still expect to get the same fault
 	 * the immediately after the context restore.
 	 */
-	if (ctx->state == SPU_STATE_RUNNABLE) {
-		ea = ctx->spu->dar;
-		dsisr = ctx->spu->dsisr;
-		ctx->spu->dar= ctx->spu->dsisr = 0;
-	} else {
-		ea = ctx->csa.priv1.mfc_dar_RW;
-		dsisr = ctx->csa.priv1.mfc_dsisr_RW;
-		ctx->csa.priv1.mfc_dar_RW = 0;
-		ctx->csa.priv1.mfc_dsisr_RW = 0;
-	}
+	ea = ctx->csa.dar;
+	dsisr = ctx->csa.dsisr;
 
 	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
 		return 0;
@@ -204,6 +207,14 @@ int spufs_handle_class1(struct spu_conte
 		ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
 
 	spu_acquire(ctx);
+
+	/*
+	 * Clear dsisr under ctxt lock after handling the fault, so that
+	 * time slicing will not preempt the context while the page fault
+	 * handler is running. Context switch code removes mappings.
+	 */
+	ctx->csa.dar = ctx->csa.dsisr = 0;
+
 	/*
 	 * If we handled the fault successfully and are in runnable
 	 * state, restart the DMA.
@@ -229,4 +240,3 @@ int spufs_handle_class1(struct spu_conte
 	spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(spufs_handle_class1);
Index: linux-2.6.22/arch/powerpc/platforms/cell/spufs/run.c
===================================================================
--- linux-2.6.22.orig/arch/powerpc/platforms/cell/spufs/run.c
+++ linux-2.6.22/arch/powerpc/platforms/cell/spufs/run.c
@@ -15,7 +15,28 @@ void spufs_stop_callback(struct spu *spu
 {
 	struct spu_context *ctx = spu->ctx;
 
-	wake_up_all(&ctx->stop_wq);
+	/*
+	 * It should be impossible to preempt a context while an exception
+	 * is being processed, since the context switch code is specially
+	 * coded to deal with interrupts ... But, just in case, sanity check
+	 * the context pointer.  It is OK to return doing nothing since
+	 * the exception will be regenerated when the context is resumed.
+	 */
+	if (ctx) {
+		/* Copy exception arguments into module specific structure */
+		ctx->csa.class_0_pending = spu->class_0_pending;
+		ctx->csa.dsisr = spu->dsisr;
+		ctx->csa.dar = spu->dar;
+
+		mb();
+
+		wake_up_all(&ctx->stop_wq);
+	}
+
+	/* Clear callback arguments from spu structure */
+	spu->class_0_pending = 0;
+	spu->dsisr = 0;
+	spu->dar = 0;
 }
 
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
@@ -29,9 +50,9 @@ static inline int spu_stopped(struct spu
 	if (ctx->state != SPU_STATE_RUNNABLE ||
 	    test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
 		return 1;
-	pte_fault = spu->dsisr &
+	pte_fault = ctx->csa.dsisr &
 	    (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
-	return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ?
+	return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || ctx->csa.class_0_pending) ?
 		1 : 0;
 }
 
@@ -289,15 +310,30 @@ int spu_process_callback(struct spu_cont
 	return ret;
 }
 
-static inline int spu_process_events(struct spu_context *ctx)
+static inline int spu_handle_class0_events(struct spu_context *ctx)
 {
-	struct spu *spu = ctx->spu;
 	int ret = 0;
+	u64 stat = ctx->csa.class_0_pending;
 
-	if (spu->class_0_pending)
-		ret = spu_irq_class_0_bottom(spu);
+	if (stat) {
+		if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+			if (stat & 1) 		/* invalid DMA alignment */
+				ctx->event_return |= SPE_EVENT_DMA_ALIGNMENT;
+
+			if (stat & 2) 		/* invalid MFC DMA */
+				ctx->event_return |= SPE_EVENT_INVALID_DMA;
+
+			if (stat & 4) 		/* error on SPU */
+				ctx->event_return |= SPE_EVENT_SPE_ERROR;
+
+			wake_up_all(&ctx->stop_wq);
+		} else {
+			ret = spu_handle_class0(ctx);
+		}
+	}
 	if (!ret && signal_pending(current))
 		ret = -ERESTARTSYS;
+	ctx->csa.class_0_pending = 0;
 	return ret;
 }
 
@@ -364,7 +400,7 @@ long spufs_run_spu(struct file *file, st
 				break;
 			status &= ~SPU_STATUS_STOPPED_BY_STOP;
 		}
-		ret = spufs_handle_class1(ctx);
+		ret = spu_handle_class1(ctx);
 		if (ret)
 			break;
 
@@ -374,7 +410,7 @@ long spufs_run_spu(struct file *file, st
 				goto out2;
 			continue;
 		}
-		ret = spu_process_events(ctx);
+		ret = spu_handle_class0_events(ctx);
 
 	} while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
 				      SPU_STATUS_STOPPED_BY_HALT |
Index: linux-2.6.22/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.22.orig/arch/powerpc/platforms/cell/spufs/sched.c
+++ linux-2.6.22/arch/powerpc/platforms/cell/spufs/sched.c
@@ -389,7 +389,6 @@ static void spu_bind_context(struct spu 
 	spu->wbox_callback = spufs_wbox_callback;
 	spu->stop_callback = spufs_stop_callback;
 	spu->mfc_callback = spufs_mfc_callback;
-	spu->dma_callback = spufs_dma_callback;
 	mb();
 	spu_unmap_mappings(ctx);
 	spu_restore(&ctx->csa, spu);
@@ -432,7 +431,6 @@ static void spu_unbind_context(struct sp
 	spu->wbox_callback = NULL;
 	spu->stop_callback = NULL;
 	spu->mfc_callback = NULL;
-	spu->dma_callback = NULL;
 	spu_associate_mm(spu, NULL);
 	spu->pid = 0;
 	spu->tgid = 0;
Index: linux-2.6.22/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- linux-2.6.22.orig/arch/powerpc/platforms/cell/spufs/spufs.h
+++ linux-2.6.22/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -232,7 +232,8 @@ void spu_gang_remove_ctx(struct spu_gang
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
 /* fault handling */
-int spufs_handle_class1(struct spu_context *ctx);
+int spu_handle_class1(struct spu_context *ctx);
+int spu_handle_class0(struct spu_context *ctx);
 
 /* affinity */
 struct spu *affinity_check(struct spu_context *ctx);
Index: linux-2.6.22/arch/powerpc/platforms/cell/spufs/switch.c
===================================================================
--- linux-2.6.22.orig/arch/powerpc/platforms/cell/spufs/switch.c
+++ linux-2.6.22/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2103,10 +2103,6 @@ int spu_save(struct spu_state *prev, str
 	int rc;
 
 	acquire_spu_lock(spu);	        /* Step 1.     */
-	prev->dar = spu->dar;
-	prev->dsisr = spu->dsisr;
-	spu->dar = 0;
-	spu->dsisr = 0;
 	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
 	release_spu_lock(spu);
 	if (rc != 0 && rc != 2 && rc != 6) {
@@ -2133,9 +2129,6 @@ int spu_restore(struct spu_state *new, s
 	acquire_spu_lock(spu);
 	harvest(NULL, spu);
 	spu->slb_replace = 0;
-	new->dar = 0;
-	new->dsisr = 0;
-	spu->class_0_pending = 0;
 	rc = __do_spu_restore(new, spu);
 	release_spu_lock(spu);
 	if (rc) {
Index: linux-2.6.22/include/asm-powerpc/spu.h
===================================================================
--- linux-2.6.22.orig/include/asm-powerpc/spu.h
+++ linux-2.6.22/include/asm-powerpc/spu.h
@@ -151,7 +151,6 @@ struct spu {
 	void (* ibox_callback)(struct spu *spu);
 	void (* stop_callback)(struct spu *spu);
 	void (* mfc_callback)(struct spu *spu);
-	void (* dma_callback)(struct spu *spu, int type);
 
 	char irq_c0[8];
 	char irq_c1[8];
@@ -202,8 +201,6 @@ struct cbe_spu_info {
 extern struct cbe_spu_info cbe_spu_info[];
 
 void spu_init_channels(struct spu *spu);
-int spu_irq_class_0_bottom(struct spu *spu);
-int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
 
 #ifdef CONFIG_KEXEC
Index: linux-2.6.22/include/asm-powerpc/spu_csa.h
===================================================================
--- linux-2.6.22.orig/include/asm-powerpc/spu_csa.h
+++ linux-2.6.22/include/asm-powerpc/spu_csa.h
@@ -254,7 +254,7 @@ struct spu_state {
 	u64 spu_chnldata_RW[32];
 	u32 spu_mailbox_data[4];
 	u32 pu_mailbox_data[1];
-	u64 dar, dsisr;
+	u64 dar, dsisr, class_0_pending;
 	unsigned long suspend_time;
 	spinlock_t register_lock;
 };





More information about the cbe-oss-dev mailing list