[Cbe-oss-dev] [RFC/PATCH] libspe2: use mapped mailbox registers to speed up mailbox communication

stenzel at de.ibm.com stenzel at de.ibm.com
Fri May 18 20:25:27 EST 2007


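The following patch speeds up mailbox communication by accessing
the mailbox registers directly through the memory-mapped problem
state area when the context was created with the SPE_MAP_PS flag.
Mailbox status queries and signal notification writes use the same
direct access; contexts created without SPE_MAP_PS continue to go
through the spufs file interface as before.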

Comments are appreciated.

Signed-off-by: Gerhard Stenzel <stenzel at de.ibm.com>

===================================================================
Index: libspe2/spebase/mbox.c
===================================================================
--- libspe2/spebase/mbox.c	(revision 39)
+++ libspe2/spebase/mbox.c	(working copy)
@@ -30,6 +30,30 @@
  * -------------------------
  */
 
+static __inline__ int _base_spe_out_mbox_read_ps(spe_context_ptr_t spectx,
+                        unsigned int mbox_data[], 
+                        int count)
+{
+	int rc;
+	int entries;
+
+	volatile struct spe_spu_control_area *cntl_area =
+        	spectx->base_private->cntl_mmap_base;
+
+	_base_spe_context_lock(spectx, FD_MBOX);
+	rc = 0;
+	while (rc < count) {
+		entries = cntl_area->SPU_Mbox_Stat & 0xFF;
+		if (entries) {
+			mbox_data[rc] = cntl_area->SPU_Out_Mbox;
+			rc ++;
+		} else {
+			break;
+		}
+	}
+	_base_spe_context_unlock(spectx, FD_MBOX);
+	return rc;
+}
 
 int _base_spe_out_mbox_read(spe_context_ptr_t spectx, 
                         unsigned int mbox_data[], 
@@ -42,20 +66,48 @@ int _base_spe_out_mbox_read(spe_context_
 		return -1;
 	}
 
-	rc = read(open_if_closed(spectx,FD_MBOX, 0), mbox_data, count*4);
-	DEBUG_PRINTF("%s read rc: %d\n", __FUNCTION__, rc);
-	if (rc != -1) {
-		rc /= 4;
+	if (spectx->base_private->flags & SPE_MAP_PS) {
+		rc = _base_spe_out_mbox_read_ps(spectx, mbox_data, count);
 	} else {
-		if (errno == EAGAIN ) { // no data ready to be read
-			errno = 0;
-			rc = 0;
+		rc = read(open_if_closed(spectx,FD_MBOX, 0), mbox_data, count*4);
+		DEBUG_PRINTF("%s read rc: %d\n", __FUNCTION__, rc);
+		if (rc != -1) {
+			rc /= 4;
+		} else {
+			if (errno == EAGAIN ) { // no data ready to be read
+				errno = 0;
+				rc = 0;
+			}
 		}
 	}
-
 	return rc;
 }
 
+static __inline__ int _base_spe_in_mbox_write_ps(spe_context_ptr_t spectx,
+                        unsigned int *mbox_data, 
+                        int count)
+{
+	volatile struct spe_spu_control_area *cntl_area =
+        	spectx->base_private->cntl_mmap_base;
+	int total = 0;
+	unsigned int *aux;
+
+	_base_spe_context_lock(spectx, FD_WBOX);
+	aux = mbox_data;
+	while (total < count) {
+		int space = (cntl_area->SPU_Mbox_Stat >> 8) & 0xFF;
+		if (space) {
+			cntl_area->SPU_In_Mbox = *aux++;
+			total++;
+		} else {
+			break;
+		}
+	}
+	_base_spe_context_unlock(spectx, FD_WBOX);
+
+	return total;
+}
+
 int _base_spe_in_mbox_write(spe_context_ptr_t spectx, 
                         unsigned int *mbox_data, 
                         int count, 
@@ -63,6 +115,7 @@ int _base_spe_in_mbox_write(spe_context_
 {
 	int rc;
 	int total;
+	unsigned int *aux;
 
 	if (mbox_data == NULL || count < 1){
 		errno = EINVAL;
@@ -72,27 +125,48 @@ int _base_spe_in_mbox_write(spe_context_
 	switch (behavior_flag) {
 	case SPE_MBOX_ALL_BLOCKING: // write all, even if blocking
 		total = rc = 0;
-		while (total < 4*count) {
+
+		// first try to write directly if we map the PS
+		if (spectx->base_private->flags & SPE_MAP_PS) 
+			total = _base_spe_in_mbox_write_ps(spectx, mbox_data, count);
+
+		// now write the remaining via spufs
+		while (total < count) {
+			aux = mbox_data + total;
 			rc = write(open_if_closed(spectx,FD_WBOX, 0),
-					(const char *)mbox_data + total, 4*count - total);
+				  aux, 4*(count - total));
 			if (rc == -1) {
 				break;
 			}
-			total += rc;
+			total += (rc/4);
 		}
 		break;
 
 	case  SPE_MBOX_ANY_BLOCKING: // write at least one, even if blocking
-		total = rc = write(open_if_closed(spectx,FD_WBOX, 0), mbox_data, 4*count);
+		total = rc = 0;
+		// first try to write directly if we map the PS
+		if (spectx->base_private->flags & SPE_MAP_PS) 
+			total = _base_spe_in_mbox_write_ps(spectx, mbox_data, count);
+		// if none was written write via spufs
+		if (total == 0) {
+			rc = write(open_if_closed(spectx,FD_WBOX, 0), mbox_data, 4*count);
+			total = rc/4;
+		}
 		break;
 
 	case  SPE_MBOX_ANY_NONBLOCKING: // only write, if non blocking
-		rc = write(open_if_closed(spectx,FD_WBOX_NB, 0), mbox_data, 4*count);
-		if (rc == -1 && errno == EAGAIN) {
-			rc = 0;
-			errno = 0;
+		total = rc = 0;
+		// write directly if we map the PS else write via spufs
+		if (spectx->base_private->flags & SPE_MAP_PS) {
+			total = _base_spe_in_mbox_write_ps(spectx, mbox_data, count);
+		} else { 
+			rc = write(open_if_closed(spectx,FD_WBOX_NB, 0), mbox_data, 4*count);
+			if (rc == -1 && errno == EAGAIN) {
+				rc = 0;
+				errno = 0;
+			}
+			total = rc/4;
 		}
-		total = rc;
 		break;
 
 	default:
@@ -105,16 +179,24 @@ int _base_spe_in_mbox_write(spe_context_
 		return -1;
 	}
 
-	return total / 4;
+	return total;
 }
 
 int _base_spe_in_mbox_status(spe_context_ptr_t spectx)
 {
 	int rc, ret;
+	volatile struct spe_spu_control_area *cntl_area =
+        	spectx->base_private->cntl_mmap_base;
 
-	rc = read(open_if_closed(spectx,FD_WBOX_STAT, 0), &ret, 4);
-	if (rc != 4)
-		ret = -1;
+	if (spectx->base_private->flags & SPE_MAP_PS) {
+		_base_spe_context_lock(spectx, FD_WBOX_STAT);
+		ret = (cntl_area->SPU_Mbox_Stat >> 8) & 0xFF;
+		_base_spe_context_unlock(spectx, FD_WBOX_STAT);
+	} else {
+		rc = read(open_if_closed(spectx,FD_WBOX_STAT, 0), &ret, 4);
+		if (rc != 4)
+			ret = -1;
+	}
 
 	return ret;
 	
@@ -123,10 +205,18 @@ int _base_spe_in_mbox_status(spe_context
 int _base_spe_out_mbox_status(spe_context_ptr_t spectx)
 {
         int rc, ret;
+	volatile struct spe_spu_control_area *cntl_area =
+        	spectx->base_private->cntl_mmap_base;
 
-        rc = read(open_if_closed(spectx,FD_MBOX_STAT, 0), &ret, 4);
-        if (rc != 4)
-                ret = -1;
+	if (spectx->base_private->flags & SPE_MAP_PS) {
+		_base_spe_context_lock(spectx, FD_MBOX_STAT);
+		ret = cntl_area->SPU_Mbox_Stat & 0xFF;
+		_base_spe_context_unlock(spectx, FD_MBOX_STAT);
+	} else {
+        	rc = read(open_if_closed(spectx,FD_MBOX_STAT, 0), &ret, 4);
+	        if (rc != 4)
+        	        ret = -1;
+	}
 
         return ret;
 	
@@ -135,11 +225,19 @@ int _base_spe_out_mbox_status(spe_contex
 int _base_spe_out_intr_mbox_status(spe_context_ptr_t spectx)
 {
         int rc, ret;
+	volatile struct spe_spu_control_area *cntl_area =
+        	spectx->base_private->cntl_mmap_base;
 
-        rc = read(open_if_closed(spectx,FD_IBOX_STAT, 0), &ret, 4);
-        if (rc != 4)
-                ret = -1;
+	if (spectx->base_private->flags & SPE_MAP_PS) {
+		_base_spe_context_lock(spectx, FD_IBOX_STAT);
+		ret = (cntl_area->SPU_Mbox_Stat >> 16) & 0xFF;
+		_base_spe_context_unlock(spectx, FD_IBOX_STAT);
+	} else {
+	        rc = read(open_if_closed(spectx,FD_IBOX_STAT, 0), &ret, 4);
+	        if (rc != 4)
+        	        ret = -1;
 
+	}
         return ret;
 }
 
@@ -200,23 +298,43 @@ int _base_spe_out_intr_mbox_read(spe_con
                         unsigned int data )
 {
 	int rc;
+	volatile struct spe_spu_control_area *cntl_area =
+        	spectx->base_private->cntl_mmap_base;
+
+	if (spectx->base_private->flags & SPE_MAP_PS) {
+		if (signal_reg == SPE_SIG_NOTIFY_REG_1) {
+			spe_sig_notify_1_area_t *sig = spectx->base_private->signal1_mmap_base;
+
+			_base_spe_context_lock(spectx, FD_SIG1);
+			sig->SPU_Sig_Notify_1 = data;
+			_base_spe_context_unlock(spectx, FD_SIG1);
+		} else if (signal_reg == SPE_SIG_NOTIFY_REG_2) {
+			spe_sig_notify_2_area_t *sig = spectx->base_private->signal2_mmap_base;
+
+			_base_spe_context_lock(spectx, FD_SIG2);
+			sig->SPU_Sig_Notify_2 = data;
+			_base_spe_context_unlock(spectx, FD_SIG2);
+		}
+		rc = 0;
+	} else {
+		if (signal_reg == SPE_SIG_NOTIFY_REG_1)
+			rc = write(open_if_closed(spectx,FD_SIG1, 0), &data, 4);
+		else if (signal_reg == SPE_SIG_NOTIFY_REG_2)
+			rc = write(open_if_closed(spectx,FD_SIG2, 0), &data, 4);
+		else
+			return -1;
 		
-	if (signal_reg == SPE_SIG_NOTIFY_REG_1)
-		rc = write(open_if_closed(spectx,FD_SIG1, 0), &data, 4);
-	else if (signal_reg == SPE_SIG_NOTIFY_REG_2)
-		rc = write(open_if_closed(spectx,FD_SIG2, 0), &data, 4);
-	else
-		return -1;
+		if (rc == 4)
+			rc = 0;
 	
-	if (rc == 4)
-		rc = 0;
-
-	if (signal_reg == SPE_SIG_NOTIFY_REG_1)
-		close_if_open(spectx,FD_SIG1);
-	else if (signal_reg == SPE_SIG_NOTIFY_REG_2)
-		close_if_open(spectx,FD_SIG2);
+		if (signal_reg == SPE_SIG_NOTIFY_REG_1)
+			close_if_open(spectx,FD_SIG1);
+		else if (signal_reg == SPE_SIG_NOTIFY_REG_2)
+			close_if_open(spectx,FD_SIG2);
+	}
 
 	return rc;
 }
 
 
+

===================================================================

Best regards, 

Gerhard Stenzel, Linux on Cell Development, LTC
-------------------------------------------------------------------------------------
IBM Deutschland Entwicklung GmbH
Vorsitzender des Aufsichtsrats: Martin Jetter | Geschaeftsfuehrung: Herbert Kircher
Sitz der Gesellschaft: Boeblingen | Registergericht: Amtsgericht Stuttgart, HRB 243294




More information about the cbe-oss-dev mailing list