[Skiboot] [PATCH 29/60] xive: Implement internal VP allocator

Benjamin Herrenschmidt benh at kernel.crashing.org
Thu Dec 22 14:16:37 AEDT 2016


We use a buddy allocator: a global one if block group mode is used,
otherwise one per XIVE instance.

It is not yet wired up to the OPAL APIs.
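
For illustration, here is a minimal sketch of the allocation path in
block group mode, mirroring xive_alloc_vps() below. The standalone
sketch_alloc_vps() wrapper and its parameters are hypothetical; the
buddy_alloc()/buddy_free() calls are the ones this patch relies on:

  /* Sketch only: split a VP allocation between the global buddy and
   * the per-chip provisioning, as done in block group mode.
   */
  static int sketch_alloc_vps(struct buddy *vp_buddy,
  			      uint32_t chips_alloc_bits, uint32_t order)
  {
  	uint32_t local_order;
  	int vp;

  	/* Enforce the minimum of 2 VPs per chip */
  	if (order < chips_alloc_bits + 1)
  		order = chips_alloc_bits + 1;

  	/* Each bit in the buddy covers (1 << chips_alloc_bits) VPs,
  	 * one per chip, so only the per-chip part of the order is
  	 * carved out of the shared buddy.
  	 */
  	local_order = order - chips_alloc_bits;

  	vp = buddy_alloc(vp_buddy, local_order);
  	if (vp < 0)
  		return -1;	/* No space */

  	/* The real code then provisions indirect VP pages on every
  	 * chip and, on failure, gives the range back with
  	 * buddy_free(vp_buddy, vp, local_order).
  	 */
  	return vp;
  }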

Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
 hw/xive.c | 299 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 260 insertions(+), 39 deletions(-)

diff --git a/hw/xive.c b/hw/xive.c
index 7fcfb03..81dd5c5 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -22,6 +22,7 @@
 #include <interrupts.h>
 #include <timebase.h>
 #include <bitmap.h>
+#include <buddy.h>
 
 /* Use Block group mode to move chip_id into block .... */
 #define USE_BLOCK_GROUP_MODE
@@ -197,11 +198,13 @@
  * XXX Adjust that based on BAR value ?
  */
 #ifdef USE_INDIRECT
-#define MAX_VP_COUNT		(512 * 1024)
+#define MAX_VP_ORDER		19	/* 512k */
+#define MAX_VP_COUNT		(1ul << MAX_VP_ORDER)
 #define VP_PER_PAGE		(0x10000 / 64) // Use sizeof ?
 #define IND_VP_TABLE_SIZE	((MAX_VP_COUNT / VP_PER_PAGE) * 8)
 #else
-#define MAX_VP_COUNT		(4 * 1024)
+#define MAX_VP_ORDER		12	/* 4k */
+#define MAX_VP_COUNT		(1ul << MAX_VP_ORDER)
 #define VPT_SIZE		(MAX_VP_COUNT * 64)
 #endif
 
@@ -217,7 +220,7 @@
 
 /* Initial number of VPs on block 0 only */
 #define INITIAL_BLK0_VP_BASE	0x800
-#define INITIAL_BLK0_VP_COUNT	(2 * 1024)
+#define INITIAL_BLK0_VP_COUNT  	0x800
 
 #endif
 
@@ -344,6 +347,12 @@ struct xive {
 #else
 	void		*vp_base;
 #endif
+
+#ifndef USE_BLOCK_GROUP_MODE
+	/* VP allocation buddy when not using block group mode */
+	struct buddy	*vp_buddy;
+#endif
+
 #ifdef USE_INDIRECT
 	/* Pool of donated pages for provisioning indirect EQ and VP pages */
 	struct list_head donated_pages;
@@ -464,6 +473,8 @@ static uint32_t xive_block_count;
 
 /* VP allocation */
 static uint32_t xive_chips_alloc_bits = 0;
+struct buddy *xive_vp_buddy;
+struct lock xive_buddy_lock = LOCK_UNLOCKED;
 
 /* VP# decoding/encoding */
 static bool xive_decode_vp(uint32_t vp, uint32_t *blk, uint32_t *idx,
@@ -753,7 +764,7 @@ static struct xive_vp *xive_get_vp(struct xive *x, unsigned int idx)
 }
 
 static void xive_init_vp(struct xive *x __unused, struct xive_vp *vp,
-			 uint32_t eq_blk, uint32_t eq_idx)
+			 uint32_t eq_blk, uint32_t eq_idx, bool valid)
 {
 	/* Stash the EQ base in the pressure relief interrupt field
 	 * and set the ACK# to 0xff to disable pressure relief interrupts
@@ -763,12 +774,13 @@ static void xive_init_vp(struct xive *x __unused, struct xive_vp *vp,
 	lwsync();
 
 	/* XXX TODO: Look at the special cache line stuff */
-	vp->w0 = VP_W0_VALID;
+	if (valid)
+		vp->w0 = VP_W0_VALID;
 }
 
-static void xive_init_eq(uint32_t vp_blk, uint32_t vp_idx,
-			 struct xive_eq *eq, void *backing_page,
-			 uint8_t prio)
+static void xive_init_default_eq(uint32_t vp_blk, uint32_t vp_idx,
+				 struct xive_eq *eq, void *backing_page,
+				 uint8_t prio)
 {
 	eq->w1 = EQ_W1_GENERATION;
 	eq->w3 = ((uint64_t)backing_page) & 0xffffffff;
@@ -872,6 +884,215 @@ static uint32_t xive_alloc_eq_set(struct xive *x, bool alloc_indirect __unused)
 	return idx;
 }
 
+#ifdef USE_INDIRECT
+static bool xive_provision_vp_ind(struct xive *x, uint32_t vp_idx, uint32_t order)
+{
+	uint32_t pbase, pend, i;
+
+	pbase = vp_idx / VP_PER_PAGE;
+	pend  = (vp_idx + (1 << order)) / VP_PER_PAGE;
+
+	for (i = pbase; i <= pend; i++) {
+		void *page;
+
+		/* Already provisioned ? */
+		if (x->vp_ind_base[i])
+			continue;
+
+		/* Try to grab a donated page */
+		page = xive_get_donated_page(x);
+		if (!page)
+			return false;
+
+		/* Install the page */
+		memset(page, 0, 0x10000);
+		x->vp_ind_base[i] = ((uint64_t)page) & VSD_ADDRESS_MASK;
+		x->vp_ind_base[i] |= SETFIELD(VSD_TSIZE, 0ull, 4);
+		x->vp_ind_base[i] |= SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE);
+	}
+	return true;
+}
+#else
+static inline bool xive_provision_vp_ind(struct xive *x __unused,
+					 uint32_t vp_idx __unused,
+					 uint32_t order __unused)
+{
+	return true;
+}
+#endif /* USE_INDIRECT */
+
+#ifdef USE_BLOCK_GROUP_MODE
+
+static void xive_init_vp_allocator(void)
+{
+	/* Initialize chip alloc bits */
+	xive_chips_alloc_bits = ilog2(xive_block_count);
+
+	prlog(PR_INFO, "XIVE: %d chips considered for VP allocations\n",
+	      1 << xive_chips_alloc_bits);
+
+	/* Allocate a buddy big enough for MAX_VP_ORDER allocations.
+	 *
+	 * Each bit in the buddy represents 1 << xive_chips_alloc_bits
+	 * VPs.
+	 */
+	xive_vp_buddy = buddy_create(MAX_VP_ORDER);
+	assert(xive_vp_buddy);
+
+	/* We reserve the whole range of VPs representing HW chips.
+	 *
+	 * These are 0x80..0xff, so order 7 starting at 0x80. This will
+	 * reserve that range on each chip.
+	 *
+	 * XXX This can go away if we just call xive_reset ..
+	 */
+	assert(buddy_reserve(xive_vp_buddy, 0x80, 7));
+}
+
+static uint32_t xive_alloc_vps(uint32_t order)
+{
+	uint32_t local_order, i;
+	int vp;
+
+	/* The minimum order is 2 VPs per chip */
+	if (order < (xive_chips_alloc_bits + 1))
+		order = xive_chips_alloc_bits + 1;
+
+	/* We split the allocation */
+	local_order = order - xive_chips_alloc_bits;
+
+	/* We grab that in the global buddy */
+	assert(xive_vp_buddy);
+	lock(&xive_buddy_lock);
+	vp = buddy_alloc(xive_vp_buddy, local_order);
+	unlock(&xive_buddy_lock);
+	if (vp < 0)
+		return XIVE_ALLOC_NO_SPACE;
+
+	/* Provision on every chip considered for allocation */
+	for (i = 0; i < (1 << xive_chips_alloc_bits); i++) {
+		struct xive *x = xive_from_pc_blk(i);
+		bool success;
+
+		/* Return internal error & log rather than assert ? */
+		assert(x);
+		lock(&x->lock);
+		success = xive_provision_vp_ind(x, vp, local_order);
+		unlock(&x->lock);
+		if (!success) {
+			lock(&xive_buddy_lock);
+			buddy_free(xive_vp_buddy, vp, local_order);
+			unlock(&xive_buddy_lock);
+			return XIVE_ALLOC_NO_IND;
+		}
+	}
+
+	/* Encode the VP number. "blk" is 0 as this represents
+	 * all blocks and the allocation always starts at 0
+	 */
+	return xive_encode_vp(0, vp, order);
+}
+
+static void xive_free_vps(uint32_t vp)
+{
+	uint32_t idx;
+	uint8_t order, local_order;
+
+	assert(xive_decode_vp(vp, NULL, &idx, &order, NULL));
+
+	/* We split the allocation */
+	local_order = order - xive_chips_alloc_bits;
+
+	/* Free that in the buddy */
+	lock(&xive_buddy_lock);
+	buddy_free(xive_vp_buddy, idx, local_order);
+	unlock(&xive_buddy_lock);
+}
+
+#else /* USE_BLOCK_GROUP_MODE */
+
+static void xive_init_vp_allocator(void)
+{
+	struct proc_chip *chip;
+
+	for_each_chip(chip) {
+		struct xive *x = chip->xive;
+		if (!x)
+			continue;
+		/* Each chip has a MAX_VP_ORDER buddy */
+		x->vp_buddy = buddy_create(MAX_VP_ORDER);
+		assert(x->vp_buddy);
+
+		/* We reserve the whole range of VPs representing HW chips.
+		 *
+		 * These are 0x800..0xfff on block 0 only, so order 11
+		 * starting at 0x800.
+		 */
+		if (x->block_id == 0)
+			assert(buddy_reserve(x->vp_buddy, 0x800, 11));
+	}
+}
+
+static uint32_t xive_alloc_vps(uint32_t order)
+{
+	struct proc_chip *chip;
+	struct xive *x = NULL;
+	int vp = -1;
+
+	/* Minimum order is 1 */
+	if (order < 1)
+		order = 1;
+
+	/* Try on every chip */
+	for_each_chip(chip) {
+		x = chip->xive;
+		if (!x)
+			continue;
+		assert(x->vp_buddy);
+		lock(&x->lock);
+		vp = buddy_alloc(x->vp_buddy, order);
+		unlock(&x->lock);
+		if (vp >= 0)
+			break;
+	}
+	if (vp < 0)
+		return XIVE_ALLOC_NO_SPACE;
+
+	/* We have VPs, make sure we have backing for the
+	 * NVTs on that block
+	 */
+	if (!xive_provision_vp_ind(x, vp, order)) {
+		lock(&x->lock);
+		buddy_free(x->vp_buddy, vp, order);
+		unlock(&x->lock);
+		return XIVE_ALLOC_NO_IND;
+	}
+
+	/* Encode the VP number */
+	return xive_encode_vp(x->block_id, vp, order);
+}
+
+static void xive_free_vps(uint32_t vp)
+{
+	uint32_t idx, blk;
+	uint8_t order;
+	struct xive *x;
+
+	assert(xive_decode_vp(vp, &blk, &idx, &order, NULL));
+
+	/* Grab appropriate xive */
+	x = xive_from_pc_blk(blk);
+	/* XXX Return error instead ? */
+	assert(x);
+
+	/* Free that in the buddy */
+	lock(&x->lock);
+	buddy_free(x->vp_buddy, idx, order);
+	unlock(&x->lock);
+}
+
+#endif /* ndef USE_BLOCK_GROUP_MODE */
+
 #if 0 /* Not used yet. This will be used to kill the cache
        * of indirect VSDs
        */
@@ -1401,8 +1622,8 @@ static bool xive_setup_set_xlate(struct xive *x)
 
 static bool xive_prealloc_tables(struct xive *x)
 {
-	unsigned int i, vp_init_count, vp_init_base;
-	unsigned int pbase __unused, pend __unused;
+	uint32_t i __unused, vp_init_count __unused, vp_init_base __unused;
+	uint32_t pbase __unused, pend __unused;
 	uint64_t al __unused;
 
 	/* ESB/SBE has 4 entries per byte */
@@ -1450,29 +1671,7 @@ static bool xive_prealloc_tables(struct xive *x)
 	x->vp_ind_count = IND_VP_TABLE_SIZE / 8;
 	memset(x->vp_ind_base, 0, al);
 
-#else /* USE_INDIRECT */
-
-	x->eq_base = local_alloc(x->chip_id, EQT_SIZE, EQT_SIZE);
-	if (!x->eq_base) {
-		xive_err(x, "Failed to allocate EQ table\n");
-		return false;
-	}
-	memset(x->eq_base, 0, EQT_SIZE);
-
-	/* EAS/IVT entries are 8 bytes */
-	x->vp_base = local_alloc(x->chip_id, VPT_SIZE, VPT_SIZE);
-	if (!x->vp_base) {
-		xive_err(x, "Failed to allocate VP table\n");
-		return false;
-	}
-	/* We clear the entries (non-valid). They will be initialized
-	 * when actually used
-	 */
-	memset(x->vp_base, 0, VPT_SIZE);
-
-#endif /* USE_INDIRECT */
-
-	/* Populate/initialize VP/EQs */
+	/* Populate/initialize VP/EQs indirect backing */
 #ifdef USE_BLOCK_GROUP_MODE
 	vp_init_count = INITIAL_VP_COUNT;
 	vp_init_base = INITIAL_VP_BASE;
@@ -1481,12 +1680,12 @@ static bool xive_prealloc_tables(struct xive *x)
 	vp_init_base = INITIAL_BLK0_VP_BASE;
 #endif
 
-#ifdef USE_INDIRECT
 	/* Allocate pages for some VPs in indirect mode */
 	pbase = vp_init_base / VP_PER_PAGE;
 	pend  = (vp_init_base + vp_init_count) / VP_PER_PAGE;
+
 	xive_dbg(x, "Allocating pages %d to %d of VPs (for %d VPs)\n",
-		 pbase, pend, INITIAL_VP_COUNT);
+		 pbase, pend, vp_init_count);
 	for (i = pbase; i <= pend; i++) {
 		void *page;
 
@@ -1502,6 +1701,25 @@ static bool xive_prealloc_tables(struct xive *x)
 		x->vp_ind_base[i] |= SETFIELD(VSD_TSIZE, 0ull, 4);
 		x->vp_ind_base[i] |= SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE);
 	}
+
+#else /* USE_INDIRECT */
+
+	/* Allocate direct EQ and VP tables */
+	x->eq_base = local_alloc(x->chip_id, EQT_SIZE, EQT_SIZE);
+	if (!x->eq_base) {
+		xive_err(x, "Failed to allocate EQ table\n");
+		return false;
+	}
+	memset(x->eq_base, 0, EQT_SIZE);
+	x->vp_base = local_alloc(x->chip_id, VPT_SIZE, VPT_SIZE);
+	if (!x->vp_base) {
+		xive_err(x, "Failed to allocate VP table\n");
+		return false;
+	}
+	/* We clear the entries (non-valid). They will be initialized
+	 * when actually used
+	 */
+	memset(x->vp_base, 0, VPT_SIZE);
 #endif /* USE_INDIRECT */
 
 	return true;
@@ -2329,8 +2547,8 @@ static void xive_init_cpu_defaults(struct xive_cpu_state *xs)
 	assert(x_eq);
 
 	/* Initialize the structure */
-	xive_init_eq(xs->vp_blk, xs->vp_idx, &eq,
-		     xs->eq_page, XIVE_EMULATION_PRIO);
+	xive_init_default_eq(xs->vp_blk, xs->vp_idx, &eq,
+			     xs->eq_page, XIVE_EMULATION_PRIO);
 
 	/* Use the cache watch to write it out */
 	xive_eqc_cache_update(x_eq, xs->eq_blk,
@@ -2338,7 +2556,7 @@ static void xive_init_cpu_defaults(struct xive_cpu_state *xs)
 			      0, 4, &eq, false, true);
 
 	/* Initialize/enable the VP */
-	xive_init_vp(x_vp, &vp, xs->eq_blk, xs->eq_idx);
+	xive_init_vp(x_vp, &vp, xs->eq_blk, xs->eq_idx, true);
 
 	/* Use the cache watch to write it out */
 	xive_vpc_cache_update(x_vp, xs->vp_blk, xs->vp_idx,
@@ -2350,7 +2568,7 @@ static void xive_provision_cpu(struct xive_cpu_state *xs, struct cpu_thread *c)
 	struct xive *x;
 	void *p;
 
-	/* For now VPs are pre-allocated */
+	/* Physical VPs are pre-allocated */
 	xs->vp_blk = PIR2VP_BLK(c->pir);
 	xs->vp_idx = PIR2VP_IDX(c->pir);
 
@@ -3072,6 +3290,9 @@ void init_xive(void)
 		init_one_xive(np);
 	}
 
+	/* Init VP allocator */
+	xive_init_vp_allocator();
+
 	/* Some inits must be done after all xive have been created
 	 * such as setting up the forwarding ports
 	 */
-- 
2.9.3


