[Skiboot] [PATCH 1/4] platforms/astbmc/witherspoon: Rework NPU presence detection
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Thu Oct 18 12:16:14 AEDT 2018
Rework NPU presence detection in preparation for supporting both NVLink and
OpenCAPI devices operating simultaneously on the same NPU.
If an OpenCAPI card is connected to GPU#0, and an NVLink GPU is connected
to GPU#1, the GPU will only receive 2 links rather than the usual 3.
The reason for this is that without the OpenCAPI card, the GPU would be
use links 3-5, connected to NPU bricks 3-5, which needs both stacks 1 and 2
to be in NVLink mode.
However, with an OpenCAPI card in the GPU#0 slot that uses links 0-1, we
need to use NPU bricks 2-3, which means stack 1 must be set in OpenCAPI
mode. As such, the GPU will be restricted to using links 4 and 5.
Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
---
platforms/astbmc/witherspoon.c | 83 ++++++++++++++++++++++++++++--------------
1 file changed, 55 insertions(+), 28 deletions(-)
diff --git a/platforms/astbmc/witherspoon.c b/platforms/astbmc/witherspoon.c
index f45f739fbb2c..9cb3789acbc8 100644
--- a/platforms/astbmc/witherspoon.c
+++ b/platforms/astbmc/witherspoon.c
@@ -232,7 +232,8 @@ static void witherspoon_npu2_device_detect(struct npu2 *npu)
struct dt_node *dn;
int rc;
- bool gpu0_present, gpu1_present;
+ enum npu2_dev_type gpu0_type = NPU2_DEV_TYPE_UNKNOWN;
+ enum npu2_dev_type gpu1_type = NPU2_DEV_TYPE_UNKNOWN;
if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) {
prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n");
@@ -261,16 +262,6 @@ static void witherspoon_npu2_device_detect(struct npu2 *npu)
return;
}
- gpu0_present = occ_get_gpu_presence(chip, 0);
- if (gpu0_present) {
- prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
- }
-
- gpu1_present = occ_get_gpu_presence(chip, 1);
- if (gpu1_present) {
- prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
- }
-
/* Set pins to input */
state = 0xff;
rc = i2c_request_send(i2c_port_id,
@@ -287,40 +278,76 @@ static void witherspoon_npu2_device_detect(struct npu2 *npu)
if (rc)
goto i2c_failed;
- if (gpu0_present) {
+ if (occ_get_gpu_presence(chip, 0)) {
+ prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
if (state & (1 << 0)) {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
chip->id);
- /*
- * On witherspoon, bricks 2 and 3 are connected to
- * the lanes matching links 1 and 0 in OpenCAPI mode.
- */
- set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
- /* We current don't support using the second link */
- set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
+ gpu0_type = NPU2_DEV_TYPE_OPENCAPI;
} else {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
chip->id);
- set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
- set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
- set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
+ gpu0_type = NPU2_DEV_TYPE_NVLINK;
}
}
- if (gpu1_present) {
+ if (occ_get_gpu_presence(chip, 1)) {
+ prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
if (state & (1 << 1)) {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
chip->id);
- set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
- /* We current don't support using the second link */
- set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
+ gpu1_type = NPU2_DEV_TYPE_OPENCAPI;
} else {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
chip->id);
+ gpu1_type = NPU2_DEV_TYPE_NVLINK;
+ }
+ }
+
+ if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
+ /*
+ * On witherspoon, bricks 2 and 3 are connected to
+ * the lanes matching links 1 and 0 in OpenCAPI mode.
+ */
+ set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
+ /* We currently don't support using the second link */
+ set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
+ }
+
+ if (gpu0_type == NPU2_DEV_TYPE_NVLINK) {
+ set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
+ set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
+ set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
+ }
+
+ if (gpu1_type == NPU2_DEV_TYPE_OPENCAPI) {
+ set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
+ /* We currently don't support using the second link */
+ set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
+ }
+
+ /*
+ * If an OpenCAPI card is connected to GPU#0, and an NVLink GPU is
+ * connected to GPU#1, the GPU will only receive 2 links rather than the
+ * usual 3.
+ *
+ * The reason for this is that without the OpenCAPI card, the GPU would
+ * be use links 3-5, connected to NPU bricks 3-5, which needs both
+ * stacks 1 and 2 to be in NVLink mode.
+ *
+ * However, with an OpenCAPI card in the GPU#0 slot that uses links 0-1,
+ * we need to use NPU bricks 2-3, which means stack 1 must be set in
+ * OpenCAPI mode. As such, the GPU will be restricted to using links 4
+ * and 5.
+ */
+ if (gpu1_type == NPU2_DEV_TYPE_NVLINK) {
+ if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
+ prlog(PR_WARNING, "PLAT: Chip %d GPU#1 will operate at reduced performance due to presence of OpenCAPI device. For optimal performance, swap device locations\n", chip->id);
+ } else {
set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK);
- set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
- set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
}
+ set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
+ set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
}
return;
--
2.11.0
More information about the Skiboot
mailing list