[Skiboot] [PATCH v3 6/6] platforms/astbmc/witherspoon: Implement OpenCAPI support
Frederic Barrat
fbarrat at linux.ibm.com
Sat Sep 1 00:30:15 AEST 2018
Le 31/08/2018 à 06:16, Andrew Donnellan a écrit :
> OpenCAPI on Witherspoon is slightly more involved than on Zaius and ZZ, due
> to the OpenCAPI links using the SXM2 connectors that are used for NVLink
> GPUs.
>
> This patch adds the regular OpenCAPI platform information, and also a
> Witherspoon-specific presence detection callback that uses the previously
> added OCC GPU presence detection to figure out the device types plugged
> into each SXM2 socket.
>
> The SXM2 connectors are capable of carrying 2 OpenCAPI links, and future
> OpenCAPI devices are expected to make use of this. However, we don't yet
> support ganged links and the various implications that has for handling
> things like device reset, so for now, we only enable 1 brick per device.
>
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> Acked-by: Reza Arbab <arbab at linux.ibm.com>
> Reviewed-by: Alistair Popple <alistair at popple.id.au>
>
> ---
> v1->v2:
> - update platform field names
>
> v2->v3:
> - refactor link type/index setting (Fred/Alistair)
> - add explanatory comment for i2c reset settings (Fred)
> ---
There's a nitpick below, but I wouldn't do a respin for it, it can fit
in your next series.
Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>
> platforms/astbmc/witherspoon.c | 203 +++++++++++++++++++++++++++++++++-
> 1 file changed, 200 insertions(+), 3 deletions(-)
>
> diff --git a/platforms/astbmc/witherspoon.c b/platforms/astbmc/witherspoon.c
> index ce83ff9701d3..b73bf2107ae2 100644
> --- a/platforms/astbmc/witherspoon.c
> +++ b/platforms/astbmc/witherspoon.c
> @@ -28,10 +28,18 @@
> #include <pci-slot.h>
> #include <phb4.h>
> #include <npu2.h>
> +#include <occ.h>
> +#include <i2c.h>
>
> #include "astbmc.h"
> #include "ast.h"
>
> +static enum {
> + WITHERSPOON_TYPE_UNKNOWN,
> + WITHERSPOON_TYPE_SEQUOIA,
> + WITHERSPOON_TYPE_REDBUD
> +} witherspoon_type;
> +
> /*
> * HACK: Hostboot doesn't export the correct data for the system VPD EEPROM
> * for this system. So we need to work around it here.
> @@ -50,8 +58,35 @@ static void vpd_dt_fixup(void)
> }
> }
>
> +static void witherspoon_create_ocapi_i2c_bus(void)
> +{
> + struct dt_node *xscom, *i2cm, *i2c_bus;
> + prlog(PR_DEBUG, "OCAPI: Adding I2C bus device node for OCAPI reset\n");
> + dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
> + i2cm = dt_find_by_name(xscom, "i2cm@a1000");
> + if (!i2cm) {
> + prlog(PR_ERR, "OCAPI: Failed to add I2C bus device node\n");
> + continue;
> + }
> +
> + if (dt_find_by_name(i2cm, "i2c-bus@4"))
> + continue;
> +
> + i2c_bus = dt_new_addr(i2cm, "i2c-bus", 4);
> + dt_add_property_cells(i2c_bus, "reg", 4);
> + dt_add_property_cells(i2c_bus, "bus-frequency", 0x61a80);
> + dt_add_property_strings(i2c_bus, "compatible",
> + "ibm,opal-i2c", "ibm,power8-i2c-port",
> + "ibm,power9-i2c-port");
> + }
> +}
> +
> static bool witherspoon_probe(void)
> {
> + struct dt_node *np;
> + int highest_gpu_group_id = 0;
> + int gpu_group_id;
> +
> if (!dt_node_is_compatible(dt_root, "ibm,witherspoon"))
> return false;
>
> @@ -63,6 +98,26 @@ static bool witherspoon_probe(void)
>
> vpd_dt_fixup();
>
> + witherspoon_create_ocapi_i2c_bus();
> +
> + dt_for_each_compatible(dt_root, np, "ibm,npu-link") {
> + gpu_group_id = dt_prop_get_u32(np, "ibm,npu-group-id");
> + if (gpu_group_id > highest_gpu_group_id)
> + highest_gpu_group_id = gpu_group_id;
> + };
> +
> + switch (highest_gpu_group_id) {
> + case 1:
> + witherspoon_type = WITHERSPOON_TYPE_REDBUD;
> + break;
> + case 2:
> + witherspoon_type = WITHERSPOON_TYPE_SEQUOIA;
> + break;
> + default:
> + witherspoon_type = WITHERSPOON_TYPE_UNKNOWN;
> + prlog(PR_NOTICE, "PLAT: Unknown Witherspoon variant detected\n");
> + }
> +
> return true;
> }
>
> @@ -154,14 +209,155 @@ static void witherspoon_pre_pci_fixup(void)
> phb4_pre_pci_fixup_witherspoon();
> }
>
> -static void witherspoon_npu2_device_detect(struct npu2 *npu)
> +static void set_link_details(struct npu2 *npu, uint32_t link_index,
> + uint32_t brick_index, enum npu2_dev_type type)
> {
> - /* Stub until we implement real device detection */
> + struct npu2_dev *dev = NULL;
> for (int i = 0; i < npu->total_devices; i++) {
> - npu->devices[i].type = NPU2_DEV_TYPE_NVLINK;
> + if (npu->devices[i].link_index == link_index)
> + dev = &npu->devices[i];
We might as well break here.
Fred
> + }
> + if (!dev) {
> + prlog(PR_ERR, "PLAT: Could not find NPU link index %d\n",
> + link_index);
> + return;
> + }
> + dev->brick_index = brick_index;
> + dev->type = type;
> +}
> +
> +static void witherspoon_npu2_device_detect(struct npu2 *npu)
> +{
> + struct proc_chip *chip;
> + uint8_t state;
> + uint64_t i2c_port_id = 0;
> + char port_name[17];
> + struct dt_node *dn;
> + int rc;
> +
> + bool gpu0_present, gpu1_present;
> +
> + if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) {
> + prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n");
> + for (int i = 0; i < npu->total_devices; i++) {
> + npu->devices[i].type = NPU2_DEV_TYPE_NVLINK;
> + }
> + return;
> + }
> + assert(npu->total_devices == 6);
> +
> + chip = get_chip(npu->chip_id);
> +
> + /* Find I2C port */
> + snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
> + chip->id, platform.ocapi->i2c_engine,
> + platform.ocapi->i2c_port);
> + dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") {
> + if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) {
> + i2c_port_id = dt_prop_get_u32(dn, "ibm,opal-id");
> + break;
> + }
> + }
> +
> + if (!i2c_port_id) {
> + prlog(PR_ERR, "PLAT: Could not find NPU presence I2C port\n");
> + return;
> + }
> +
> + gpu0_present = occ_get_gpu_presence(chip, 0);
> + if (gpu0_present) {
> + prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
> + }
> +
> + gpu1_present = occ_get_gpu_presence(chip, 1);
> + if (gpu1_present) {
> + prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
> }
> +
> + /* Set pins to input */
> + state = 0xff;
> + rc = i2c_request_send(i2c_port_id,
> + platform.ocapi->i2c_presence_addr, SMBUS_WRITE, 3,
> + 1, &state, 1, 120);
> + if (rc)
> + goto i2c_failed;
> +
> + /* Read the presence value */
> + state = 0x00;
> + rc = i2c_request_send(i2c_port_id,
> + platform.ocapi->i2c_presence_addr, SMBUS_READ, 0,
> + 1, &state, 1, 120);
> + if (rc)
> + goto i2c_failed;
> +
> + if (gpu0_present) {
> + if (state & (1 << 0)) {
> + prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
> + chip->id);
> + /*
> + * On witherspoon, bricks 2 and 3 are connected to
> + * the lanes matching links 1 and 0 in OpenCAPI mode.
> + */
> + set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
> + /* We currently don't support using the second link */
> + set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
> + } else {
> + prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
> + chip->id);
> + set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
> + set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
> + set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
> + }
> + }
> +
> + if (gpu1_present) {
> + if (state & (1 << 1)) {
> + prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
> + chip->id);
> + set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
> + /* We currently don't support using the second link */
> + set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
> + } else {
> + prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
> + chip->id);
> + set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK);
> + set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
> + set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
> + }
> + }
> +
> + return;
> +
> +i2c_failed:
> + prlog(PR_ERR, "PLAT: NPU device type detection failed, rc=%d\n", rc);
> + return;
> }
>
> +const struct platform_ocapi witherspoon_ocapi = {
> + .i2c_engine = 1,
> + .i2c_port = 4,
> + .odl_phy_swap = false,
> + .i2c_reset_addr = 0x20,
> + /*
> + * Witherspoon uses SXM2 connectors, carrying 2 OCAPI links
> + * over a single connector - hence each pair of bricks shares
> + * the same pin for resets. We currently only support using
> + * bricks 3 and 4, among other reasons because we can't handle
> + * a reset on one link causing the other link to reset as
> + * well.
> + */
> + .i2c_reset_brick2 = 1 << 0,
> + .i2c_reset_brick3 = 1 << 0,
> + .i2c_reset_brick4 = 1 << 1,
> + .i2c_reset_brick5 = 1 << 1,
> + .i2c_presence_addr = 0x20,
> + /* unused, we do this in custom presence detect */
> + .i2c_presence_brick2 = 0,
> + .i2c_presence_brick3 = 0,
> + .i2c_presence_brick4 = 0,
> + .i2c_presence_brick5 = 0,
> +};
> +
> /* The only difference between these is the PCI slot handling */
>
> DECLARE_PLATFORM(witherspoon) = {
> @@ -179,5 +375,6 @@ DECLARE_PLATFORM(witherspoon) = {
> .terminate = ipmi_terminate,
>
> .pci_get_slot_info = dt_slot_get_slot_info,
> + .ocapi = &witherspoon_ocapi,
> .npu2_device_detect = witherspoon_npu2_device_detect,
> };
>
More information about the Skiboot
mailing list