[Skiboot] [PATCH v5 07/15] core/pci: PCI slot management core

Gavin Shan gwshan at linux.vnet.ibm.com
Mon Apr 27 16:26:25 AEST 2015


The patch implements the PCI slot core for unified PCI slot management.
Each PCIE bridge port or PHB is expected to be bound to a PCI slot
instance, which in turn manages various states (e.g. power, link,
reset) and accommodates state change requests from the OS.

"struct pci_slot" is introduced to represent PCI slot instances, and
PCI slot states are introduced to manage the power/link/reset states
when handling requests from the OS. More importantly, the methods
that manage the states can be overridden by individual platforms so
that they can provide their own implementations.

Besides, when the power supply to one particular hotpluggable slot
changes, the slot is expected to be cut off or rescanned. The patch adds
helper functions in pci.c for that purpose as well.

Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
---
 core/Makefile.inc         |   2 +-
 core/pci-slot.c           | 912 ++++++++++++++++++++++++++++++++++++++++++++++
 core/pci.c                | 159 ++++++--
 include/opal-api.h        |   1 +
 include/pci-slot.h        | 251 +++++++++++++
 include/pci.h             |  65 +---
 include/platform.h        |   3 +-
 platforms/ibm-fsp/lxvpd.c |   1 +
 8 files changed, 1310 insertions(+), 84 deletions(-)
 create mode 100644 core/pci-slot.c
 create mode 100644 include/pci-slot.h

diff --git a/core/Makefile.inc b/core/Makefile.inc
index 5f5fe04..905543c 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -3,7 +3,7 @@
 SUBDIRS += core
 CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
 CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
-CORE_OBJS += timebase.o opal-msg.o pci.o pci-opal.o fast-reboot.o
+CORE_OBJS += timebase.o opal-msg.o pci.o pci-slot.o pci-opal.o fast-reboot.o
 CORE_OBJS += device.o exceptions.o trace.o affinity.o vpd.o
 CORE_OBJS += hostservices.o platform.o nvram.o hmi.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
diff --git a/core/pci-slot.c b/core/pci-slot.c
new file mode 100644
index 0000000..f985820
--- /dev/null
+++ b/core/pci-slot.c
@@ -0,0 +1,912 @@
+/* Copyright 2013-2015 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal-msg.h>
+#include <pci-cfg.h>
+#include <pci.h>
+#include <pci-slot.h>
+
+/* Debug logging helper: prefixes every message with the slot's ID */
+#define PCI_SLOT_DBG(s, fmt, a...)	prlog(PR_DEBUG, "PCI-SLOT-%016llx " fmt, \
+					      (s)->id, ##a)
+
+/* Report adapter presence for the slot through @val (1: present,
+ * 0: empty). Always returns OPAL_SUCCESS. */
+static int64_t pcie_slot_get_presence_status(struct pci_slot *slot,
+					     uint8_t *val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t slot_cap, slot_sts;	/* config words are unsigned */
+
+	/*
+	 * Upstream ports always have downstream ports connected.
+	 * The presence bit of slot state is usually zero, so
+	 * hard-code the slot as present.
+	 */
+	if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
+		*val = 1;
+		return OPAL_SUCCESS;
+	}
+
+	/*
+	 * If a downstream port doesn't implement the slot
+	 * capability, we need to hard-code it as "present"
+	 * according to the PCIE spec.
+	 *
+	 * Note that the power should be supplied to the slot
+	 * before detecting the presence bit.
+	 */
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_CAPABILITY_REG, &slot_cap);
+	if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT &&
+	    !(slot_cap & PCICAP_EXP_CAP_SLOT)) {
+		*val = 1;
+		return OPAL_SUCCESS;
+	}
+
+	/* Check presence bit */
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_SLOTSTAT, &slot_sts);
+	*val = (slot_sts & PCICAP_EXP_SLOTSTAT_PDETECTST) ? 1 : 0;
+
+	return OPAL_SUCCESS;
+}
+
+/* Report link state through @val: 0 if down, else the link width field */
+static int64_t pcie_slot_get_link_status(struct pci_slot *slot,
+					 uint8_t *val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t lstat;		/* config words are unsigned */
+
+	/*
+	 * Upstream port doesn't have valid link indicator in
+	 * data-link status. Let's hardcode it as link-up as
+	 * there are always downstream ports.
+	 */
+	if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
+		*val = 1;
+		return OPAL_SUCCESS;
+	}
+
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_LSTAT, &lstat);
+	/* The width is only reported while the data link layer is active */
+	if (lstat & PCICAP_EXP_LSTAT_DLLL_ACT)
+		*val = GETFIELD(PCICAP_EXP_LSTAT_WIDTH, lstat);
+	else
+		*val = 0;
+	return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_power_status(struct pci_slot *slot,
+					  uint8_t *val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t slot_ctl;
+
+	/*
+	 * If the cached slot capabilities show no power controller,
+	 * the power is reported as always on.
+	 */
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL)) {
+		*val = PCI_SLOT_POWER_ON;
+		return OPAL_SUCCESS;
+	}
+
+	/* Power controller control bit: set means off, clear means on */
+	*val = PCI_SLOT_POWER_OFF;
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_SLOTCTL, &slot_ctl);
+	if (!(slot_ctl & PCICAP_EXP_SLOTCTL_PWRCTLR))
+		*val = PCI_SLOT_POWER_ON;
+
+	return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_attention_status(struct pci_slot *slot,
+					      uint8_t *val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t slot_ctl;
+
+	/*
+	 * If the cached slot capabilities show no attention
+	 * indicator, its state is reported as off (0).
+	 */
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI)) {
+		*val = 0;
+		return OPAL_SUCCESS;
+	}
+
+	/* ON and BLINK are passed through as the raw indicator field value */
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_SLOTCTL, &slot_ctl);
+	switch (GETFIELD(PCICAP_EXP_SLOTCTL_ATTNI, slot_ctl)) {
+	case PCIE_INDIC_ON:
+	case PCIE_INDIC_BLINK:
+		*val = GETFIELD(PCICAP_EXP_SLOTCTL_ATTNI, slot_ctl);
+		break;
+	case PCIE_INDIC_OFF:
+	default:
+		*val = 0;	/* OFF and any other encoding read as off */
+	}
+
+	return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_latch_status(struct pci_slot *slot,
+					  uint8_t *val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t slot_sts;
+
+	/*
+	 * If the cached slot capabilities show no MRL sensor,
+	 * its state is always reported as off (0).
+	 */
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_MRLSENS)) {
+		*val = 0;
+		return OPAL_SUCCESS;
+	}
+
+	/* MRL sensor state bit of the slot status: set is reported as 1 */
+	*val = 0;
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_SLOTSTAT, &slot_sts);
+	if (slot_sts & PCICAP_EXP_SLOTSTAT_MRLSENSST)
+		*val = 1;
+
+	return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_set_attention_status(struct pci_slot *slot,
+					      uint8_t val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t slot_ctl;
+
+	/*
+	 * Without an attention indicator the request is dropped:
+	 * nothing is written and OPAL_SUCCESS is returned.
+	 */
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI))
+		return OPAL_SUCCESS;
+
+	/* Update the indicator field; other values of @val leave it as is */
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_SLOTCTL, &slot_ctl);
+	switch (val) {
+	case PCI_SLOT_ATTN_LED_OFF:
+		slot_ctl = SETFIELD(PCICAP_EXP_SLOTCTL_ATTNI,
+				    slot_ctl, PCIE_INDIC_OFF);
+		break;
+	case PCI_SLOT_ATTN_LED_ON:
+		slot_ctl = SETFIELD(PCICAP_EXP_SLOTCTL_ATTNI,
+				    slot_ctl, PCIE_INDIC_ON);
+		break;
+	case PCI_SLOT_ATTN_LED_BLINK:
+		slot_ctl = SETFIELD(PCICAP_EXP_SLOTCTL_ATTNI,
+				    slot_ctl, PCIE_INDIC_BLINK);
+		break;
+	}
+
+	pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL,
+			slot_ctl);
+	return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_set_power_status(struct pci_slot *slot,
+					  uint8_t val)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t slot_ctl;
+
+	/*
+	 * Without a power controller the request is dropped:
+	 * nothing is written and OPAL_SUCCESS is returned.
+	 */
+	ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+	if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL))
+		return OPAL_SUCCESS;
+
+	/* Power control bit and power indicator are updated together */
+	pci_cfg_read16(phb, pd->bdfn,
+		       ecap + PCICAP_EXP_SLOTCTL, &slot_ctl);
+	switch (val) {
+	case PCI_SLOT_POWER_OFF:
+		slot_ctl |= PCICAP_EXP_SLOTCTL_PWRCTLR;
+		slot_ctl = SETFIELD(PCICAP_EXP_SLOTCTL_PWRI, slot_ctl,
+				    PCIE_INDIC_OFF);
+		break;
+	case PCI_SLOT_POWER_ON:
+		slot_ctl &= ~PCICAP_EXP_SLOTCTL_PWRCTLR;
+		slot_ctl = SETFIELD(PCICAP_EXP_SLOTCTL_PWRI, slot_ctl,
+				    PCIE_INDIC_ON);
+		break;
+	}
+
+	pci_cfg_write16(phb, pd->bdfn,
+			ecap + PCICAP_EXP_SLOTCTL, slot_ctl);
+	return OPAL_SUCCESS;
+}
+
+static void pcie_slot_prepare_link_change(struct pci_slot *slot,
+					  bool is_up)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t aercap, mask;
+
+	/* Nothing to do without AER capability */
+	if (!pci_has_cap(pd, PCIECAP_ID_AER, true))
+		return;
+
+	/* Surprise-down: masked before link down, unmasked on link up */
+	aercap = pci_cap(pd, PCIECAP_ID_AER, true);
+	pci_cfg_read32(phb, pd->bdfn,
+		       aercap + PCIECAP_AER_UE_MASK, &mask);
+	if (is_up)
+		mask &= ~PCIECAP_AER_UE_MASK_SURPRISE_DOWN;
+	else
+		mask |= PCIECAP_AER_UE_MASK_SURPRISE_DOWN;
+	pci_cfg_write32(phb, pd->bdfn,
+			aercap + PCIECAP_AER_UE_MASK, mask);
+
+	/* Receiver error: masked and unmasked the same way */
+	pci_cfg_read32(phb, pd->bdfn,
+		       aercap + PCIECAP_AER_CE_MASK, &mask);
+	if (is_up)
+		mask &= ~PCIECAP_AER_CE_RECVR_ERR;
+	else
+		mask |= PCIECAP_AER_CE_RECVR_ERR;
+	pci_cfg_write32(phb, pd->bdfn,
+			aercap + PCIECAP_AER_CE_MASK, mask);
+
+	/*
+	 * If it's a link up event, we just had a hot or fundamental
+	 * reset. The bus numbers for downstream bridges should
+	 * be restored for that case, and the downstream devices
+	 * should be reconfigured. However, the bus numbers have
+	 * to be restored first of all.
+	 */
+	if (is_up) {
+		pci_restore_bridge_buses(phb, pd);
+		pci_device_init(phb, pd);
+	}
+}
+
+static int64_t pcie_slot_sm_poll_link(struct pci_slot *slot)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint32_t ecap;
+	uint16_t val;
+	uint8_t presence = 0;
+
+	switch (slot->state) {
+	case PCI_SLOT_STATE_LINK_START_POLL:
+		PCI_SLOT_DBG(slot, "LINK: Start polling\n");
+
+		/*
+		 * The link will never come up if the slot
+		 * doesn't have a connected adapter
+		 */
+		if (slot->ops.get_presence_status)
+			slot->ops.get_presence_status(slot, &presence);
+		if (!presence) {
+			PCI_SLOT_DBG(slot, "LINK: No adapter, end polling\n");
+			if (slot->ops.prepare_link_change)
+				slot->ops.prepare_link_change(slot, true);
+			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+			return OPAL_SUCCESS;
+		}
+
+		/* Clear the link-disable bit unconditionally */
+		ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+		pci_cfg_read16(phb, pd->bdfn,
+			       ecap + PCICAP_EXP_LCTL, &val);
+		val &= ~PCICAP_EXP_LCTL_LINK_DIS;
+		pci_cfg_write16(phb, pd->bdfn,
+				ecap + PCICAP_EXP_LCTL, val);
+
+		/*
+		 * If the slot doesn't support link change report
+		 * capability, we assume the link state is finalized
+		 * after 1 second.
+		 */
+		if (!(slot->link_cap & PCICAP_EXP_LCAP_DL_ACT_REP)) {
+			pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_DELAY_FINALIZED);
+			return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
+		}
+
+		/*
+		 * If the slot supports reporting link state change,
+		 * we poll it every 20ms. With 250 retries the
+		 * timeout is about 5s.
+		 */
+		pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_POLLING);
+		slot->retries = 250;
+		return pci_slot_set_sm_timeout(slot, msecs_to_tb(20));
+	case PCI_SLOT_STATE_LINK_DELAY_FINALIZED:
+		/*
+		 * We assume the link state is finalized after
+		 * the fixed delay armed above.
+		 */
+		PCI_SLOT_DBG(slot, "LINK: No link report, end polling\n");
+		if (slot->ops.prepare_link_change)
+			slot->ops.prepare_link_change(slot, true);
+		pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+		return OPAL_SUCCESS;
+	case PCI_SLOT_STATE_LINK_POLLING:
+		ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+		pci_cfg_read16(phb, pd->bdfn,
+			       ecap + PCICAP_EXP_LSTAT, &val);
+		if (val & PCICAP_EXP_LSTAT_DLLL_ACT) {
+			PCI_SLOT_DBG(slot, "LINK: Link is up, end polling\n");
+			if (slot->ops.prepare_link_change)
+				slot->ops.prepare_link_change(slot, true);
+			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+			return OPAL_SUCCESS;
+		}
+
+		/* Out of retries: still finish with OPAL_SUCCESS */
+		if (slot->retries-- == 0) {
+			PCI_SLOT_DBG(slot, "LINK: Timeout waiting for up (%04x)\n",
+				     val);
+			if (slot->ops.prepare_link_change)
+				slot->ops.prepare_link_change(slot, true);
+			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+			return OPAL_SUCCESS;
+		}
+
+		return pci_slot_set_sm_timeout(slot, msecs_to_tb(20));
+	default:
+		PCI_SLOT_DBG(slot, "LINK: Unexpected slot state %08x\n",
+			     slot->state);
+	}
+
+	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+	return OPAL_HARDWARE;
+}
+
+/* Assert or deassert secondary bus reset on the bridge owning the slot */
+static void pcie_slot_reset(struct pci_slot *slot, bool assert)
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	uint16_t ctl;
+
+	pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &ctl);
+	if (assert)
+		ctl |= PCI_CFG_BRCTL_SECONDARY_RESET;
+	else
+		ctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, ctl);
+}
+
+static int64_t pcie_slot_sm_hreset(struct pci_slot *slot)
+{
+	/*
+	 * After power-on in fundamental reset, we will switch to
+	 * hot reset. That case enters at PCI_SLOT_STATE_HRESET_START.
+	 */
+	switch (slot->state) {
+	case PCI_SLOT_STATE_NORMAL:
+		PCI_SLOT_DBG(slot, "HRESET: Starts\n");
+		if (slot->ops.prepare_link_change) {
+			PCI_SLOT_DBG(slot, "HRESET: Prepare for link down\n");
+			slot->ops.prepare_link_change(slot, false);
+		}
+		/* fall through */
+	case PCI_SLOT_STATE_HRESET_START:
+		PCI_SLOT_DBG(slot, "HRESET: Assert\n");
+		pcie_slot_reset(slot, true);
+		pci_slot_set_state(slot, PCI_SLOT_STATE_HRESET_HOLD);
+		return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
+	case PCI_SLOT_STATE_HRESET_HOLD:
+		PCI_SLOT_DBG(slot, "HRESET: Deassert\n");
+		pcie_slot_reset(slot, false);
+		pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL);
+		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1800));
+	default:
+		PCI_SLOT_DBG(slot, "HRESET: Unexpected slot state %08x\n",
+			     slot->state);
+	}
+
+	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Usually, individual platforms need to override the power
+ * management methods for fundamental reset, while the hot
+ * reset that follows it is commonly shared.
+ */
+static int64_t pcie_slot_sm_freset(struct pci_slot *slot)
+{
+	uint8_t power_state = PCI_SLOT_POWER_ON;
+
+	switch (slot->state) {
+	case PCI_SLOT_STATE_NORMAL:
+		PCI_SLOT_DBG(slot, "FRESET: Starts\n");
+		if (slot->ops.prepare_link_change)
+			slot->ops.prepare_link_change(slot, false);
+
+		/* Retrieve power state; assumed on if there is no getter */
+		if (slot->ops.get_power_status) {
+			PCI_SLOT_DBG(slot, "FRESET: Retrieve power state\n");
+			slot->ops.get_power_status(slot, &power_state);
+		}
+
+		/* In power on state, power it off and wait 50ms */
+		if (power_state == PCI_SLOT_POWER_ON &&
+		    slot->ops.set_power_status) {
+			PCI_SLOT_DBG(slot, "FRESET: Power is on, turn off\n");
+			slot->ops.set_power_status(slot,
+						   PCI_SLOT_POWER_OFF);
+			pci_slot_set_state(slot,
+					   PCI_SLOT_STATE_FRESET_POWER_OFF);
+			return pci_slot_set_sm_timeout(slot, msecs_to_tb(50));
+		}
+		/* No power state change, fall through */
+	case PCI_SLOT_STATE_FRESET_POWER_OFF:
+		PCI_SLOT_DBG(slot, "FRESET: Power is off, turn on\n");
+		if (slot->ops.set_power_status)
+			slot->ops.set_power_status(slot,
+						   PCI_SLOT_POWER_ON);
+		pci_slot_set_state(slot, PCI_SLOT_STATE_HRESET_START);
+		return pci_slot_set_sm_timeout(slot, msecs_to_tb(50));
+	default:
+		PCI_SLOT_DBG(slot, "FRESET: Unexpected slot state %08x\n",
+			     slot->state);
+	}
+
+	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+	return OPAL_HARDWARE;
+}
+
+static int64_t pci_slot_sm_poll(struct pci_slot *slot, uint8_t *val)
+{
+	int64_t (*func)(struct pci_slot *) = NULL;
+	int64_t (*gfunc)(struct pci_slot *, uint8_t *) = NULL;
+	int64_t (*sfunc)(struct pci_slot *, uint8_t) = NULL;
+	uint64_t now = mftb();
+
+	/* Nothing in flight: hand back the cached get value, if wanted */
+	if (slot->state == PCI_SLOT_STATE_NORMAL) {
+		if (val)
+			*val = slot->get_val;
+		return OPAL_SUCCESS;
+	}
+
+	/* Timer not expired yet: return the remaining timebase ticks */
+	if (slot->delay_tgt_tb &&
+	    tb_compare(now, slot->delay_tgt_tb) == TB_ABEFOREB)
+		return slot->delay_tgt_tb - now;
+
+	/* Delay elapsed: clear the target before dispatching */
+	slot->delay_tgt_tb = 0;
+
+	switch (slot->state & PCI_SLOT_STATE_MASK) {
+	case PCI_SLOT_STATE_LINK:
+		func = slot->ops.poll_link;
+		break;
+	case PCI_SLOT_STATE_HRESET:
+		func = slot->ops.hreset;
+		break;
+	case PCI_SLOT_STATE_FRESET:
+		func = slot->ops.freset;
+		break;
+	case PCI_SLOT_STATE_PFRESET:
+		func = slot->ops.pfreset;
+		break;
+	case PCI_SLOT_STATE_CRESET:
+		func = slot->ops.creset;
+		break;
+	case PCI_SLOT_STATE_GPOWER:
+		gfunc = slot->ops.get_power_status;
+		break;
+	case PCI_SLOT_STATE_GPRESENCE:
+		gfunc = slot->ops.get_presence_status;
+		break;
+	case PCI_SLOT_STATE_SPOWER:
+		sfunc = slot->ops.set_power_status;
+		break;
+	}
+
+	if (func)
+		return func(slot);
+	else if (gfunc)
+		return gfunc(slot, &slot->get_val);
+	else if (sfunc)
+		return sfunc(slot, slot->set_val);
+
+	/* Unknown state class or missing handler: give up on the request */
+	prlog(PR_NOTICE, "Slot %016llx not support state %08x\n",
+	      slot->id, slot->state);
+	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+	return OPAL_HARDWARE;
+}
+
+void pci_slot_add_properties(struct pci_slot *slot,
+			     struct dt_node *np)
+{
+	/* Nothing to populate without a device node */
+	if (!np)
+		return;
+
+	dt_add_property_cells(np, "ibm,reset-by-firmware", 1);
+	dt_add_property_cells(np, "ibm,slot-pluggable",
+			      slot->pluggable);
+	dt_add_property_cells(np, "ibm,slot-power-ctl",
+			      slot->power_ctl);
+	dt_add_property_cells(np, "ibm,slot-power-led-ctlled",
+			      slot->power_led_ctl);
+	dt_add_property_cells(np, "ibm,slot-attn-led",
+			      slot->attn_led_ctl);
+	dt_add_property_cells(np, "ibm,slot-connector-type",
+			      slot->connector_type);
+	dt_add_property_cells(np, "ibm,slot-card-desc",
+			      slot->card_desc);
+	dt_add_property_cells(np, "ibm,slot-card-mech",
+			      slot->card_mech);
+	dt_add_property_cells(np, "ibm,slot-wired-lanes",
+			      slot->wired_lanes);
+
+	if (slot->ops.add_properties)	/* optional per-slot additions */
+		slot->ops.add_properties(slot, np);
+}
+
+struct pci_slot *pci_slot_alloc(struct phb *phb,
+				struct pci_device *pd)
+{
+	struct pci_slot *slot = NULL;
+
+	/* PHB should be always valid */
+	if (!phb)
+		return NULL;
+
+	/*
+	 * Reuse the slot in case we already have one. For a PHB
+	 * slot, the passed 'pd' should be NULL. Otherwise,
+	 * both 'phb' and 'pd' are valid.
+	 */
+	if (!pd)
+		slot = phb->slot;
+	else
+		slot = pd->slot;
+	if (slot) {
+		PCI_SLOT_DBG(slot, "Already existing\n");
+		return slot;
+	}
+
+	/* Allocate zeroed memory chunk */
+	slot = zalloc(sizeof(struct pci_slot));
+	if (!slot) {
+		prerror("Cannot allocate PCI slot\n");
+		return NULL;
+	}
+
+	/*
+	 * Build the slot ID: the PHB's OPAL ID, plus bit 63 and
+	 * the BDFN shifted up by 16 bits for a device slot
+	 */
+	if (pd) {
+		slot->id = pd->bdfn;
+		slot->id = ((0x1ul << 63) | (slot->id << 16));
+	}
+	slot->id |= phb->opal_id;
+
+	/*
+	 * Initialize the slot. The poll function is always
+	 * unified for all cases.
+	 */
+	slot->phb	= phb;
+	slot->pd	= pd;
+	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+
+	/*
+	 * PCI slot state poller, which isn't expected to
+	 * be overridden by individual platforms.
+	 */
+	slot->ops.poll = pci_slot_sm_poll;
+
+	/* The slot belongs to either the PCI device or the PHB */
+	if (pd)
+		pd->slot = slot;
+	else
+		phb->slot = slot;
+
+	return slot;
+}
+
+/*
+ * Find PCI slot. The ID might be combination of PHB and slot indexes, or
+ * single PHB index. A non-zero highest nibble indicates the former.
+ * Returns NULL when the PHB doesn't exist.
+ */
+struct pci_slot *pci_slot_find(uint64_t id)
+{
+	struct phb *phb;
+	struct pci_device *pd;
+	struct pci_slot *slot;
+	bool compound = !!(id & 0xf000000000000000);
+	uint64_t phb_index = compound ? (id & 0xfffful) : id;
+	uint16_t bdfn = compound ? ((id >> 16) & 0xfffful) : 0;
+
+	/* No PHB, no slot: bail before a NULL PHB reaches pci_find_dev() */
+	phb = pci_get_phb(phb_index);
+	if (!phb)
+		return NULL;
+	slot = phb->slot;
+
+	/*
+	 * Search for slot in case it's compound case.
+	 * NOTE(review): an unknown BDFN yields the PHB's slot rather
+	 * than NULL -- confirm callers rely on that.
+	 */
+	if (compound) {
+		pd = pci_find_dev(phb, bdfn);
+		if (pd)
+			slot = pd->slot;
+	}
+
+	return slot;
+}
+
+struct pci_slot *pcie_slot_create(struct phb *phb, struct pci_device *pd)
+{
+	struct pci_slot *slot;
+	uint32_t ecap;
+
+	/* Allocate PCI slot, or pick up the one already bound */
+	slot = pci_slot_alloc(phb, pd);
+	if (!slot)
+		return NULL;
+
+	/* Cache the link and slot capabilities (device slots only) */
+	if (pd) {
+		ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+		pci_cfg_read32(phb, pd->bdfn,
+			       ecap + PCICAP_EXP_LCAP, &slot->link_cap);
+		pci_cfg_read32(phb, pd->bdfn,
+			       ecap + PCICAP_EXP_SLOTCAP, &slot->slot_cap);
+	}
+
+	/* Slot info; pluggable needs both surprise and hotplug capable */
+	if ((slot->slot_cap & PCICAP_EXP_SLOTCAP_HPLUG_SURP) &&
+	    (slot->slot_cap & PCICAP_EXP_SLOTCAP_HPLUG_CAP))
+		slot->pluggable = 1;
+	if (slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL)
+		slot->power_ctl = 1;
+	if (slot->slot_cap & PCICAP_EXP_SLOTCAP_PWRI)
+		slot->power_led_ctl = PCI_SLOT_PWR_LED_CTL_KERNEL;
+	if (slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI)
+		slot->attn_led_ctl = PCI_SLOT_ATTN_LED_CTL_KERNEL;
+	slot->wired_lanes = GETFIELD(PCICAP_EXP_LCAP_MAXWDTH, slot->link_cap);
+
+	/* Standard slot operations */
+	slot->ops.get_presence_status   = pcie_slot_get_presence_status;
+	slot->ops.get_link_status       = pcie_slot_get_link_status;
+	slot->ops.get_power_status      = pcie_slot_get_power_status;
+	slot->ops.get_attention_status  = pcie_slot_get_attention_status;
+	slot->ops.get_latch_status      = pcie_slot_get_latch_status;
+	slot->ops.set_power_status      = pcie_slot_set_power_status;
+	slot->ops.set_attention_status  = pcie_slot_set_attention_status;
+
+	/*
+	 * State machine based reset handlers. The poll function
+	 * itself was already installed by pci_slot_alloc().
+	 */
+	slot->ops.prepare_link_change   = pcie_slot_prepare_link_change;
+	slot->ops.poll_link             = pcie_slot_sm_poll_link;
+	slot->ops.hreset                = pcie_slot_sm_hreset;
+	slot->ops.freset                = pcie_slot_sm_freset;
+	slot->ops.pfreset               = NULL;
+
+	return slot;
+}
+
+/*
+ * Append @len bytes of @data followed by one separating space at offset
+ * @off of @buf. Nothing is written (and @buf may be NULL) unless @fill
+ * is set, so the same walk can size the blob. Returns the new offset.
+ */
+static size_t pci_slot_blob_put(char *buf, size_t off, const void *data,
+				size_t len, bool fill)
+{
+	if (fill) {
+		memcpy(buf + off, data, len);
+		buf[off + len] = ' ';
+	}
+
+	return off + len + 1;
+}
+
+/* The function can be used to calculate the length of device node
+ * blob, or produce device node blob. The buffer uses space characters
+ * to separate device node's full name, property names and their values.
+ * Each device node starts with prefix "[NODE]" followed by one space.
+ *
+ * [NODE] <full name> [[PROP] <property name> <length> <value>] ...
+ *
+ * @dn:   device node to serialize
+ * @buf:  output buffer, only touched when @fill is true
+ * @fill: false to size the blob, true to write it
+ *
+ * Returns the number of bytes the node occupies in the blob. Both
+ * passes go through the same code so that the sizes always agree.
+ */
+static size_t pci_slot_process_one_node(struct dt_node *dn,
+					char *buf, bool fill)
+{
+	struct dt_property *property;
+	char tmp[24];	/* fits any 64-bit decimal plus terminator */
+	char *path;
+	size_t off = 0;
+	int digits;
+
+	/* Node prefix: [NODE] */
+	off = pci_slot_blob_put(buf, off, "[NODE]", 6, fill);
+
+	/*
+	 * Node's full name. The string is released once consumed.
+	 * NOTE(review): relies on dt_get_path() returning memory
+	 * owned by the caller -- confirm against its definition.
+	 */
+	path = dt_get_path(dn);
+	off = pci_slot_blob_put(buf, off, path, strlen(path), fill);
+	free(path);
+
+	/* Node's properties */
+	list_for_each(&dn->properties, property, list) {
+		/* Prefix */
+		off = pci_slot_blob_put(buf, off, "[PROP]", 6, fill);
+
+		/* Property name */
+		off = pci_slot_blob_put(buf, off, property->name,
+					strlen(property->name), fill);
+
+		/*
+		 * Property length as decimal text. It is formatted
+		 * into a scratch array bounded by the array's size;
+		 * bounding the write by sizeof(char *) would cut the
+		 * text off after 7 digits.
+		 */
+		digits = snprintf(tmp, sizeof(tmp), "%ld", property->len);
+		if (digits < 0)
+			digits = 0;
+		off = pci_slot_blob_put(buf, off, tmp, digits, fill);
+
+		/* Property value */
+		off = pci_slot_blob_put(buf, off, property->prop,
+					property->len, fill);
+	}
+
+	return off;
+}
+
+/* Size (!fill) or write (fill) the blob of every node below @dn */
+static size_t pci_slot_process_nodes(struct dt_node *dn,
+				     char *buf, bool fill)
+{
+	struct dt_node *child;
+	size_t len = 0;
+
+	list_for_each(&dn->children, child, list) {
+		/* Only offset into @buf when writing; sizing passes NULL */
+		len += pci_slot_process_one_node(child,
+					fill ? buf + len : NULL, fill);
+		len += pci_slot_process_nodes(child,
+					fill ? buf + len : NULL, fill);
+	}
+	return len;
+}
+
+int pci_slot_hotplug_event(struct pci_slot *slot, bool add,
+			   void (*consume)(void *data))
+{
+	struct phb *phb = slot->phb;
+	struct pci_device *pd = slot->pd;
+	struct dt_node *dn = pd->dt_node; /* NOTE(review): pd NULL on PHB slot? */
+	uint64_t operation;
+	uint8_t link = 0;
+	void *buffer = NULL;
+	size_t len = 0;
+	int64_t rc;
+
+	/* For an unplugging event, only the phandle of the slot's
+	 * PCI bridge device node is transported to the OS.
+	 */
+	if (!add) {
+		slot->msg = NULL;	/* NOTE(review): old buffer not freed here */
+		pci_bus_remove(phb, &pd->children);
+		operation = 0x0ul;
+		goto send_msg;
+	}
+
+	/* Check the link before rescanning; errors count as link down */
+	operation = 0x1ul;
+	if (slot->ops.get_link_status &&
+	    (rc = slot->ops.get_link_status(slot, &link))) {
+		PCI_SLOT_DBG(slot, "Error %lld getting link status\n", rc);
+		link = 0;
+	}
+
+	/* Don't rescan in case the link is down. Otherwise, we
+	 * would invariably run into an EEH error.
+	 */
+	if (!link) {
+		PCI_SLOT_DBG(slot, "Link is down and skip rescanning\n");
+		goto send_msg;
+	}
+
+	/* Rescan the slot and create device nodes for what was found */
+	pci_bus_scan(phb, pd->secondary_bus, pd->subordinate_bus,
+		     &pd->children, pd, true);
+	pci_add_device_nodes(phb, &pd->children, dn, &phb->lstate, 0);
+
+	/* Calculate the length of the buffer, allocate it and then
+	 * populate it. Note that no buffer is needed when there is
+	 * nothing to serialize (slot->msg is left untouched then).
+	 */
+	len = pci_slot_process_nodes(dn, NULL, false);
+	if (len > 0) {
+		buffer = zalloc(len);
+		if (!buffer) {
+			prlog(PR_NOTICE, "%s: Cannot allocate buffer (%ld)\n",
+			      __func__, len);
+			return -1;
+		}
+
+		/* Lazily free the buffer of the previous event and
+		 * fill out the buffer for the current one
+		 */
+		if (slot->msg)
+			free(slot->msg);
+		slot->msg = buffer;
+		pci_slot_process_nodes(dn, buffer, true);
+	}
+
+	/* Send the message: operation, bridge phandle, blob and its length */
+send_msg:
+	opal_queue_msg(OPAL_MSG_PCI_HOTPLUG, slot, consume,
+		       operation, (u64)(dn->phandle),
+		       (u64)buffer, (u64)len);
+	return 0;
+}
diff --git a/core/pci.c b/core/pci.c
index d5dc3eb..c749d2a 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -18,9 +18,12 @@
 #include <cpu.h>
 #include <pci.h>
 #include <pci-cfg.h>
+#include <pci-slot.h>
 #include <timebase.h>
 #include <device.h>
 
+//#define PCI_SERIALIZED_JOBS
+
 static struct phb *phbs[64];
 
 #define PCITRACE(_p, _bdfn, fmt, a...) \
@@ -434,11 +437,79 @@ static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd)
 	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_CMD, cmd);	
 }
 
+/*
+ * Turn off the power supply to the slot if there is nothing connected
+ * to it for 2 purposes: saving power obviously, and initializing the
+ * slot to initial power-off state for hotplug
+ */
+static void pci_try_power_off_slot(struct phb *phb, struct pci_device *pd)
+{
+	uint32_t flags = (PCI_SLOT_FLAG_BOOTUP |
+			  PCI_SLOT_FLAG_NO_HOTPLUG_MSG);
+	int64_t rc;
+
+	/* Only pluggable slots with power control and a poller qualify */
+	if (!pd ||
+	    !pd->slot ||
+	    !pd->slot->pluggable ||
+	    !pd->slot->ops.set_power_status ||
+	    !pd->slot->ops.poll)
+		return;
+
+	/* Leave the slot alone if there is something connected */
+	if (!list_empty(&pd->children))
+		return;
+	/*
+	 * Power the slot off; the literal 0 is presumably
+	 * PCI_SLOT_POWER_OFF (TODO confirm). No notification
+	 * message about device-tree changes is needed because
+	 * we assume there are no devices behind the slot and
+	 * their device-tree nodes aren't populated yet.
+	 */
+	pci_slot_add_flags(pd->slot, flags);
+	rc = pd->slot->ops.set_power_status(pd->slot, 0);
+	while (rc > 0) {
+		time_wait(rc);
+		rc = pd->slot->ops.poll(pd->slot, NULL);
+	}
+
+	pci_slot_remove_flags(pd->slot, flags);
+	if (rc != OPAL_SUCCESS)
+		PCINOTICE(phb, pd->bdfn, "Error %lld powering off slot\n", rc);
+	else
+		PCIDBG(phb, pd->bdfn, "Power off hotpluggable slot\n");
+}
+
+/* Recursively remove and free all PCI devices on the indicated
+ * list and below. It's used to remove all PCI devices behind one
+ * PCI slot at unplugging time
+ */
+void pci_bus_remove(struct phb *phb, struct list_head *list)
+{
+	struct pci_device *pd, *tmp;
+
+	if (list_empty(list))
+		return;
+
+	list_for_each_safe(list, pd, tmp, link) {
+		pci_bus_remove(phb, &pd->children);
+
+		/* Release device node and PCI slot (but not slot->msg) */
+		if (pd->dt_node)
+			dt_free(pd->dt_node);
+		if (pd->slot)
+			free(pd->slot);
 
-/* pci_scan - Perform a recursive scan of the bus at bus_number
- *            populating the list passed as an argument. This also
- *            performs the bus numbering, so it returns the largest
- *            bus number that was assigned.
+		/* Remove from parent list and release itself */
+		list_del(&pd->link);
+		free(pd);
+	}
+}
+
+/* Perform a recursive scan of the bus at bus_number populating
+ * the list passed as an argument. This also performs the bus
+ * numbering, so it returns the largest bus number that was
+ * assigned.
  *
  * Note: Eventually this might want to access some VPD information
  *       in order to know what slots to scan and what not etc..
@@ -448,9 +519,9 @@ static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd)
  * XXX NOTE: We might also want to setup the PCIe MPS/MRSS properly
  *           here as Linux may or may not do it
  */
-static uint8_t pci_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
-			struct list_head *list, struct pci_device *parent,
-			bool scan_downstream)
+uint8_t pci_bus_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
+		     struct list_head *list, struct pci_device *parent,
+		     bool scan_downstream)
 {
 	struct pci_device *pd = NULL;
 	uint8_t dev, fn, next_bus, max_sub, save_max;
@@ -496,10 +567,15 @@ static uint8_t pci_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
 	 * We only scan downstream if instructed to do so by the
 	 * caller. Typically we avoid the scan when we know the
 	 * link is down already, which happens for the top level
-	 * root complex, and avoids a long secondary timeout
+	 * root complex, and avoids a long secondary timeout. The
+	 * power will be turned off if it's a empty hotpluggable
+	 * slot.
 	 */
-	if (!scan_downstream)
+	if (!scan_downstream) {
+		list_for_each(list, pd, link)
+			pci_try_power_off_slot(phb, pd);
 		return bus;
+	}
 
 	next_bus = bus + 1;
 	max_sub = bus;
@@ -567,8 +643,8 @@ static uint8_t pci_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
 
 		/* Perform recursive scan */
 		if (do_scan) {
-			max_sub = pci_scan(phb, next_bus, max_bus,
-					   &pd->children, pd, true);
+			max_sub = pci_bus_scan(phb, next_bus, max_bus,
+					       &pd->children, pd, true);
 		} else if (!use_max) {
 			/* XXX Empty bridge... we leave room for hotplug
 			 * slots etc.. but we should be smarter at figuring
@@ -585,6 +661,9 @@ static uint8_t pci_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
 		pd->subordinate_bus = max_sub;
 		pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_sub);
 		next_bus = max_sub + 1;
+
+		/* Turn off its power if it's an empty hotpluggable slot */
+		pci_try_power_off_slot(phb, pd);
 	}
 
 	return max_sub;
@@ -759,7 +838,7 @@ static void pci_scan_phb(void *data)
 	/* Scan root port and downstream ports if applicable */
 	PCIDBG(phb, 0, "Scanning (upstream%s)...\n",
 	       has_link ? "+downsteam" : " only");
-	pci_scan(phb, 0, 0xff, &phb->devices, NULL, has_link);
+	pci_bus_scan(phb, 0, 0xff, &phb->devices, NULL, has_link);
 
 	/* Configre MPS (Max Payload Size) for PCIe domain */
 	pci_walk_dev(phb, NULL, pci_get_mps, &mps);
@@ -1219,12 +1298,12 @@ static void pci_print_summary_line(struct phb *phb, struct pci_device *pd,
 			  rev_class & 0xff, rev_class >> 8, cname, slotstr);
 }
 
-
-static void pci_add_one_node(struct phb *phb, struct pci_device *pd,
-			     struct dt_node *parent_node,
-			     struct pci_lsi_state *lstate, uint8_t swizzle)
+static void pci_add_one_device_node(struct phb *phb,
+				    struct pci_device *pd,
+				    struct dt_node *parent_node,
+				    struct pci_lsi_state *lstate,
+				    uint8_t swizzle)
 {
-	struct pci_device *child;
 	struct dt_node *np;
 	const char *cname;
 #define MAX_NAME 256
@@ -1336,23 +1415,29 @@ static void pci_add_one_node(struct phb *phb, struct pci_device *pd,
 	 * XXX The kernel should probably fix that up
 	 */
 	dt_add_property(np, "ranges", NULL, 0);
-
-	list_for_each(&pd->children, child, link)
-		pci_add_one_node(phb, child, np, lstate, swizzle);
 }
 
-static void pci_add_nodes(struct phb *phb)
+void pci_add_device_nodes(struct phb *phb,
+			  struct list_head *list,
+			  struct dt_node *parent_node,
+			  struct pci_lsi_state *lstate,
+			  uint8_t swizzle)
 {
-	struct pci_lsi_state *lstate = &phb->lstate;
 	struct pci_device *pd;
 
-	/* If the PHB has its own slot info, add them */
-	if (phb->slot_info)
+	/* Add the PHB's own slot info once, for the top-level bus only */
+	if (phb->slot_info && list == &phb->devices)
 		pci_add_slot_properties(phb, phb->slot_info, NULL);
 
-	/* Add all child devices */
-	list_for_each(&phb->devices, pd, link)
-		pci_add_one_node(phb, pd, phb->dt_node, lstate, 0);
+	list_for_each(list, pd, link) {
+		pci_add_one_device_node(phb, pd, parent_node,
+					lstate, swizzle);
+		if (list_empty(&pd->children))
+			continue;
+
+		pci_add_device_nodes(phb, &pd->children,
+				     pd->dt_node, lstate, swizzle);
+	}
 }
 
 static void __pci_reset(struct list_head *list)
@@ -1383,6 +1468,7 @@ void pci_reset(void)
 	}
 }
 
+#ifndef PCI_SERIALIZED_JOBS
 static void pci_do_jobs(void (*fn)(void *))
 {
 	struct cpu_thread *cpu = first_available_cpu();
@@ -1433,16 +1519,32 @@ static void pci_do_jobs(void (*fn)(void *))
 		cpu_wait_job(jobs[i], true);
 	}
 }
+#endif /* !PCI_SERIALIZED_JOBS */
 
 void pci_init_slots(void)
 {
 	unsigned int i;
 
+#ifndef PCI_SERIALIZED_JOBS
 	prlog(PR_NOTICE, "PCI: Resetting PHBs...\n");
-	pci_do_jobs(pci_reset_phb);
-
+        pci_do_jobs(pci_reset_phb);
 	prlog(PR_NOTICE, "PCI: Probing slots...\n");
 	pci_do_jobs(pci_scan_phb);
+#else
+	prlog(PR_NOTICE, "PCI: Resetting PHBs...\n");
+	for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+		if (!phbs[i])
+			continue;
+		pci_reset_phb(phbs[i]);
+	}
+
+	prlog(PR_NOTICE, "PCI: Probing slots...\n");
+	for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+		if (!phbs[i])
+			continue;
+		pci_scan_phb(phbs[i]);
+	}
+#endif /* !PCI_SERIALIZED_JOBS */
 
 	if (platform.pci_probe_complete)
 		platform.pci_probe_complete();
@@ -1452,7 +1554,8 @@ void pci_init_slots(void)
 	for (i = 0; i < ARRAY_SIZE(phbs); i++) {
 		if (!phbs[i])
 			continue;
-		pci_add_nodes(phbs[i]);
+		pci_add_device_nodes(phbs[i], &phbs[i]->devices,
+				     phbs[i]->dt_node, &phbs[i]->lstate, 0);
 	}
 }
 
diff --git a/include/opal-api.h b/include/opal-api.h
index 1698311..74897d0 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -417,6 +417,7 @@ enum opal_msg_type {
 	OPAL_MSG_HMI_EVT,
 	OPAL_MSG_DPO,
 	OPAL_MSG_PRD,
+	OPAL_MSG_PCI_HOTPLUG,
 	OPAL_MSG_TYPE_MAX,
 };
 
diff --git a/include/pci-slot.h b/include/pci-slot.h
new file mode 100644
index 0000000..899da27
--- /dev/null
+++ b/include/pci-slot.h
@@ -0,0 +1,251 @@
+/* Copyright 2013-2015 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PCI_SLOT_H
+#define __PCI_SLOT_H
+
+#include <opal.h>
+#include <device.h>
+#include <timebase.h>
+#include <ccan/list/list.h>
+
+/*
+ * PCI Slot Info: Wired Lane Values
+ *
+ * Values 0 to 6 match slot map 1005. In case of *any* change here
+ * make sure to keep the lxvpd.c parsing code in sync *and* the
+ * corresponding label strings in pci.c
+ */
+#define PCI_SLOT_WIRED_LANES_UNKNOWN	0x00
+#define PCI_SLOT_WIRED_LANES_PCIE_X1	0x01
+#define PCI_SLOT_WIRED_LANES_PCIE_X2	0x02
+#define PCI_SLOT_WIRED_LANES_PCIE_X4	0x03
+#define PCI_SLOT_WIRED_LANES_PCIE_X8	0x04
+#define PCI_SLOT_WIRED_LANES_PCIE_X16	0x05
+#define PCI_SLOT_WIRED_LANES_PCIE_X32	0x06
+#define PCI_SLOT_WIRED_LANES_PCIX_32	0x07
+#define PCI_SLOT_WIRED_LANES_PCIX_64	0x08
+
+/* PCI Slot Info: Bus Clock Values */
+#define PCI_SLOT_BUS_CLK_RESERVED	0x00
+#define PCI_SLOT_BUS_CLK_GEN_1		0x01
+#define PCI_SLOT_BUS_CLK_GEN_2		0x02
+#define PCI_SLOT_BUS_CLK_GEN_3		0x03
+
+/* PCI Slot Info: Connector Type Values */
+#define PCI_SLOT_CONNECTOR_PCIE_EMBED	0x00
+#define PCI_SLOT_CONNECTOR_PCIE_X1	0x01
+#define PCI_SLOT_CONNECTOR_PCIE_X2	0x02
+#define PCI_SLOT_CONNECTOR_PCIE_X4	0x03
+#define PCI_SLOT_CONNECTOR_PCIE_X8	0x04
+#define PCI_SLOT_CONNECTOR_PCIE_X16	0x05
+#define PCI_SLOT_CONNECTOR_PCIE_NS	0x0E	/* Non-Standard */
+
+/* PCI Slot Info: Card Description Values */
+#define PCI_SLOT_DESC_NON_STANDARD	0x00	/* Embed/Non-Standard       */
+#define PCI_SLOT_DESC_PCIE_FH_FL	0x00	/* Full Height, Full Length */
+#define PCI_SLOT_DESC_PCIE_FH_HL	0x01	/* Full Height, Half Length */
+#define PCI_SLOT_DESC_PCIE_HH_FL	0x02	/* Half Height, Full Length */
+#define PCI_SLOT_DESC_PCIE_HH_HL	0x03	/* Half Height, Half Length */
+
+/* PCI Slot Info: Mechanicals Values */
+#define PCI_SLOT_MECH_NONE		0x00
+#define PCI_SLOT_MECH_RIGHT		0x01
+#define PCI_SLOT_MECH_LEFT		0x02
+#define PCI_SLOT_MECH_RIGHT_LEFT	0x03
+
+/* PCI Slot Info: Power LED Control Values */
+#define PCI_SLOT_PWR_LED_CTL_NONE	0x00	/* No Control        */
+#define PCI_SLOT_PWR_LED_CTL_FSP	0x01	/* FSP Controlled    */
+#define PCI_SLOT_PWR_LED_CTL_KERNEL	0x02	/* Kernel Controlled */
+
+/* PCI Slot Info: ATTN LED Control Values */
+#define PCI_SLOT_ATTN_LED_CTL_NONE	0x00	/* No Control        */
+#define PCI_SLOT_ATTN_LED_CTL_FSP	0x01	/* FSP Controlled    */
+#define PCI_SLOT_ATTN_LED_CTL_KERNEL	0x02	/* Kernel Controlled */
+
+/* Attention LED */
+#define PCI_SLOT_ATTN_LED_OFF		0
+#define PCI_SLOT_ATTN_LED_ON		1
+#define PCI_SLOT_ATTN_LED_BLINK		2
+
+/* Power state */
+#define PCI_SLOT_POWER_OFF		0
+#define PCI_SLOT_POWER_ON		1
+
+/*
+ * Per-slot callbacks, embedded by value in struct pci_slot (ops). There
+ * are hard and soft resets for a slot: a hard reset requires power-off
+ * followed by power-on, while a soft reset only resets the secondary bus.
+ */
+struct pci_slot;
+struct pci_slot_ops {
+	/* Slot management: status getters (uint8_t *val) and setters */
+	int64_t (*get_presence_status)(struct pci_slot *slot, uint8_t *val);
+	int64_t (*get_link_status)(struct pci_slot *slot, uint8_t *val);
+	int64_t (*get_power_status)(struct pci_slot *slot, uint8_t *val);
+	int64_t (*get_attention_status)(struct pci_slot *slot, uint8_t *val);
+	int64_t (*get_latch_status)(struct pci_slot *slot, uint8_t *val);
+	int64_t (*set_power_status)(struct pci_slot *slot, uint8_t val);
+	int64_t (*set_attention_status)(struct pci_slot *slot, uint8_t val);
+
+	/* State machine (SM) based functions for reset and link polling */
+	void (*prepare_link_change)(struct pci_slot *slot, bool is_up);
+	int64_t (*poll_link)(struct pci_slot *slot);
+	int64_t (*creset)(struct pci_slot *slot);
+	int64_t (*freset)(struct pci_slot *slot);
+	int64_t (*pfreset)(struct pci_slot *slot);
+	int64_t (*hreset)(struct pci_slot *slot);
+	int64_t (*poll)(struct pci_slot *slot, uint8_t *val);
+
+	/* Auxiliary functions */
+	void (*add_properties)(struct pci_slot *slot, struct dt_node *np);
+};
+
+/*
+ * The PCI slot state is split into a base (PCI_SLOT_STATE_MASK bits)
+ * and a number (low 8 bits). Individual platforms can thus introduce
+ * their own PCI slot states on top of a base, and the PCI slot core
+ * can still recognize the base state.
+ */
+#define PCI_SLOT_STATE_MASK			0xFFFFFF00
+#define PCI_SLOT_STATE_NORMAL			0x00000000
+#define PCI_SLOT_STATE_LINK			0x00000100
+#define   PCI_SLOT_STATE_LINK_START_POLL	0x00000101
+#define   PCI_SLOT_STATE_LINK_DELAY_FINALIZED	0x00000102
+#define   PCI_SLOT_STATE_LINK_POLLING		0x00000103
+#define PCI_SLOT_STATE_HRESET			0x00000200
+#define   PCI_SLOT_STATE_HRESET_START		0x00000201
+#define   PCI_SLOT_STATE_HRESET_HOLD		0x00000202
+#define PCI_SLOT_STATE_FRESET			0x00000300
+#define   PCI_SLOT_STATE_FRESET_POWER_OFF	0x00000301
+#define PCI_SLOT_STATE_PFRESET			0x00000400
+#define   PCI_SLOT_STATE_PFRESET_START		0x00000401
+#define PCI_SLOT_STATE_CRESET			0x00000500
+#define   PCI_SLOT_STATE_CRESET_START		0x00000501
+#define PCI_SLOT_STATE_GPOWER			0x00000600
+#define   PCI_SLOT_STATE_GPOWER_START		0x00000601
+#define PCI_SLOT_STATE_SPOWER			0x00000700
+#define   PCI_SLOT_STATE_SPOWER_START		0x00000701
+#define PCI_SLOT_STATE_GPRESENCE		0x00000800
+#define   PCI_SLOT_STATE_GPRESENCE_START	0x00000801
+
+struct pci_slot {
+	uint32_t		flags;		/* PCI_SLOT_FLAG_* bits */
+#define PCI_SLOT_FLAG_BOOTUP		0x1
+#define PCI_SLOT_FLAG_NO_HOTPLUG_MSG	0x2
+
+	struct phb		*phb;
+	struct pci_device	*pd;
+
+	/* Identifier */
+	uint64_t		id;
+
+	/* Hotplug message. Hotplug events that happen
+	 * during system boot time don't need a hotplug
+	 * message.
+	 */
+	void			*msg;
+
+	/* Slot information */
+	uint8_t			pluggable;
+	uint8_t			power_ctl;
+	uint8_t			power_led_ctl;
+	uint8_t			attn_led_ctl;
+	uint8_t			connector_type;
+	uint8_t			card_desc;
+	uint8_t			card_mech;
+	uint8_t			wired_lanes;
+
+	/* State machine */
+	uint32_t		state;		/* PCI_SLOT_STATE_* based value */
+	uint32_t		link_cap;
+	uint32_t		slot_cap;
+	uint64_t		delay_tgt_tb;	/* set by pci_slot_set_sm_timeout() */
+	uint64_t		retries;
+	uint8_t			get_val;
+	uint8_t			set_val;
+	struct pci_slot_ops	ops;		/* embedded by value, not a pointer */
+	void			*data;
+};
+
+/* Set @flags on @slot; return the previous flags (0 if @slot is NULL) */
+static inline uint32_t pci_slot_add_flags(struct pci_slot *slot,
+					  uint32_t flags)
+{
+	uint32_t prev;
+
+	if (!slot)
+		return 0;
+	prev = slot->flags;
+	slot->flags = prev | flags;
+	return prev;
+}
+
+/* True only when @slot is non-NULL and has every bit of @flags set */
+static inline bool pci_slot_has_flags(struct pci_slot *slot,
+				      uint32_t flags)
+{
+	uint32_t missing;
+
+	if (!slot)
+		return false;
+	missing = flags & ~slot->flags;
+	return missing == 0;
+}
+
+/* Clear @flags on @slot; return the previous flags (0 if @slot is NULL) */
+static inline uint32_t pci_slot_remove_flags(struct pci_slot *slot,
+					     uint32_t flags)
+{
+	uint32_t prev;
+
+	if (!slot)
+		return 0;
+	prev = slot->flags;
+	slot->flags = prev & ~flags;
+	return prev;
+}
+
+static inline void pci_slot_set_state(struct pci_slot *slot, uint32_t state)
+{
+	if (slot)	/* a NULL slot is silently ignored */
+		slot->state = state;
+}
+
+/* Arm the SM delay @dur timebase ticks from now; returns @dur (NULL-safe) */
+static inline uint64_t pci_slot_set_sm_timeout(struct pci_slot *slot,
+					       uint64_t dur)
+{
+	uint64_t target = mftb() + dur;
+
+	/* Never store 0 as the target: step over it if the sum lands there */
+	if (slot)
+		slot->delay_tgt_tb = target ? target : 1;
+
+	return dur;
+}
+
+extern struct pci_slot *pci_slot_alloc(struct phb *phb,
+				       struct pci_device *pd);
+extern struct pci_slot *pcie_slot_create(struct phb *phb,
+					 struct pci_device *pd);
+extern void pci_slot_add_properties(struct pci_slot *slot,
+				    struct dt_node *np);
+extern struct pci_slot *pci_slot_find(uint64_t id);
+extern int pci_slot_hotplug_event(struct pci_slot *slot, bool add,
+				  void (*consume)(void *data));
+#endif /* __PCI_SLOT_H */
diff --git a/include/pci.h b/include/pci.h
index 9726a6e..66e22c8 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -21,60 +21,6 @@
 #include <device.h>
 #include <ccan/list/list.h>
 
-/* PCI Slot Info: Wired Lane Values
- *
- * Values 0 to 6 match slot map 1005. In case of *any* change here
- * make sure to keep the lxvpd.c parsing code in sync *and* the
- * corresponding label strings in pci.c
- */
-#define PCI_SLOT_WIRED_LANES_UNKNOWN   0x00
-#define PCI_SLOT_WIRED_LANES_PCIE_X1   0x01
-#define PCI_SLOT_WIRED_LANES_PCIE_X2   0x02
-#define PCI_SLOT_WIRED_LANES_PCIE_X4   0x03
-#define PCI_SLOT_WIRED_LANES_PCIE_X8   0x04
-#define PCI_SLOT_WIRED_LANES_PCIE_X16  0x05
-#define PCI_SLOT_WIRED_LANES_PCIE_X32  0x06
-#define PCI_SLOT_WIRED_LANES_PCIX_32   0x07
-#define PCI_SLOT_WIRED_LANES_PCIX_64   0x08
-
-/* PCI Slot Info: Bus Clock Values */
-#define PCI_SLOT_BUS_CLK_RESERVED      0x00
-#define PCI_SLOT_BUS_CLK_GEN_1         0x01
-#define PCI_SLOT_BUS_CLK_GEN_2         0x02
-#define PCI_SLOT_BUS_CLK_GEN_3         0x03
-
-/* PCI Slot Info: Connector Type Values */
-#define PCI_SLOT_CONNECTOR_PCIE_EMBED  0x00
-#define PCI_SLOT_CONNECTOR_PCIE_X1     0x01
-#define PCI_SLOT_CONNECTOR_PCIE_X2     0x02
-#define PCI_SLOT_CONNECTOR_PCIE_X4     0x03
-#define PCI_SLOT_CONNECTOR_PCIE_X8     0x04
-#define PCI_SLOT_CONNECTOR_PCIE_X16    0x05
-#define PCI_SLOT_CONNECTOR_PCIE_NS     0x0E  /* Non-Standard */
-
-/* PCI Slot Info: Card Description Values */
-#define PCI_SLOT_DESC_NON_STANDARD     0x00 /* Embed/Non-Standard Connector */
-#define PCI_SLOT_DESC_PCIE_FH_FL       0x00 /* Full Height, Full Length */
-#define PCI_SLOT_DESC_PCIE_FH_HL       0x01 /* Full Height, Half Length */
-#define PCI_SLOT_DESC_PCIE_HH_FL       0x02 /* Half Height, Full Length */
-#define PCI_SLOT_DESC_PCIE_HH_HL       0x03 /* Half Height, Half Length */
-
-/* PCI Slot Info: Mechanicals Values */
-#define PCI_SLOT_MECH_NONE             0x00
-#define PCI_SLOT_MECH_RIGHT            0x01
-#define PCI_SLOT_MECH_LEFT             0x02
-#define PCI_SLOT_MECH_RIGHT_LEFT       0x03
-
-/* PCI Slot Info: Power LED Control Values */
-#define PCI_SLOT_PWR_LED_CTL_NONE      0x00 /* No Control        */
-#define PCI_SLOT_PWR_LED_CTL_FSP       0x01 /* FSP Controlled    */
-#define PCI_SLOT_PWR_LED_CTL_KERNEL    0x02 /* Kernel Controlled */
-
-/* PCI Slot Info: ATTN LED Control Values */
-#define PCI_SLOT_ATTN_LED_CTL_NONE     0x00 /* No Control        */
-#define PCI_SLOT_ATTN_LED_CTL_FSP      0x01 /* FSP Controlled    */
-#define PCI_SLOT_ATTN_LED_CTL_KERNEL   0x02 /* Kernel Controlled */
-
 /* PCI Slot Entry Information */
 struct pci_slot_info {
 	uint8_t    switch_id;
@@ -127,6 +73,7 @@ struct pci_device {
 	uint32_t		mps;		/* Max payload size capability */
 
 	struct dt_node		*dt_node;
+	struct pci_slot		*slot;
 	struct pci_slot_info    *slot_info;
 	struct pci_device	*parent;
 	struct list_head	children;
@@ -437,6 +384,7 @@ struct phb {
 	uint32_t		mps;
 
 	/* PCI-X only slot info, for PCI-E this is in the RC bridge */
+	struct pci_slot		*slot;
 	struct pci_slot_info    *slot_info;
 
 	/* Base location code used to generate the children one */
@@ -484,6 +432,15 @@ static inline int64_t pci_cfg_write32(struct phb *phb, uint32_t bdfn,
 }
 
 /* Utilities */
+extern void pci_bus_remove(struct phb *phb, struct list_head *list);
+extern uint8_t pci_bus_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
+			    struct list_head *list, struct pci_device *parent,
+			    bool scan_downstream);
+extern void pci_add_device_nodes(struct phb *phb,
+				 struct list_head *list,
+				 struct dt_node *parent_node,
+				 struct pci_lsi_state *lstate,
+				 uint8_t swizzle);
 extern int64_t pci_find_cap(struct phb *phb, uint16_t bdfn, uint8_t cap);
 extern int64_t pci_find_ecap(struct phb *phb, uint16_t bdfn, uint16_t cap,
 			     uint8_t *version);
diff --git a/include/platform.h b/include/platform.h
index 80aa8ba..19c04f1 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -20,6 +20,7 @@
 /* Some fwd declarations for types used further down */
 struct phb;
 struct pci_device;
+struct pci_slot;
 struct errorlog;
 
 enum resource_id {
@@ -72,7 +73,7 @@ struct platform {
 
 	/*
 	 * This is called once per PHB before probing. It allows the
-	 * platform to setup some PHB private data that can be used
+	 * platform to set up some PHB private data that can be used
 	 * later on by calls such as pci_get_slot_info() below. The
 	 * "index" argument is the PHB index within the IO HUB (or
 	 * P8 chip).
diff --git a/platforms/ibm-fsp/lxvpd.c b/platforms/ibm-fsp/lxvpd.c
index 43a1c5e..7ab4776 100644
--- a/platforms/ibm-fsp/lxvpd.c
+++ b/platforms/ibm-fsp/lxvpd.c
@@ -24,6 +24,7 @@
 #include <vpd.h>
 #include <pci.h>
 #include <pci-cfg.h>
+#include <pci-slot.h>
 
 #include "lxvpd.h"
 
-- 
2.1.0



More information about the Skiboot mailing list