[Skiboot] [PATCH v2] phb4: Enable PCI peer-to-peer
Benjamin Herrenschmidt
benh at kernel.crashing.org
Thu Jul 13 08:38:24 AEST 2017
On Mon, 2017-06-26 at 20:00 +0200, Frederic Barrat wrote:
> P9 supports PCI peer-to-peer: a PCI device can write directly to the
> mmio space of another PCI device. It completely by-passes the CPU.
>
> It requires some configuration on the PHBs involved:
>
> 1. on the initiating side, the address for the read/write operation is
> in the mmio space of the target, i.e. well outside the range normally
> allowed. So we disable range-checking on the TVT entry in bypass mode.
>
> 2. on the target side, we need to explicitly enable p2p by setting a
> bit in a configuration register. It has the side-effect of reserving
> an outbound (as seen from the CPU) store queue for p2p. Therefore we
> only enable p2p on the PHBs using it, as we don't want to waste the
> resource if we don't have to.
>
> P9 supports p2p mmio writes. Reads are currently only supported if the
> two devices are under the same PHB, but that is expected to change in
> the future.
>
> The patch adds a new OPAL call to allow the OS to declare a p2p
> (initiator, target) pair.
>
> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
> ---
> Changelog:
> - change of API
> - allow disabling of p2p config
>
> core/pci-opal.c | 52 ++++++++++++++++++++
> doc/opal-api/opal-pci-set-p2p-148.rst | 53 ++++++++++++++++++++
> hw/phb4.c | 92 +++++++++++++++++++++++++++++++++++
> include/opal-api.h | 13 ++++-
> include/pci.h | 4 ++
> include/phb4-regs.h | 1 +
> include/phb4.h | 1 +
> 7 files changed, 215 insertions(+), 1 deletion(-)
> create mode 100644 doc/opal-api/opal-pci-set-p2p-148.rst
>
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 5d58a88f..54132a7d 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -74,6 +74,7 @@ opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4);
> opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4);
> opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4);
>
> +static struct lock opal_p2p_oper_lock = LOCK_UNLOCKED;
> static struct lock opal_eeh_evt_lock = LOCK_UNLOCKED;
> static uint64_t opal_eeh_evt = 0;
>
> @@ -977,3 +978,54 @@ static int64_t opal_pci_set_phb_capi_mode(uint64_t phb_id, uint64_t mode, uint64
> return rc;
> }
> opal_call(OPAL_PCI_SET_PHB_CAPI_MODE, opal_pci_set_phb_capi_mode, 3);
> +
> +static int64_t opal_pci_set_p2p(uint64_t phbid_init, uint64_t phbid_target,
> + uint64_t desc, uint16_t pe_number)
> +{
> + struct phb *phb_init = pci_get_phb(phbid_init);
> + struct phb *phb_target = pci_get_phb(phbid_target);
> + int64_t rc;
> +
> + if (!phb_init || !phb_target)
> + return OPAL_PARAMETER;
> + if (!phb_init->ops->set_p2p || !phb_target->ops->set_p2p)
> + return OPAL_UNSUPPORTED;
> + /*
> + * On p9, loads are only supported if initiator and target
> + * devices are on the same PHB.
> + */
> + if ((desc & OPAL_PCI_P2P_LOAD) && (phb_init != phb_target))
> + return OPAL_UNSUPPORTED;
If they are under the same PHB, you don't need any of the other stuff
either, i.e. the P2P accesses won't ever be visible to the PHB itself.
You *might*, however, need to enable P2P in the switches.
In fact, for the "same PHB" case you don't need to check whether they
have a set_p2p callback.
> + /*
> + * We need to modify the state of 2 PHBs. Only allow one
> + * p2p operation at a time to simplify error handling.
> + * Start with the target PHB, as it's easier to roll-back in case
> + * of errors.
> + */
> + lock(&opal_p2p_oper_lock);
> +
> + phb_lock(phb_target);
> + rc = phb_target->ops->set_p2p(phb_target, OPAL_PCI_P2P_TARGET, desc,
> + pe_number);
> + phb_unlock(phb_target);
> + if (rc)
> + goto out;
> +
> + phb_lock(phb_init);
> + rc = phb_init->ops->set_p2p(phb_init, OPAL_PCI_P2P_INITIATOR, desc,
> + pe_number);
> + phb_unlock(phb_init);
> + if (rc) {
> + /* revert enablement bit */
> + desc = desc ^ OPAL_PCI_P2P_ENABLE;
> + phb_lock(phb_target);
> + phb_init->ops->set_p2p(phb_target, OPAL_PCI_P2P_TARGET,
> + desc, pe_number);
> + phb_unlock(phb_target);
> + }
> +out:
> + unlock(&opal_p2p_oper_lock);
> + return rc;
> +}
> +opal_call(OPAL_PCI_SET_P2P, opal_pci_set_p2p, 4);
> diff --git a/doc/opal-api/opal-pci-set-p2p-148.rst b/doc/opal-api/opal-pci-set-p2p-148.rst
> new file mode 100644
> index 00000000..ea2125d6
> --- /dev/null
> +++ b/doc/opal-api/opal-pci-set-p2p-148.rst
> @@ -0,0 +1,53 @@
> +OPAL_PCI_SET_P2P
> +================
> +::
> +
> + #define OPAL_PCI_SET_P2P 149
> +
> + int64_t opal_pci_set_p2p(uint64_t phbid_init, uint64_t phbid_target,
> + uint64_t desc, uint16_t pe_number)
> +
> + /* PCI p2p descriptor */
> + #define OPAL_PCI_P2P_ENABLE 0x1
> + #define OPAL_PCI_P2P_LOAD 0x2
> + #define OPAL_PCI_P2P_STORE 0x4
> +
> +The host calls this function to enable PCI peer-to-peer on the PHBs.
> +
> +Parameters
> +----------
> +::
> +
> + uint64_t phbid_init
> + uint64_t phbid_target
> + uint64_t desc
> + uint16_t pe_number
> +
> +
> +``phbid_init``
> + is the value from the PHB node ibm,opal-phbid property for the device initiating the p2p operation
> +
> +``phbid_target``
> + is the value from the PHB node ibm,opal-phbid property for the device targeted by the p2p operation
> +
> +``desc``
> + tells whether the p2p operation is a store (OPAL_PCI_P2P_STORE), a load (OPAL_PCI_P2P_LOAD), or both.
> + Set OPAL_PCI_P2P_ENABLE to enable the p2p setting; leave it clear to disable it.
> +
> +``pe_number``
> + PE number for the initiating device
> +
> +Return Values
> +-------------
> +
> +``OPAL_SUCCESS``
> + Configuration was successful
> +
> +``OPAL_PARAMETER``
> + Invalid PHB or mode parameter
> +
> +``OPAL_UNSUPPORTED``
> + Not supported by hardware
> +
> +``OPAL_WRONG_STATE``
> + PHB can't be configured to expected state
> diff --git a/hw/phb4.c b/hw/phb4.c
> index 4d93b813..814be837 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -2322,6 +2322,7 @@ static int64_t phb4_creset(struct pci_slot *slot)
> PHBDBG(p, "CRESET: Reinitialization\n");
> p->flags &= ~PHB4_AIB_FENCED;
> p->flags &= ~PHB4_CAPP_RECOVERY;
> + p->p2p_target_count = 0;
> phb4_init_hw(p, false);
> pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
> return pci_slot_set_sm_timeout(slot, msecs_to_tb(100));
> @@ -3251,6 +3252,96 @@ static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
> return OPAL_UNSUPPORTED;
> }
>
> +static int64_t phb4_p2p_enable_initiator(struct phb4 *p, uint16_t pe_number)
> +{
> + uint64_t tve;
> + uint16_t window_id = (pe_number << 1) + 1;
> +
> + /*
> + * initiator needs access to the MMIO space of the target,
> + * which is well beyond the 'normal' memory area. Set the TVE
> + * with no range checking.
> + */
> + PHBDBG(p, "Setting TVE#1 for peer-to-peer for pe %d\n", pe_number);
> + tve = PPC_BIT(51);
> + phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
> + out_be64(p->regs + PHB_IODA_DATA0, tve);
> + p->tve_cache[window_id] = tve;
> + return OPAL_SUCCESS;
> +}
Why is there refcounting on the target and not on the initiator?
> +static int64_t phb4_p2p_enable_target(struct phb4 *p)
> +{
> + uint64_t val;
> +
> + /*
> + * Enabling p2p on a target PHB reserves an outbound (as seen
> + * from the CPU) store queue for p2p. Keep a reference count
> + * as the phb could be used by multiple targets.
> + */
> + if (p->p2p_target_count++ == 0) {
> + PHBDBG(p, "Enabling peer-to-peer\n");
> + xscom_read(p->chip_id,
> + p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, &val);
> + val |= XPEC_NEST_STK_PBCQ_MODE_P2P;
> + xscom_write(p->chip_id,
> + p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, val);
> + }
> + return OPAL_SUCCESS;
> +}
> +
> +static int64_t phb4_p2p_disable_target(struct phb4 *p)
> +{
> + uint64_t val;
> +
> + if (p->p2p_target_count == 0)
> + return OPAL_WRONG_STATE;
> +
> + if (--p->p2p_target_count == 0) {
> + PHBDBG(p, "Disabling peer-to-peer\n");
> + xscom_read(p->chip_id,
> + p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, &val);
> + val &= ~XPEC_NEST_STK_PBCQ_MODE_P2P;
> + xscom_write(p->chip_id,
> + p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, val);
> + }
> + return OPAL_SUCCESS;
> +}
> +
> +static int64_t phb4_set_p2p(struct phb *phb, uint64_t mode, uint64_t flags,
> + uint16_t pe_number)
> +{
> + struct phb4 *p = phb_to_phb4(phb);
> + int64_t rc = OPAL_SUCCESS;
> +
> + switch (mode) {
> + case OPAL_PCI_P2P_INITIATOR:
> + if (flags & OPAL_PCI_P2P_ENABLE)
> + rc = phb4_p2p_enable_initiator(p, pe_number);
> + /*
> + * Don't touch the initiator PHB when disabling p2p.
> + * We should reset the TVE to its default bypass
> + * setting. However, it is more easily done from the
> + * OS, as it knows the start and end address. And
> + * if the initiator is used more than once, with a
> + * different target, we cannot reset the TVE setting
> + * just yet. We need a p2p ref count on the PE, which
> + * is more easily handled by the OS, as skiboot
> + * doesn't track PEs.
> + */
> + break;
> + case OPAL_PCI_P2P_TARGET:
> + if (flags & OPAL_PCI_P2P_ENABLE)
> + rc = phb4_p2p_enable_target(p);
> + else
> + rc = phb4_p2p_disable_target(p);
> + break;
> + default:
> + rc = OPAL_PARAMETER;
> + }
> + return rc;
> +}
> +
> static const struct phb_ops phb4_ops = {
> .cfg_read8 = phb4_pcicfg_read8,
> .cfg_read16 = phb4_pcicfg_read16,
> @@ -3284,6 +3375,7 @@ static const struct phb_ops phb4_ops = {
> .get_diag_data2 = phb4_get_diag_data,
> .tce_kill = phb4_tce_kill,
> .set_capi_mode = phb4_set_capi_mode,
> + .set_p2p = phb4_set_p2p,
> };
>
> static void phb4_init_ioda3(struct phb4 *p)
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 8ffdd4b1..58d727c5 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -204,7 +204,8 @@
> #define OPAL_NPU_INIT_CONTEXT 146
> #define OPAL_NPU_DESTROY_CONTEXT 147
> #define OPAL_NPU_MAP_LPAR 148
> -#define OPAL_LAST 148
> +#define OPAL_PCI_SET_P2P 149
> +#define OPAL_LAST 149
>
> /* Device tree flags */
>
> @@ -1245,6 +1246,16 @@ enum {
> XIVE_DUMP_EMU_STATE = 5,
> };
>
> +/* PCI p2p descriptor */
> +#define OPAL_PCI_P2P_ENABLE 0x1
> +#define OPAL_PCI_P2P_LOAD 0x2
> +#define OPAL_PCI_P2P_STORE 0x4
> +
> +enum {
> + OPAL_PCI_P2P_INITIATOR = 0,
> + OPAL_PCI_P2P_TARGET = 1,
> +};
> +
> #endif /* __ASSEMBLY__ */
>
> #endif /* __OPAL_API_H */
> diff --git a/include/pci.h b/include/pci.h
> index f216594c..6373ab02 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -323,6 +323,10 @@ struct phb_ops {
> uint64_t pe_number);
>
> int64_t (*set_capp_recovery)(struct phb *phb);
> +
> + /* PCI peer-to-peer setup */
> + int64_t (*set_p2p)(struct phb *phb, uint64_t mode, uint64_t flags,
> + uint16_t pe_number);
> };
>
> enum phb_type {
> diff --git a/include/phb4-regs.h b/include/phb4-regs.h
> index d22acf4d..cee50e6c 100644
> --- a/include/phb4-regs.h
> +++ b/include/phb4-regs.h
> @@ -334,6 +334,7 @@
> #define XPEC_NEST_STK_ERR_RPT1 0xb
> #define XPEC_NEST_STK_PBCQ_STAT 0xc
> #define XPEC_NEST_STK_PBCQ_MODE 0xd
> +#define XPEC_NEST_STK_PBCQ_MODE_P2P PPC_BIT(0)
> #define XPEC_NEST_STK_MMIO_BAR0 0xe
> #define XPEC_NEST_STK_MMIO_BAR0_MASK 0xf
> #define XPEC_NEST_STK_MMIO_BAR1 0x10
> diff --git a/include/phb4.h b/include/phb4.h
> index 360889a5..e6617a9d 100644
> --- a/include/phb4.h
> +++ b/include/phb4.h
> @@ -279,6 +279,7 @@ struct phb4 {
> uint64_t nfir_cache; /* Used by complete reset */
> bool err_pending;
> struct phb4_err err;
> + uint32_t p2p_target_count;
>
> /* Cache some RC registers that need to be emulated */
> uint32_t rc_cache[4];
More information about the Skiboot mailing list