[PATCH 7/8] powerpc/perf/hv-24x7: Support v2 of the hypervisor API
Sukadev Bhattiprolu
sukadev at linux.vnet.ibm.com
Wed Jun 14 10:13:50 AEST 2017
Thiago Jung Bauermann [bauerman at linux.vnet.ibm.com] wrote:
> POWER9 introduces a new version of the hypervisor API to access the 24x7
> perf counters. The new version changed some of the structures used for
> requests and results.
>
> Signed-off-by: Thiago Jung Bauermann <bauerman at linux.vnet.ibm.com>
> ---
> arch/powerpc/perf/hv-24x7.c | 145 +++++++++++++++++++++++++++------
> arch/powerpc/perf/hv-24x7.h | 59 ++++++++++++--
> arch/powerpc/platforms/pseries/Kconfig | 2 +-
> 3 files changed, 173 insertions(+), 33 deletions(-)
>
> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
> index 043cbc78be98..95c44f1d2fd2 100644
> --- a/arch/powerpc/perf/hv-24x7.c
> +++ b/arch/powerpc/perf/hv-24x7.c
> @@ -18,6 +18,7 @@
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
>
> +#include <asm/cputhreads.h>
> #include <asm/firmware.h>
> #include <asm/hvcall.h>
> #include <asm/io.h>
> @@ -27,6 +28,9 @@
> #include "hv-24x7-catalog.h"
> #include "hv-common.h"
>
> +/* Version of the 24x7 hypervisor API that we should use in this machine. */
> +static int interface_version;
> +
> static bool domain_is_valid(unsigned domain)
> {
> switch (domain) {
> @@ -74,7 +78,11 @@ static const char *domain_name(unsigned domain)
>
> static bool catalog_entry_domain_is_valid(unsigned domain)
> {
> - return is_physical_domain(domain);
> + /* POWER8 doesn't support virtual domains. */
> + if (interface_version == 1)
> + return is_physical_domain(domain);
> + else
> + return domain_is_valid(domain);
> }
>
> /*
> @@ -166,9 +174,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
> DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
> DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
>
> -#define MAX_NUM_REQUESTS ((H24x7_DATA_BUFFER_SIZE - \
> +#define MAX_NUM_REQUESTS_V1 ((H24x7_DATA_BUFFER_SIZE - \
> + sizeof(struct hv_24x7_request_buffer)) \
> + / H24x7_REQUEST_SIZE_V1)
> +#define MAX_NUM_REQUESTS_V2 ((H24x7_DATA_BUFFER_SIZE - \
> sizeof(struct hv_24x7_request_buffer)) \
> - / sizeof(struct hv_24x7_request))
> + / H24x7_REQUEST_SIZE_V2)
Nit: Can we define MAX_NUM_REQUESTS(version), taking a version parameter? It
will...
>
> static char *event_name(struct hv_24x7_event_data *ev, int *len)
> {
> @@ -1052,7 +1063,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
> memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
> memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
>
> - request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
> + request_buffer->interface_version = interface_version;
> /* memset above set request_buffer->num_requests to 0 */
> }
>
> @@ -1077,7 +1088,7 @@ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
> if (ret) {
> struct hv_24x7_request *req;
>
> - req = &request_buffer->requests[0];
> + req = request_buffer->requests;
> pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
> req->performance_domain, req->data_offset,
> req->starting_ix, req->starting_lpar_ix,
> @@ -1101,9 +1112,13 @@ static int add_event_to_24x7_request(struct perf_event *event,
> {
> u16 idx;
> int i;
> + size_t req_size;
> struct hv_24x7_request *req;
>
> - if (request_buffer->num_requests >= MAX_NUM_REQUESTS) {
> + if ((request_buffer->interface_version == 1
> + && request_buffer->num_requests >= MAX_NUM_REQUESTS_V1)
> + || (request_buffer->interface_version > 1
> + && request_buffer->num_requests >= MAX_NUM_REQUESTS_V2)) {
> pr_devel("Too many requests for 24x7 HCALL %d\n",
...simplify this check to
if (request_buffer->num_requests >= MAX_NUM_REQUESTS(version))
> request_buffer->num_requests);
> return -EINVAL;
> @@ -1120,8 +1135,11 @@ static int add_event_to_24x7_request(struct perf_event *event,
> idx = event_get_vcpu(event);
> }
>
> + req_size = request_buffer->interface_version == 1 ?
> + H24x7_REQUEST_SIZE_V1 : H24x7_REQUEST_SIZE_V2;
> +
Maybe similarly, with H24x7_REQUEST_SIZE(version)?
> i = request_buffer->num_requests++;
> - req = &request_buffer->requests[i];
> + req = (void *) request_buffer->requests + i * req_size;
>
> req->performance_domain = event_get_domain(event);
> req->data_size = cpu_to_be16(8);
> @@ -1131,14 +1149,97 @@ static int add_event_to_24x7_request(struct perf_event *event,
> req->starting_ix = cpu_to_be16(idx);
> req->max_ix = cpu_to_be16(1);
>
> + if (request_buffer->interface_version > 1 &&
> + req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
> + req->starting_thread_group_ix = idx % 2;
> + req->max_num_thread_groups = 1;
> + }
> +
> return 0;
> }
>
> +/**
> + * get_count_from_result - get event count from the given result
> + *
> + * @event: Event associated with @res.
> + * @resb: Result buffer containing @res.
> + * @res: Result to work on.
> + * @countp: Output variable containing the event count.
> + * @next: Optional output variable pointing to the next result in @resb.
> + */
> +static int get_count_from_result(struct perf_event *event,
> + struct hv_24x7_data_result_buffer *resb,
> + struct hv_24x7_result *res, u64 *countp,
> + struct hv_24x7_result **next)
> +{
> + u16 num_elements = be16_to_cpu(res->num_elements_returned);
> + u16 data_size = be16_to_cpu(res->result_element_data_size);
> + unsigned int data_offset;
> + void *element_data;
> + int ret = 0;
> +
> + /*
> + * We can bail out early if the result is empty.
> + */
> + if (!num_elements) {
> + pr_debug("Result of request %hhu is empty, nothing to do\n",
> + res->result_ix);
> +
> + if (next)
> + *next = (struct hv_24x7_result *) res->elements;
> +
> + return -ENODATA;
> + }
> +
> + /*
> + * This code assumes that a result has only one element.
> + */
> + if (num_elements != 1) {
> + pr_debug("Error: result of request %hhu has %hu elements\n",
> + res->result_ix, num_elements);
Could this happen due to a user request, or would this indicate a bug
in the way we submitted the request? (perf should submit a separate request
for each lpar/index - we set ->max_ix and ->max_num_lpars to cpu_to_be16(1).)
A minor inconsistency with proceeding is that if the next element passes,
this return code is lost/overwritten. IOW, h_24x7_event_commit_txn()'s
return value depends on the last element we process, even if intermediate
ones encounter an error.
> +
> + if (!next)
> + return -ENOTSUPP;
> +
> + /*
> + * We still need to go through the motions so that we can return
> + * a pointer to the next result.
> + */
> + ret = -ENOTSUPP;
> + }
> +
> + if (data_size != sizeof(u64)) {
> + pr_debug("Error: result of request %hhu has data of %hu bytes\n",
> + res->result_ix, data_size);
> +
> + if (!next)
> + return -ENOTSUPP;
> +
> + ret = -ENOTSUPP;
> + }
> +
> + if (resb->interface_version == 1)
> + data_offset = offsetof(struct hv_24x7_result_element_v1,
> + element_data);
> + else
> + data_offset = offsetof(struct hv_24x7_result_element_v2,
> + element_data);
> +
> + element_data = res->elements + data_offset;
> +
> + if (!ret)
> + *countp = be64_to_cpu(*((u64 *) element_data));
> +
> + /* The next result is after the result element. */
> + if (next)
> + *next = element_data + data_size;
> +
> + return ret;
> +}
> +
> static int single_24x7_request(struct perf_event *event, u64 *count)
> {
> int ret;
> - u16 num_elements;
> - struct hv_24x7_result *result;
> struct hv_24x7_request_buffer *request_buffer;
> struct hv_24x7_data_result_buffer *result_buffer;
>
> @@ -1158,14 +1259,9 @@ static int single_24x7_request(struct perf_event *event, u64 *count)
> if (ret)
> goto out;
>
> - result = result_buffer->results;
> -
> - /* This code assumes that a result has only one element. */
> - num_elements = be16_to_cpu(result->num_elements_returned);
> - WARN_ON_ONCE(num_elements != 1);
> -
> /* process result from hcall */
> - *count = be64_to_cpu(result->elements[0].element_data[0]);
> + ret = get_count_from_result(event, result_buffer,
> + result_buffer->results, count, NULL);
>
> out:
> put_cpu_var(hv_24x7_reqb);
> @@ -1425,16 +1521,13 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
> for (i = 0, res = result_buffer->results;
> i < result_buffer->num_results; i++, res = next_res) {
> struct perf_event *event = h24x7hw->events[res->result_ix];
> - u16 num_elements = be16_to_cpu(res->num_elements_returned);
> - u16 data_size = be16_to_cpu(res->result_element_data_size);
>
> - /* This code assumes that a result has only one element. */
> - WARN_ON_ONCE(num_elements != 1);
> + ret = get_count_from_result(event, result_buffer, res, &count,
> + &next_res);
> + if (ret)
> + continue;
>
> - count = be64_to_cpu(res->elements[0].element_data[0]);
> update_event_count(event, count);
> -
> - next_res = (void *) res->elements[0].element_data + data_size;
> }
>
> put_cpu_var(hv_24x7_hw);
> @@ -1486,6 +1579,12 @@ static int hv_24x7_init(void)
> return -ENODEV;
> }
>
> + /* POWER8 only supports v1, while POWER9 only supports v2. */
> + if (cpu_has_feature(CPU_FTR_ARCH_300))
> + interface_version = 2;
> + else
> + interface_version = 1;
> +
> hret = hv_perf_caps_get(&caps);
> if (hret) {
> pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
> diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
> index b95909400b2a..149af6e9538f 100644
> --- a/arch/powerpc/perf/hv-24x7.h
> +++ b/arch/powerpc/perf/hv-24x7.h
> @@ -10,6 +10,9 @@ enum hv_perf_domains {
> HV_PERF_DOMAIN_MAX,
> };
>
> +#define H24x7_REQUEST_SIZE_V1 16
> +#define H24x7_REQUEST_SIZE_V2 32
> +
> struct hv_24x7_request {
> /* PHYSICAL domains require enabling via phyp/hmc. */
> __u8 performance_domain;
> @@ -42,19 +45,47 @@ struct hv_24x7_request {
> /* chip, core, or virtual processor based on @performance_domain */
> __be16 starting_ix;
> __be16 max_ix;
> +
> + /* The following fields were added in v2 of the 24x7 interface. */
> +
> + __u8 starting_thread_group_ix;
> +
> + /* -1 means all thread groups starting at @starting_thread_group_ix */
> + __u8 max_num_thread_groups;
> +
> + __u8 reserved2[0xE];
> } __packed;
>
> struct hv_24x7_request_buffer {
> /* 0 - ? */
> /* 1 - ? */
> -#define HV_24X7_IF_VERSION_CURRENT 0x01
> __u8 interface_version;
> __u8 num_requests;
> __u8 reserved[0xE];
> - struct hv_24x7_request requests[1];
> + struct hv_24x7_request requests[];
> +} __packed;
> +
> +struct hv_24x7_result_element_v1 {
> + __be16 lpar_ix;
> +
> + /*
> + * represents the core, chip, or virtual processor based on the
> + * request's @performance_domain
> + */
> + __be16 domain_ix;
> +
> + /* -1 if @performance_domain does not refer to a virtual processor */
> + __be32 lpar_cfg_instance_id;
> +
> + /* size = @result_element_data_size of containing result. */
> + __u64 element_data[];
> } __packed;
>
> -struct hv_24x7_result_element {
> +/*
> + * We need a separate struct for v2 because the offset of @element_data changed
> + * between versions.
> + */
> +struct hv_24x7_result_element_v2 {
> __be16 lpar_ix;
>
> /*
> @@ -66,8 +97,12 @@ struct hv_24x7_result_element {
> /* -1 if @performance_domain does not refer to a virtual processor */
> __be32 lpar_cfg_instance_id;
>
> + __u8 thread_group_ix;
> +
> + __u8 reserved[7];
> +
> /* size = @result_element_data_size of containing result. */
> - __u64 element_data[1];
> + __u64 element_data[];
> } __packed;
>
> struct hv_24x7_result {
> @@ -94,10 +129,16 @@ struct hv_24x7_result {
> __be16 result_element_data_size;
> __u8 reserved[0x2];
>
> - /* WARNING: only valid for first result element due to variable sizes
> - * of result elements */
> - /* struct hv_24x7_result_element[@num_elements_returned] */
> - struct hv_24x7_result_element elements[1];
> + /*
> + * Either
> + * struct hv_24x7_result_element_v1[@num_elements_returned]
> + * or
> + * struct hv_24x7_result_element_v2[@num_elements_returned]
> + *
> + * depending on the interface_version field of the
> + * struct hv_24x7_data_result_buffer containing this result.
> + */
> + char elements[];
> } __packed;
>
> struct hv_24x7_data_result_buffer {
> @@ -113,7 +154,7 @@ struct hv_24x7_data_result_buffer {
> __u8 reserved2[0x8];
> /* WARNING: only valid for the first result due to variable sizes of
> * results */
> - struct hv_24x7_result results[1]; /* [@num_results] */
> + struct hv_24x7_result results[]; /* [@num_results] */
> } __packed;
>
> #endif
> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> index 913c54e23eea..3a6dfd14f64b 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -124,7 +124,7 @@ config HV_PERF_CTRS
> Enable access to hypervisor supplied counters in perf. Currently,
> this enables code that uses the hcall GetPerfCounterInfo and 24x7
> interfaces to retrieve counters. GPCI exists on Power 6 and later
> - systems. 24x7 is available on Power 8 systems.
> + systems. 24x7 is available on Power 8 and later systems.
>
> If unsure, select Y.
>
> --
> 2.7.4
More information about the Linuxppc-dev
mailing list