[Skiboot] [RFC PATCH 4/6] core: Add statistics framework

Oliver O'Halloran oohall at gmail.com
Wed Sep 23 11:56:27 AEST 2020


On Fri, Sep 18, 2020 at 2:35 AM Cédric Le Goater <clg at kaod.org> wrote:
>
> This is a set of simple routines to collect statistics on function
> calls. It can used to measure low level HW operations and track
> possible issues in locking, polling, etc.

I don't mind the idea, but I'm a bit iffy about implementing a whole
new infrastructure for it. We've already got the per-core trace buffer
thing which fills more or less the same role and we barely use it. If
we could build something like this on top of the tracing infra I think
it'd be better overall since the trace buffer provides timestamping
and allows us to supply additional context along with the raw
durations, etc.

I think the main reason why we haven't used the trace stuff much is
because of how cumbersome it is to use. You need to define an ABI
structure then write a bunch of code in OPAL to pack the data into
that structure and a bunch more code in the userspace tool to
de-serialise and render to text in the userspace tool. Maybe we can
use your OPAL_DEBUG_READ call just use native structures and have
skiboot render them to text on-demand.

> Signed-off-by: Cédric Le Goater <clg at kaod.org>
> ---
>  include/stat.h    | 49 ++++++++++++++++++++++++
>  core/stat.c       | 98 +++++++++++++++++++++++++++++++++++++++++++++++
>  core/Makefile.inc |  2 +-
>  3 files changed, 148 insertions(+), 1 deletion(-)
>  create mode 100644 include/stat.h
>  create mode 100644 core/stat.c
>
> diff --git a/include/stat.h b/include/stat.h
> new file mode 100644
> index 000000000000..dc020862b199
> --- /dev/null
> +++ b/include/stat.h
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: Apache-2.0
> +/*
> + * Statistics
> + *
> + * Copyright 2020 IBM Corp.
> + */
> +
> +#ifndef __STAT_H
> +#define __STAT_H
> +
> +#include <lock.h>
> +
> +#define STAT_NR_RANGES         10
> +#define STAT_RANGE_WIDTH       200 /* values */

> +#define STAT_MAX_CALL_TIME     10  /* usecs */
unused

> +
> +struct stat_range {
> +       uint64_t count;
> +       uint64_t sum;
> +       uint64_t min;
> +       uint64_t max;
> +};

The names here suggest we want to collect arbitrary stats, but as far
as I can tell these are all used to store durations? Is that
intentional?

> +struct stat {
> +       const char *name;
> +       uint64_t max_call_time;
> +       uint64_t nr_ranges;
> +       uint64_t count;
> +       struct stat_range all;
> +       struct stat_range ranges[STAT_NR_RANGES];
> +
> +       struct lock lock;
> +};

It might be a good idea to drop the lock and have the caller be
responsible for ensuring mutual exclusion. Anything that we're
interested in collecting statistics about is probably going to have a
per-structure lock anyway (e.g. XIVEs, PHBs)

> +extern void stat_init(struct stat *s, const char *name, uint64_t max_call_time);
> +extern void stat_update(struct stat *s, uint64_t call_time);
> +extern int stat_printf(struct stat *s, void *buf, int size);
> +
> +#define stat_call(expr, s)                                             \
> +({                                                                     \
> +       uint64_t before = mftb();                                       \
> +       int64_t __rc = (expr);                                          \
> +       uint64_t call_time = tb_to_usecs(mftb() - before);              \
> +       stat_update(s, call_time);                                      \
> +       __rc;                                                           \
> +})
> +
> +#endif /* __STAT_H */
> diff --git a/core/stat.c b/core/stat.c
> new file mode 100644
> index 000000000000..6d1ddf2f1a80
> --- /dev/null
> +++ b/core/stat.c
> @@ -0,0 +1,98 @@
> +// SPDX-License-Identifier: Apache-2.0
> +/*
> + * Statistics
> + *
> + * Copyright 2020 IBM Corp.
> + */
> +
> +#define pr_fmt(fmt) "STAT: " fmt
> +
> +#include <cpu.h>
> +#include <opal.h>
> +#include <stat.h>
> +#include <stack.h>
> +
> +static void stat_range_reset(struct stat_range *r)
> +{
> +       r->count = 0;
> +       r->sum = 0;
> +       r->max = 0;
> +       r->min = 0xFFFFFFFFF;
> +}
> +
> +void stat_init(struct stat *s, const char *name, uint64_t max_call_time)
> +{
> +       int i;
> +
> +       s->name = name;
> +       s->max_call_time = max_call_time;
> +       s->nr_ranges = ARRAY_SIZE(s->ranges);
> +       for (i = 0; i < s->nr_ranges; i++)
> +               stat_range_reset(&s->ranges[i]);
> +       stat_range_reset(&s->all);
> +       init_lock(&s->lock);
> +}
> +
> +static void stat_range_update(struct stat_range *r, uint64_t call_time)
> +{
> +       r->sum += call_time;
> +       if (call_time > r->max)
> +               r->max = call_time;
> +       if (call_time < r->min)
> +               r->min = call_time;
> +       r->count++;
> +}
> +
> +#define stat_range_index(s) \
> +       ((((s)->count) / STAT_RANGE_WIDTH) % (s)->nr_ranges)
> +
> +void stat_update(struct stat *s, uint64_t call_time)
> +{
> +       int ridx;
> +       struct stat_range *r;
> +
> +       lock(&s->lock);
> +
> +       ridx = stat_range_index(s);
> +       r = &s->ranges[ridx];
> +
> +       if (!(r->count % STAT_RANGE_WIDTH))
> +               stat_range_reset(r);
> +
> +       stat_range_update(r, call_time);
> +       if (s->max_call_time && call_time > s->max_call_time &&
> +           call_time > s->all.max) {
> +               /* TODO: simulators need more time */
> +               prlog(PR_WARNING, "CPU 0x%04x new %s maximum %lld at #%lld\n",
> +                     this_cpu()->pir, s->name, r->max, s->count);
> +               backtrace();
> +       }
> +       stat_range_update(&s->all, call_time);
> +       s->count++;
> +
> +       unlock(&s->lock);
> +}
> +


More information about the Skiboot mailing list