[PATCH v3] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
Benjamin Herrenschmidt
benh at kernel.crashing.org
Tue Jul 25 20:07:28 AEST 2017
On Tue, 2017-07-25 at 12:26 +0530, Santosh Sivaraj wrote:
> +static notrace void kernel_get_tspec(struct timespec *tp,
> + struct vdso_data *vdata, u32 *wtom_sec,
> + u32 *wtom_nsec)
> +{
> + u64 tb;
> + u32 update_count;
This is broken:
> + do {
> + /* check for update count & load values */
> + update_count = vdata->tb_update_count;
> +
> + /* Get TB, offset it and scale result */
> + tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
> + vdata->tb_to_xs) + vdata->stamp_sec_fraction;
> + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> + if (wtom_sec)
> + *wtom_sec = vdata->wtom_clock_sec;
> + if (wtom_nsec)
> + *wtom_nsec = vdata->wtom_clock_nsec;
> + } while (update_count != vdata->tb_update_count);
The assembly code is carefully crafted to create a chain of data
dependencies in order to enforce the ordering in this function,
you completely broke it.
IE. the pointer used to access tb_orig_stamp etc... depends on the
initial update count, and the final read of the update count depends
on all the previously read values (or should), thus ordering those
loads. Without that you'll need more expensive lwsync's.
Additionally, you broke another semantic of the seqlock which is
to spin on the first update count if it has an odd value.
The same problems exist in all your other implementations.
I am really NOT a fan of that attempt at converting to C. The code is
hand crafted assembly for a number of reasons, including the above ones
and maximum performance.
As it is, it's deeply broken.
> +
> + tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
> + tp->tv_sec += (tb >> 32);
> +}
> +
> +static notrace int clock_get_realtime(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + kernel_get_tspec(tp, vdata, NULL, NULL);
> +
> + return 0;
> +}
> +
> +static notrace int clock_get_monotonic(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + __s32 wtom_sec, wtom_nsec;
> + u64 nsec;
> +
> + kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
> +
> + tp->tv_sec += wtom_sec;
> +
> + nsec = tp->tv_nsec;
> + tp->tv_nsec = 0;
> + timespec_add_ns(tp, nsec + wtom_nsec);
> +
> + return 0;
> +}
> +
> +static notrace int clock_realtime_coarse(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + u32 update_count;
> +
> + do {
> + /* check for update count & load values */
> + update_count = vdata->tb_update_count;
> +
> + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> + tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> + } while (update_count != vdata->tb_update_count);
> +
> + return 0;
> +}
> +
> +static notrace int clock_monotonic_coarse(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + __s32 wtom_sec, wtom_nsec;
> + u64 nsec;
> + u32 update_count;
> +
> + do {
> + /* check for update count & load values */
> + update_count = vdata->tb_update_count;
> +
> + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> + tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> + wtom_sec = vdata->wtom_clock_sec;
> + wtom_nsec = vdata->wtom_clock_nsec;
> + } while (update_count != vdata->tb_update_count);
> +
> + tp->tv_sec += wtom_sec;
> + nsec = tp->tv_nsec;
> + tp->tv_nsec = 0;
> + timespec_add_ns(tp, nsec + wtom_nsec);
> +
> + return 0;
> +}
> +
> +notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
> +{
> + int ret;
> + struct vdso_data *vdata = __get_datapage();
> +
> + if (!tp || !vdata)
> + return -EBADR;
> +
> + switch (clk_id) {
> + case CLOCK_REALTIME:
> + ret = clock_get_realtime(tp, vdata);
> + break;
> + case CLOCK_MONOTONIC:
> + ret = clock_get_monotonic(tp, vdata);
> + break;
> + case CLOCK_REALTIME_COARSE:
> + ret = clock_realtime_coarse(tp, vdata);
> + break;
> + case CLOCK_MONOTONIC_COARSE:
> + ret = clock_monotonic_coarse(tp, vdata);
> + break;
> + default:
> + /* fallback to syscall */
> + ret = -1;
> + break;
> + }
> +
> + return ret;
> +}
> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> index 3820213..c3f6b24 100644
> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> @@ -16,6 +16,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +.global kernel_clock_gettime
> +
> .text
> /*
> * Exact prototype of gettimeofday
> @@ -60,71 +62,23 @@ V_FUNCTION_END(__kernel_gettimeofday)
> */
> V_FUNCTION_BEGIN(__kernel_clock_gettime)
> .cfi_startproc
> - /* Check for supported clock IDs */
> - cmpwi cr0,r3,CLOCK_REALTIME
> - cmpwi cr1,r3,CLOCK_MONOTONIC
> - cror cr0*4+eq,cr0*4+eq,cr1*4+eq
> - bne cr0,99f
> -
> - mflr r12 /* r12 saves lr */
> - .cfi_register lr,r12
> - mr r11,r4 /* r11 saves tp */
> - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
> - lis r7,NSEC_PER_SEC at h /* want nanoseconds */
> - ori r7,r7,NSEC_PER_SEC at l
> -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
> - bne cr1,80f /* if not monotonic, all done */
> -
> - /*
> - * CLOCK_MONOTONIC
> - */
> -
> - /* now we must fixup using wall to monotonic. We need to snapshot
> - * that value and do the counter trick again. Fortunately, we still
> - * have the counter value in r8 that was returned by __do_get_tspec.
> - * At this point, r4,r5 contain our sec/nsec values.
> - */
> -
> - lwa r6,WTOM_CLOCK_SEC(r3)
> - lwa r9,WTOM_CLOCK_NSEC(r3)
> -
> - /* We now have our result in r6,r9. We create a fake dependency
> - * on that result and re-check the counter
> - */
> - or r0,r6,r9
> - xor r0,r0,r0
> - add r3,r3,r0
> - ld r0,CFG_TB_UPDATE_COUNT(r3)
> - cmpld cr0,r0,r8 /* check if updated */
> - bne- 50b
> -
> - /* Add wall->monotonic offset and check for overflow or underflow.
> - */
> - add r4,r4,r6
> - add r5,r5,r9
> - cmpd cr0,r5,r7
> - cmpdi cr1,r5,0
> - blt 1f
> - subf r5,r7,r5
> - addi r4,r4,1
> -1: bge cr1,80f
> - addi r4,r4,-1
> - add r5,r5,r7
> -
> -80: std r4,TSPC64_TV_SEC(r11)
> - std r5,TSPC64_TV_NSEC(r11)
> -
> - mtlr r12
> + mflr r6 /* r12 saves lr */
> + stwu r1,-112(r1)
> + .cfi_register lr,r6
> + std r6,24(r1)
> + std r3,32(r1)
> + std r4,40(r1)
> crclr cr0*4+so
> - li r3,0
> - blr
> -
> - /*
> - * syscall fallback
> - */
> -99:
> + bl V_LOCAL_FUNC(kernel_clock_gettime)
> + cmpwi r3,0
> + beq 77f
> li r0,__NR_clock_gettime
> + ld r3,32(r1)
> + ld r4,40(r1)
> sc
> +77: ld r6,24(r1)
> + addi r1,r1,112
> + mtlr r6
> blr
> .cfi_endproc
> V_FUNCTION_END(__kernel_clock_gettime)
More information about the Linuxppc-dev
mailing list