[PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
Nick Desaulniers
ndesaulniers at google.com
Tue Nov 20 05:53:48 AEDT 2018
On Tue, Nov 13, 2018 at 6:44 PM Joel Stanley <joel at jms.id.au> wrote:
>
> The add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros originate
> from GMP's longlong.h.
> This was found when compiling with clang:
>
> arch/powerpc/math-emu/fnmsub.c:46:2: error: invalid use of a cast in a
> inline asm context requiring an l-value: remove the cast or build with
> -fheinous-gnu-extensions
> FP_ADD_D(R, T, B);
> ^~~~~~~~~~~~~~~~~
> ...
>
> ./arch/powerpc/include/asm/sfp-machine.h:283:27: note: expanded from
> macro 'sub_ddmmss'
> : "=r" ((USItype)(sh)), \
> ~~~~~~~~~~^~~
>
> Segher points out: this was fixed in GCC over 16 years ago
> ( https://gcc.gnu.org/r56600 ), and in GMP (where it comes from)
> presumably before that.
>
> Update to the latest version in order to git rid of the invalid casts.
Hi Joel, thanks for this patch. It's a much better fix IMO than v1.
One nit below.
>
> The only functional change I noticed was this in udiv_qrnnd.
>
> __r1 = (n1) % __d1;
> __q1 = (n1) / __d1;
>
> Becomes this:
>
> __q1 = (n1) / __d1;
> __r1 = (n1) - __q1 * __d1;
>
> This is equivalent as it instead of calculating the remainder
> using modulo, it uses the result of integer division to subtract the
> count of 'whole' d1 from r1.
I don't understand this; why was this functional change made?
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/260
> Signed-off-by: Joel Stanley <joel at jms.id.au>
> ---
> v1:
> https://lore.kernel.org/linuxppc-dev/20181102033713.31916-1-joel@jms.id.au/
> v2:
> Instead of setting the -fheinous-gnu-extensions for clang, fix the code.
>
> arch/powerpc/include/asm/sfp-machine.h | 99 +++++++++-----------------
> 1 file changed, 32 insertions(+), 67 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
> index d89beaba26ff..a6353d9dd5ba 100644
> --- a/arch/powerpc/include/asm/sfp-machine.h
> +++ b/arch/powerpc/include/asm/sfp-machine.h
> @@ -213,30 +213,18 @@
> * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
> * (i.e. carry out) is not stored anywhere, and is lost.
> */
> -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
> +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
> do { \
> if (__builtin_constant_p (bh) && (bh) == 0) \
> - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "%r" ((USItype)(ah)), \
> - "%r" ((USItype)(al)), \
> - "rI" ((USItype)(bl))); \
> - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \
> - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "%r" ((USItype)(ah)), \
> - "%r" ((USItype)(al)), \
> - "rI" ((USItype)(bl))); \
> + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
> + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
> + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
> + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
> + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
> else \
> - __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "%r" ((USItype)(ah)), \
> - "r" ((USItype)(bh)), \
> - "%r" ((USItype)(al)), \
> - "rI" ((USItype)(bl))); \
> + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
> + : "=r" (sh), "=&r" (sl) \
> + : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
> } while (0)
>
> /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
> @@ -248,44 +236,24 @@
> * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
> * and is lost.
> */
> -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
> +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
> do { \
> if (__builtin_constant_p (ah) && (ah) == 0) \
> - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "r" ((USItype)(bh)), \
> - "rI" ((USItype)(al)), \
> - "r" ((USItype)(bl))); \
> - else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \
> - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "r" ((USItype)(bh)), \
> - "rI" ((USItype)(al)), \
> - "r" ((USItype)(bl))); \
> + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
> + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
> + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
> + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
> + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
> else if (__builtin_constant_p (bh) && (bh) == 0) \
> - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "r" ((USItype)(ah)), \
> - "rI" ((USItype)(al)), \
> - "r" ((USItype)(bl))); \
> - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \
> - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "r" ((USItype)(ah)), \
> - "rI" ((USItype)(al)), \
> - "r" ((USItype)(bl))); \
> + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
> + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
> + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
> + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
> + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
> else \
> - __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
> - : "=r" ((USItype)(sh)), \
> - "=&r" ((USItype)(sl)) \
> - : "r" ((USItype)(ah)), \
> - "r" ((USItype)(bh)), \
> - "rI" ((USItype)(al)), \
> - "r" ((USItype)(bl))); \
> + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
> + : "=r" (sh), "=&r" (sl) \
> + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
> } while (0)
>
> /* asm fragments for mul and div */
> @@ -294,13 +262,10 @@
> * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
> * word product in HIGH_PROD and LOW_PROD.
> */
> -#define umul_ppmm(ph, pl, m0, m1) \
> +#define umul_ppmm(ph, pl, m0, m1) \
> do { \
> USItype __m0 = (m0), __m1 = (m1); \
> - __asm__ ("mulhwu %0,%1,%2" \
> - : "=r" ((USItype)(ph)) \
> - : "%r" (__m0), \
> - "r" (__m1)); \
> + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
> (pl) = __m0 * __m1; \
> } while (0)
>
> @@ -312,28 +277,28 @@
> * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
> * UDIV_NEEDS_NORMALIZATION is defined to 1.
> */
> -#define udiv_qrnnd(q, r, n1, n0, d) \
> +#define udiv_qrnnd(q, r, n1, n0, d) \
> do { \
> UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
> __d1 = __ll_highpart (d); \
> __d0 = __ll_lowpart (d); \
> \
> - __r1 = (n1) % __d1; \
> __q1 = (n1) / __d1; \
> - __m = (UWtype) __q1 * __d0; \
> + __r1 = (n1) - __q1 * __d1; \
> + __m = __q1 * __d0; \
> __r1 = __r1 * __ll_B | __ll_highpart (n0); \
> if (__r1 < __m) \
> { \
> __q1--, __r1 += (d); \
> - if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \
> + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
> if (__r1 < __m) \
> __q1--, __r1 += (d); \
> } \
> __r1 -= __m; \
> \
> - __r0 = __r1 % __d1; \
> __q0 = __r1 / __d1; \
> - __m = (UWtype) __q0 * __d0; \
> + __r0 = __r1 - __q0 * __d1; \
> + __m = __q0 * __d0; \
> __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
> if (__r0 < __m) \
> { \
> @@ -344,7 +309,7 @@
> } \
> __r0 -= __m; \
> \
> - (q) = (UWtype) __q1 * __ll_B | __q0; \
> + (q) = __q1 * __ll_B | __q0; \
> (r) = __r0; \
> } while (0)
This appears to now differ from the upstream source:
https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679
If we're going to borrow implementations from GCC, let's borrow the
same implementation. Otherwise it's hard to have confidence in this
part of the patch.
--
Thanks,
~Nick Desaulniers
More information about the Linuxppc-dev
mailing list