[RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active
Brijesh Singh
brijesh.singh at amd.com
Thu Jul 27 06:07:14 AEST 2017
On 07/26/2017 02:26 PM, H. Peter Anvin wrote:
>>>>>
>> \
>>>>> static inline void outs##bwl(int port, const void *addr, unsigned
>> long count) \
>>>>> {
>>>
>>> This will clash with a fix I did to add a "memory" clobber
>>> for the traditional implementation, see
>>> https://patchwork.kernel.org/patch/9854573/
>>>
>>>> Is it even worth leaving these as inline functions?
>>>> Given the speed of IO cycles it is unlikely that the cost of calling
>> a real
>>>> function will be significant.
>>>> The code bloat reduction will be significant.
>>>
>>> I think the smallest code would be the original "rep insb" etc, which
>>> should be smaller than a function call, unlike the loop. Then again,
>>> there is a rather small number of affected device drivers, almost all
>>> of them for ancient hardware that you won't even build in a 64-bit
>>> x86 kernel, see the list below. The only user I found that is
>> actually
>>> still relevant is drivers/tty/hvc/hvc_xen.c, which uses it for the
>> early
>>> console.
>>
>>
>> There are some indirect users of the string I/O functions. The following
>> functions defined in lib/iomap.c call the rep versions of ins and outs.
>>
>> - ioread8_rep, ioread16_rep, ioread32_rep
>> - iowrite8_rep, iowrite16_rep, iowrite32_rep
>>
>> I found that several drivers use the above functions.
>>
>> Here is one approach to convert them into non-inline functions. In this
>> approach, I have added a new file, arch/x86/kernel/io.c, which provides
>> non-rep versions of the string I/O routines. The file gets built and used
>> only when AMD_MEM_ENCRYPT is enabled. On the positive side, if we don't
>> build the kernel with AMD_MEM_ENCRYPT support then we use the inline
>> routines; when AMD_MEM_ENCRYPT support is built in, we make a function
>> call. Inside the function we unroll only when SEV is active.
>>
>> Do you see any issue with this approach? Thanks.
>>
>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>> index e080a39..104927d 100644
>> --- a/arch/x86/include/asm/io.h
>> +++ b/arch/x86/include/asm/io.h
>> @@ -323,8 +323,9 @@ static inline unsigned type in##bwl##_p(int port)
>> \
>> unsigned type value = in##bwl(port); \
>> slow_down_io(); \
>> return value; \
>> -}
>> \
>> -
>> \
>> +}
>> +
>> +#define BUILDIO_REP(bwl, bw, type)
>> \
>> static inline void outs##bwl(int port, const void *addr, unsigned long
>> count) \
>> {
>> \
>> asm volatile("rep; outs" #bwl \
>> @@ -335,12 +336,31 @@ static inline void ins##bwl(int port, void *addr,
>> unsigned long count) \
>> {
>> \
>> asm volatile("rep; ins" #bwl \
>> : "+D"(addr), "+c"(count) : "d"(port)); \
>> -}
>> +}
>> \
>>
>> BUILDIO(b, b, char)
>> BUILDIO(w, w, short)
>> BUILDIO(l, , int)
>>
>> +#ifdef CONFIG_AMD_MEM_ENCRYPT
>> +extern void outsb_try_rep(int port, const void *addr, unsigned long
>> count);
>> +extern void insb_try_rep(int port, void *addr, unsigned long count);
>> +extern void outsw_try_rep(int port, const void *addr, unsigned long
>> count);
>> +extern void insw_try_rep(int port, void *addr, unsigned long count);
>> +extern void outsl_try_rep(int port, const void *addr, unsigned long
>> count);
>> +extern void insl_try_rep(int port, void *addr, unsigned long count);
>> +#define outsb outsb_try_rep
>> +#define insb insb_try_rep
>> +#define outsw outsw_try_rep
>> +#define insw insw_try_rep
>> +#define outsl outsl_try_rep
>> +#define insl insl_try_rep
>> +#else
>> +BUILDIO_REP(b, b, char)
>> +BUILDIO_REP(w, w, short)
>> +BUILDIO_REP(l, , int)
>> +#endif
>> +
>> extern void *xlate_dev_mem_ptr(phys_addr_t phys);
>> extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
>>
>> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
>> index a01892b..3b6e2a3 100644
>> --- a/arch/x86/kernel/Makefile
>> +++ b/arch/x86/kernel/Makefile
>> @@ -42,6 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
>>
>> obj-y := process_$(BITS).o signal.o
>> obj-$(CONFIG_COMPAT) += signal_compat.o
>> +obj-$(CONFIG_AMD_MEM_ENCRYPT) += io.o
>> obj-y += traps.o irq.o irq_$(BITS).o
>> dumpstack_$(BITS).o
>> obj-y += time.o ioport.o dumpstack.o nmi.o
>> obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
>> diff --git a/arch/x86/kernel/io.c b/arch/x86/kernel/io.c
>> new file mode 100644
>> index 0000000..f58afa9
>> --- /dev/null
>> +++ b/arch/x86/kernel/io.c
>> @@ -0,0 +1,87 @@
>> +#include <linux/types.h>
>> +#include <linux/io.h>
>> +#include <asm/io.h>
>> +
>> +void outsb_try_rep(int port, const void *addr, unsigned long count)
>> +{
>> + if (sev_active()) {
>> + unsigned char *value = (unsigned char *)addr;
>> + while (count) {
>> + outb(*value, port);
>> + value++;
>> + count--;
>> + }
>> + } else {
>> + asm volatile("rep; outsb" : "+S"(addr), "+c"(count) :
>> "d"(port));
>> + }
>> +}
>> +
>> +void insb_try_rep(int port, void *addr, unsigned long count)
>> +{
>> + if (sev_active()) {
>> + unsigned char *value = (unsigned char *)addr;
>> + while (count) {
>> + *value = inb(port);
>> + value++;
>> + count--;
>> + }
>> + } else {
>> + asm volatile("rep; insb" : "+D"(addr), "+c"(count) :
>> "d"(port));
>> + }
>> +}
>> +
>> +void outsw_try_rep(int port, const void *addr, unsigned long count)
>> +{
>> + if (sev_active()) {
>> + unsigned short *value = (unsigned short *)addr;
>> + while (count) {
>> + outw(*value, port);
>> + value++;
>> + count--;
>> + }
>> + } else {
>> + asm volatile("rep; outsw" : "+S"(addr), "+c"(count) :
>> "d"(port));
>> + }
>> +}
>> +void insw_try_rep(int port, void *addr, unsigned long count)
>> +{
>> + if (sev_active()) {
>> + unsigned short *value = (unsigned short *)addr;
>> + while (count) {
>> + *value = inw(port);
>> + value++;
>> + count--;
>> + }
>> + } else {
>> + asm volatile("rep; insw" : "+D"(addr), "+c"(count) :
>> "d"(port));
>> + }
>> +}
>> +
>> +void outsl_try_rep(int port, const void *addr, unsigned long count)
>> +{
>> + if (sev_active()) {
>> + unsigned int *value = (unsigned int *)addr;
>> + while (count) {
>> + outl(*value, port);
>> + value++;
>> + count--;
>> + }
>> + } else {
>> + asm volatile("rep; outsl" : "+S"(addr), "+c"(count) :
>> "d"(port));
>> + }
>> +}
>> +
>> +void insl_try_rep(int port, void *addr, unsigned long count)
>> +{
>> + if (sev_active()) {
>> + unsigned int *value = (unsigned int *)addr;
>> + while (count) {
>> + *value = inl(port);
>> + value++;
>> + count--;
>> + }
>> + } else {
>> + asm volatile("rep; insl" : "+D"(addr), "+c"(count) :
>> "d"(port));
>> + }
>> +}
>
> What the heck?
>
I am not sure I understand your concern.
Are you commenting on the amount of code duplication? If so, I can certainly
improve that and use a macro similar to the one in the header file to generate
the function bodies.
Or are you saying that we should not attempt to make those functions non-inline,
and that you would prefer them to stay inline?
thanks
More information about the Linuxppc-dev
mailing list