[RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active

Brijesh Singh brijesh.singh at amd.com
Thu Jul 27 05:24:45 AEST 2017


Hi Arnd and David,

On 07/26/2017 05:45 AM, Arnd Bergmann wrote:
> On Tue, Jul 25, 2017 at 11:51 AM, David Laight <David.Laight at aculab.com> wrote:
>> From: Brijesh Singh
>>> Sent: 24 July 2017 20:08
>>> From: Tom Lendacky <thomas.lendacky at amd.com>
>>>
>>> Secure Encrypted Virtualization (SEV) does not support string I/O, so
>>> unroll the string I/O operation into a loop operating on one element at
>>> a time.
>>>
>>> Signed-off-by: Tom Lendacky <thomas.lendacky at amd.com>
>>> Signed-off-by: Brijesh Singh <brijesh.singh at amd.com>
>>> ---
>>>   arch/x86/include/asm/io.h | 26 ++++++++++++++++++++++----
>>>   1 file changed, 22 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>>> index e080a39..2f3c002 100644
>>> --- a/arch/x86/include/asm/io.h
>>> +++ b/arch/x86/include/asm/io.h
>>> @@ -327,14 +327,32 @@ static inline unsigned type in##bwl##_p(int port)                       \
>>>                                                                        \
>>>   static inline void outs##bwl(int port, const void *addr, unsigned long count) \
>>>   {
> 
> This will clash with a fix I did to add a "memory" clobber
> for the traditional implementation, see
> https://patchwork.kernel.org/patch/9854573/
> 
>> Is it even worth leaving these as inline functions?
>> Given the speed of IO cycles it is unlikely that the cost of calling a real
>> function will be significant.
>> The code bloat reduction will be significant.
> 
> I think the smallest code would be the original "rep insb" etc, which
> should be smaller than a function call, unlike the loop. Then again,
> there is a rather small number of affected device drivers, almost all
> of them for ancient hardware that you won't even build in a 64-bit
> x86 kernel, see the list below. The only user I found that is actually
> still relevant is drivers/tty/hvc/hvc_xen.c, which uses it for the early
> console.


There are some indirect user of string I/O functions. The following functions
defined in lib/iomap.c calls rep version of ins and outs.

- ioread8_rep, ioread16_rep, ioread32_rep
- iowrite8_rep, iowrite16_rep, iowrite32_rep

I found that several drivers use above functions.

Here is one approach to convert it into non-inline functions. In this approach,
I have added a new file arch/x86/kernel/io.c which provides non rep version of
string I/O routines. The file gets built and used only when AMD_MEM_ENCRYPT is
enabled. On positive side, if we don't build kernel with AMD_MEM_ENCRYPT support
then we use inline routines, when AMD_MEM_ENCRYPT is built then we make a function
call. Inside the function we unroll only when SEV is active.

Do you see any issue with this approach ? thanks

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e080a39..104927d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -323,8 +323,9 @@ static inline unsigned type in##bwl##_p(int port)                   \
         unsigned type value = in##bwl(port);                            \
         slow_down_io();                                                 \
         return value;                                                   \
-}                                                                      \
-                                                                       \
+}
+
+#define BUILDIO_REP(bwl, bw, type)                                     \
  static inline void outs##bwl(int port, const void *addr, unsigned long count) \
  {                                                                      \
         asm volatile("rep; outs" #bwl                                   \
@@ -335,12 +336,31 @@ static inline void ins##bwl(int port, void *addr, unsigned long count)    \
  {                                                                      \
         asm volatile("rep; ins" #bwl                                    \
                      : "+D"(addr), "+c"(count) : "d"(port));            \
-}
+}                                                                      \
  
  BUILDIO(b, b, char)
  BUILDIO(w, w, short)
  BUILDIO(l, , int)
  
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern void outsb_try_rep(int port, const void *addr, unsigned long count);
+extern void insb_try_rep(int port, void *addr, unsigned long count);
+extern void outsw_try_rep(int port, const void *addr, unsigned long count);
+extern void insw_try_rep(int port, void *addr, unsigned long count);
+extern void outsl_try_rep(int port, const void *addr, unsigned long count);
+extern void insl_try_rep(int port, void *addr, unsigned long count);
+#define outsb  outsb_try_rep
+#define insb   insb_try_rep
+#define outsw  outsw_try_rep
+#define insw   insw_try_rep
+#define outsl  outsl_try_rep
+#define insl   insl_try_rep
+#else
+BUILDIO_REP(b, b, char)
+BUILDIO_REP(w, w, short)
+BUILDIO_REP(l, , int)
+#endif
+
  extern void *xlate_dev_mem_ptr(phys_addr_t phys);
  extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a01892b..3b6e2a3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -42,6 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
  
  obj-y                  := process_$(BITS).o signal.o
  obj-$(CONFIG_COMPAT)   += signal_compat.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += io.o
  obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
  obj-y                  += time.o ioport.o dumpstack.o nmi.o
  obj-$(CONFIG_MODIFY_LDT_SYSCALL)       += ldt.o
diff --git a/arch/x86/kernel/io.c b/arch/x86/kernel/io.c
new file mode 100644
index 0000000..f58afa9
--- /dev/null
+++ b/arch/x86/kernel/io.c
@@ -0,0 +1,87 @@
+#include <linux/types.h>
+#include <linux/io.h>
+#include <asm/io.h>
+
+void outsb_try_rep(int port, const void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned char *value = (unsigned char *)addr;
+               while (count) {
+                       outb(*value, port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; outsb" : "+S"(addr), "+c"(count) : "d"(port));
+       }
+}
+
+void insb_try_rep(int port, void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned char *value = (unsigned char *)addr;
+               while (count) {
+                       *value = inb(port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; insb" : "+D"(addr), "+c"(count) : "d"(port));
+       }
+}
+
+void outsw_try_rep(int port, const void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned short *value = (unsigned short *)addr;
+               while (count) {
+                       outw(*value, port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; outsw" : "+S"(addr), "+c"(count) : "d"(port));
+       }
+}
+void insw_try_rep(int port, void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned short *value = (unsigned short *)addr;
+               while (count) {
+                       *value = inw(port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; insw" : "+D"(addr), "+c"(count) : "d"(port));
+       }
+}
+
+void outsl_try_rep(int port, const void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned int *value = (unsigned int *)addr;
+               while (count) {
+                       outl(*value, port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; outsl" : "+S"(addr), "+c"(count) : "d"(port));
+       }
+}
+
+void insl_try_rep(int port, void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned int *value = (unsigned int *)addr;
+               while (count) {
+                       *value = inl(port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; insl" : "+D"(addr), "+c"(count) : "d"(port));
+       }
+}



More information about the Linuxppc-dev mailing list