[Lguest] [PATCH 13/25] [PATCH] turn msr.h functions into native versions

Glauber de Oliveira Costa gcosta at redhat.com
Wed Aug 8 14:19:00 EST 2007


This patch turns makes the basic operations in msr.h out of native
ones. Those operations are: rdmsr, wrmsr, rdtsc, rdtscp, rdpmc, and
cpuid. After they are turned into functions, some call sites need
casts, and so we provide them.

There is also a fixup needed in the functions located in the vsyscall
area, as they cannot call any of them anymore (otherwise, the call
would go through a kernel address, invalid in userspace mapping).

The solution is to call the now-provided native_ versions instead.

Signed-off-by: Glauber de Oliveira Costa <gcosta at redhat.com>
Signed-off-by: Steven Rostedt <rostedt at goodmis.org>
---
 arch/x86_64/ia32/syscall32.c  |    2 +-
 arch/x86_64/kernel/setup64.c  |    6 +-
 arch/x86_64/kernel/tsc.c      |   42 ++++++-
 arch/x86_64/kernel/vsyscall.c |    4 +-
 arch/x86_64/vdso/vgetcpu.c    |    4 +-
 include/asm-x86_64/msr.h      |  284 +++++++++++++++++++++++++---------------
 6 files changed, 226 insertions(+), 116 deletions(-)

diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 15013ba..dd1b4a3 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -79,5 +79,5 @@ void syscall32_cpu_init(void)
 	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
 	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
 
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+	wrmsrl(MSR_CSTAR, (u64)ia32_cstar_target);
 }
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 1200aaa..395cf02 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -122,7 +122,7 @@ void pda_init(int cpu)
 	asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 
 	/* Memory clobbers used to order PDA accessed */
 	mb();
-	wrmsrl(MSR_GS_BASE, pda);
+	wrmsrl(MSR_GS_BASE, (u64)pda);
 	mb();
 
 	pda->cpunumber = cpu; 
@@ -161,8 +161,8 @@ void syscall_init(void)
 	 * but only a 32bit target. LSTAR sets the 64bit rip. 	 
 	 */ 
 	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
-	wrmsrl(MSR_LSTAR, system_call); 
-	wrmsrl(MSR_CSTAR, ignore_sysret);
+	wrmsrl(MSR_LSTAR, (u64)system_call);
+	wrmsrl(MSR_CSTAR, (u64)ignore_sysret);
 
 #ifdef CONFIG_IA32_EMULATION   		
 	syscall32_cpu_init ();
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index 2a59bde..0db0041 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -9,6 +9,46 @@
 
 #include <asm/timex.h>
 
+#ifdef CONFIG_PARAVIRT
+/*
+ * When paravirt is on, some functionalities are executed through function
+ * pointers in the paravirt_ops structure, for both the host and guest.
+ * These function pointers exist inside the kernel and can not
+ * be accessed by user space. To avoid this, we make a copy of the
+ * get_cycles_sync (called in kernel) but force the use of native_read_tsc.
+ * For the host, it will simply do the native rdtsc. The guest
+ * should set up it's own clock and vread
+ */
+static __always_inline long long vget_cycles_sync(void)
+{
+	unsigned long long ret;
+	unsigned eax, edx;
+
+	/*
+	 * Use RDTSCP if possible; it is guaranteed to be synchronous
+	 * and doesn't cause a VMEXIT on Hypervisors
+	 */
+	alternative_io(ASM_NOP3, ".byte 0x0f,0x01,0xf9", X86_FEATURE_RDTSCP,
+		       ASM_OUTPUT2("=a" (eax), "=d" (edx)),
+		       "a" (0U), "d" (0U) : "ecx", "memory");
+	ret = (((unsigned long long)edx) << 32) | ((unsigned long long)eax);
+	if (ret)
+		return ret;
+
+	/*
+	 * Don't do an additional sync on CPUs where we know
+	 * RDTSC is already synchronous:
+	 */
+	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
+			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
+	ret = native_read_tsc();
+
+	return ret;
+}
+#else
+# define vget_cycles_sync() get_cycles_sync()
+#endif
+
 static int notsc __initdata = 0;
 
 unsigned int cpu_khz;		/* TSC clocks / usec, not used here */
@@ -165,7 +205,7 @@ static cycle_t read_tsc(void)
 
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
-	cycle_t ret = (cycle_t)get_cycles_sync();
+	cycle_t ret = (cycle_t)vget_cycles_sync();
 	return ret;
 }
 
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 06c3494..22fc4c9 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -184,7 +184,7 @@ time_t __vsyscall(1) vtime(time_t *t)
 long __vsyscall(2)
 vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
-	unsigned int dummy, p;
+	unsigned int p;
 	unsigned long j = 0;
 
 	/* Fast cache - only recompute value once per jiffies and avoid
@@ -199,7 +199,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 		p = tcache->blob[1];
 	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
-		rdtscp(dummy, dummy, p);
+		native_read_tscp(&p);
 	} else {
 		/* Load per CPU data from GDT */
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c
index 91f6e85..61d0def 100644
--- a/arch/x86_64/vdso/vgetcpu.c
+++ b/arch/x86_64/vdso/vgetcpu.c
@@ -15,7 +15,7 @@
 
 long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
-	unsigned int dummy, p;
+	unsigned int p;
 	unsigned long j = 0;
 
 	/* Fast cache - only recompute value once per jiffies and avoid
@@ -30,7 +30,7 @@ long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 		p = tcache->blob[1];
 	} else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
-		rdtscp(dummy, dummy, p);
+		native_read_tscp(&p);
 	} else {
 		/* Load per CPU data from GDT */
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index d5c55b8..6ec254e 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -10,110 +10,193 @@
  * Note: the rd* operations modify the parameters directly (without using
  * pointer indirection), this allows gcc to optimize better
  */
+static inline unsigned long native_read_msr(unsigned int msr)
+{
+	unsigned long a, d;
+	asm volatile("rdmsr" : "=a" (a), "=d" (d) : "c" (msr));
+	return a | (d << 32);
+}
 
-#define rdmsr(msr,val1,val2) \
-       __asm__ __volatile__("rdmsr" \
-			    : "=a" (val1), "=d" (val2) \
-			    : "c" (msr))
+static inline unsigned long native_read_msr_safe(unsigned int msr, int *err)
+{
+	unsigned long a, d;
+	asm volatile (  "1: rdmsr; xorl %0, %0\n"
+			"2:\n"
+			".section .fixup,\"ax\"\n"
+			"3:       movl %4,%0\n"
+			" jmp 2b\n"
+			".previous\n"
+			".section __ex_table,\"a\"\n"
+			" .align 8\n"
+			" .quad 1b,3b\n"
+			".previous":"=&bDS" (*err), "=a"((a)), "=d"((d))
+			:"c"(msr), "i"(-EIO), "0"(0));
+	return a | (d << 32);
+}
 
+static inline unsigned long native_write_msr(unsigned int msr,
+					     unsigned long val)
+{
+	asm volatile(	"wrmsr"
+			: /* no outputs */
+			: "c" (msr), "a" ((u32)val), "d" ((u32)(val>>32)));
+	return 0;
+}
 
-#define rdmsrl(msr,val) do { unsigned long a__,b__; \
-       __asm__ __volatile__("rdmsr" \
-			    : "=a" (a__), "=d" (b__) \
-			    : "c" (msr)); \
-       val = a__ | (b__<<32); \
-} while(0)
+/* wrmsr with exception handling */
+static inline long native_write_msr_safe(unsigned int msr, unsigned long val)
+{
+	unsigned long ret;
+	asm volatile("2: wrmsr ; xorq %0,%0\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  movq %4,%0 ; jmp 1b\n\t"
+		     ".previous\n\t"
+		     ".section __ex_table,\"a\"\n"
+		     "   .align 8\n\t"
+		     "   .quad 	2b,3b\n\t"
+		     ".previous"
+		     : "=r" (ret)
+		     : "c" (msr), "a" ((u32)val), "d" ((u32)(val>>32)),
+		       "i" (-EFAULT));
+	return ret;
+}
 
-#define wrmsr(msr,val1,val2) \
-     __asm__ __volatile__("wrmsr" \
-			  : /* no outputs */ \
-			  : "c" (msr), "a" (val1), "d" (val2))
+static inline unsigned long native_read_tsc(void)
+{
+	unsigned long low, high;
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+	return low | (high << 32);
+}
 
-#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) 
+static inline unsigned long native_read_pmc(int counter)
+{
+	unsigned long low, high;
+	asm volatile ("rdpmc"
+		      : "=a" (low), "=d" (high)
+		      : "c" (counter));
 
-/* wrmsr with exception handling */
-#define wrmsr_safe(msr,a,b) ({ int ret__;			\
-	asm volatile("2: wrmsr ; xorl %0,%0\n"			\
-		     "1:\n\t"					\
-		     ".section .fixup,\"ax\"\n\t"		\
-		     "3:  movl %4,%0 ; jmp 1b\n\t"		\
-		     ".previous\n\t"				\
- 		     ".section __ex_table,\"a\"\n"		\
-		     "   .align 8\n\t"				\
-		     "   .quad 	2b,3b\n\t"			\
-		     ".previous"				\
-		     : "=a" (ret__)				\
-		     : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \
-	ret__; })
-
-#define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32))
-
-#define rdmsr_safe(msr,a,b) \
-	({ int ret__;						\
-	  asm volatile ("1:       rdmsr\n"			\
-                      "2:\n"					\
-                      ".section .fixup,\"ax\"\n"		\
-                      "3:       movl %4,%0\n"			\
-                      " jmp 2b\n"				\
-                      ".previous\n"				\
-                      ".section __ex_table,\"a\"\n"		\
-                      " .align 8\n"				\
-                      " .quad 1b,3b\n"				\
-                      ".previous":"=&bDS" (ret__), "=a"(*(a)), "=d"(*(b))\
-                      :"c"(msr), "i"(-EIO), "0"(0));		\
-	  ret__; })		
-
-#define rdtsc(low,high) \
-     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
-     __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscp(low,high,aux) \
-     asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
-
-#define rdtscll(val) do { \
-     unsigned int __a,__d; \
-     asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
-     (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
-} while(0)
-
-#define rdtscpll(val, aux) do { \
-     unsigned long __a, __d; \
-     asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
-     (val) = (__d << 32) | __a; \
+	return low | (high << 32);
+}
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+				unsigned int *ecx, unsigned int *edx)
+{
+	asm volatile ("cpuid"
+		      : "=a" (*eax),
+			"=b" (*ebx),
+			"=c" (*ecx),
+			"=d" (*edx)
+		      : "0" (*eax), "2" (*ecx));
+}
+
+static inline unsigned long native_read_tscp(int *aux)
+{
+	unsigned long low, high;
+	asm volatile (".byte 0x0f,0x01,0xf9"
+			: "=a" (low), "=d" (high), "=c" (*aux));
+	return low | (high >> 32);
+}
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define read_msr(msr)			native_read_msr(msr)
+#define read_msr_safe(msr, err)		native_read_msr_safe(msr, err)
+#define write_msr(msr, val) 		native_write_msr(msr, val)
+#define write_msr_safe(msr, val)	native_write_msr_safe(msr, val)
+#define read_tsc_reg()			native_read_tsc()
+#define read_tscp(aux)			native_read_tscp(aux)
+#define read_pmc(counter)		native_read_pmc(counter)
+#define __cpuid				native_cpuid
+#endif
+
+#define rdmsr(msr, low, high) 			\
+do {						\
+	unsigned long __val = read_msr(msr); 	\
+	low = (u32)__val;			\
+	high = (u32)(__val >> 32);		\
 } while (0)
 
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+#define rdmsrl(msr, val)		(val) = read_msr(msr)
+
+#define rdmsr_safe(msr, a, d)					\
+({								\
+	int __ret;						\
+	unsigned long __val = read_msr_safe(msr, &__ret);	\
+	(*a) = (u32)(__val);					\
+	(*d) = (u32)(__val >> 32);				\
+	__ret;							\
+})
+
+#define wrmsr(msr, val1, val2)				\
+do {							\
+	unsigned long __val;				\
+	__val = (unsigned long)val2 << 32;		\
+	__val |= val1;					\
+	write_msr(msr, __val);				\
+} while (0)
+
+#define wrmsrl(msr, val) write_msr(msr, val)
+
+#define wrmsr_safe(msr, a, d) \
+	write_msr_safe(msr, a | ( ((u64)d) << 32))
+
+#define checking_wrmsrl(msr, val) wrmsr_safe(msr, (u32)(val),(u32)((val)>>32))
+
+#define write_tsc(val1, val2) wrmsr(0x10, val1, val2)
 
 #define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)
 
-#define rdpmc(counter,low,high) \
-     __asm__ __volatile__("rdpmc" \
-			  : "=a" (low), "=d" (high) \
-			  : "c" (counter))
+#define rdtsc(low, high)			\
+do {						\
+	unsigned long __val  = read_tsc_reg();	\
+	low = (u32)__val;			\
+	high = (u32)(__val >> 32);		\
+} while (0)
+
+#define rdtscl(low) 	(low) = (u32)read_tsc_reg()
+
+#define rdtscll(val)	(val) = read_tsc_reg()
+
+#define rdtscp(low, high, aux)				\
+do {							\
+	int __aux;					\
+	unsigned long __val = read_tscp(&__aux);	\
+	(low) = (u32)__val;				\
+	(high) = (u32)(__val >> 32);			\
+	(aux) = __aux;					\
+} while (0)
+
+#define rdtscpll(val, aux)				\
+do {							\
+	unsigned long __aux; 				\
+	val = read_tscp(&__aux);			\
+	(aux) = __aux;					\
+} while (0)
+
+#define rdpmc(counter, low, high)			\
+do {							\
+	unsigned long __val = read_pmc(counter);	\
+	(low) = (u32)(__val);				\
+	(high) = (u32)(__val >> 32);			\
+} while (0)
 
 static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx,
 			 unsigned int *ecx, unsigned int *edx)
 {
-	__asm__("cpuid"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (op));
+	*eax = op;
+	*ecx = 0;
+	__cpuid(eax, ebx, ecx, edx);
 }
 
 /* Some CPUID calls want 'count' to be placed in ecx */
 static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
 	       	int *edx)
 {
-	__asm__("cpuid"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (op), "c" (count));
+	*eax = op;
+	*ecx = count;
+	__cpuid(eax, ebx, ecx, edx);
 }
 
 /*
@@ -121,42 +204,29 @@ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
  */
 static inline unsigned int cpuid_eax(unsigned int op)
 {
-	unsigned int eax;
-
-	__asm__("cpuid"
-		: "=a" (eax)
-		: "0" (op)
-		: "bx", "cx", "dx");
+	unsigned int eax, ebx, ecx, edx;
+	cpuid(op, &eax, &ebx, &ecx, &edx);
 	return eax;
 }
+
 static inline unsigned int cpuid_ebx(unsigned int op)
 {
-	unsigned int eax, ebx;
-
-	__asm__("cpuid"
-		: "=a" (eax), "=b" (ebx)
-		: "0" (op)
-		: "cx", "dx" );
+	unsigned int eax, ebx, ecx, edx;
+	cpuid(op, &eax, &ebx, &ecx, &edx);
 	return ebx;
 }
+
 static inline unsigned int cpuid_ecx(unsigned int op)
 {
-	unsigned int eax, ecx;
-
-	__asm__("cpuid"
-		: "=a" (eax), "=c" (ecx)
-		: "0" (op)
-		: "bx", "dx" );
+	unsigned int eax, ebx, ecx, edx;
+	cpuid(op, &eax, &ebx, &ecx, &edx);
 	return ecx;
 }
+
 static inline unsigned int cpuid_edx(unsigned int op)
 {
-	unsigned int eax, edx;
-
-	__asm__("cpuid"
-		: "=a" (eax), "=d" (edx)
-		: "0" (op)
-		: "bx", "cx");
+	unsigned int eax, ebx, ecx, edx;
+	cpuid(op, &eax, &ebx, &ecx, &edx);
 	return edx;
 }
 
-- 
1.4.4.2




More information about the Lguest mailing list