[PATCH] Fix race between pte_free and hash_page

Benjamin Herrenschmidt benh at kernel.crashing.org
Sat Dec 13 15:52:40 EST 2003


On Sat, 2003-12-13 at 03:17, Anton Blanchard wrote:

> Looks good. Since we hold the pagetable lock, can we also check for
> mm users == 1 and take the fast path?

Yup. Here's a new one doing that. I also removed the test with
CPU_MASK_NONE (seems useless especially with the mm_users one)
and added proper initialization of the "index" field in the
batch as pointed out by Olof.

===== include/asm/pgalloc.h 1.11 vs edited =====
--- 1.11/include/asm-ppc64/pgalloc.h	Fri Sep 19 16:55:11 2003
+++ edited/include/asm/pgalloc.h	Sat Dec 13 15:49:57 2003
@@ -3,7 +3,10 @@

 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
 #include <asm/processor.h>
+#include <asm/tlb.h>

 extern kmem_cache_t *zero_cache;

@@ -62,15 +65,55 @@

 	return NULL;
 }
-
-static inline void
-pte_free_kernel(pte_t *pte)
+
+static inline void pte_free_kernel(pte_t *pte)
 {
 	kmem_cache_free(zero_cache, pte);
 }

 #define pte_free(pte_page)	pte_free_kernel(page_address(pte_page))
-#define __pte_free_tlb(tlb, pte)	pte_free(pte)
+
+struct pte_freelist_batch
+{
+	struct rcu_head	rcu;
+	unsigned int	index;
+	struct page *	pages[0];
+};
+
+#define PTE_FREELIST_SIZE	((PAGE_SIZE - sizeof(struct pte_freelist_batch) / \
+				  sizeof(struct page *)))
+
+extern void pte_free_now(struct page *ptepage);
+extern void pte_free_submit(struct pte_freelist_batch *batch);
+
+DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+{
+	/* This is safe as we are holding page_table_lock */
+        cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
+	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
+		pte_free(ptepage);
+		return;
+	}
+
+	if (*batchp == NULL) {
+		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
+		if (*batchp == NULL) {
+			pte_free_now(ptepage);
+			return;
+		}
+		(*batchp)->index = 0;
+	}
+	(*batchp)->pages[(*batchp)->index++] = ptepage;
+	if ((*batchp)->index == PTE_FREELIST_SIZE) {
+		pte_free_submit(*batchp);
+		*batchp = NULL;
+	}
+}

 #define check_pgt_cache()	do { } while (0)

===== include/asm/tlb.h 1.9 vs edited =====
--- 1.9/include/asm-ppc64/tlb.h	Tue Aug 19 12:46:23 2003
+++ edited/include/asm/tlb.h	Fri Dec 12 13:48:28 2003
@@ -74,6 +74,8 @@
 	batch->index = i;
 }

+extern void pte_free_finish(void);
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	int cpu = smp_processor_id();
@@ -86,6 +88,8 @@

 	flush_hash_range(tlb->mm->context, batch->index, local);
 	batch->index = 0;
+
+	pte_free_finish();
 }

 #endif /* _PPC64_TLB_H */
===== arch/ppc64/mm/init.c 1.52 vs edited =====
--- 1.52/arch/ppc64/mm/init.c	Fri Oct 24 00:10:29 2003
+++ edited/arch/ppc64/mm/init.c	Fri Dec 12 17:09:58 2003
@@ -94,6 +94,52 @@
  * include/asm-ppc64/tlb.h file -- tgall
  */
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+static void pte_free_smp_sync(void *arg)
+{
+	/* Do nothing, just ensure we sync with all CPUs */
+}
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+void pte_free_now(struct page *ptepage)
+{
+	pte_freelist_forced_free++;
+
+	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+	pte_free(ptepage);
+}
+
+static void pte_free_rcu_callback(void *arg)
+{
+	struct pte_freelist_batch *batch = arg;
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		pte_free(batch->pages[i]);
+	free_page((unsigned long)batch);
+}
+
+void pte_free_submit(struct pte_freelist_batch *batch)
+{
+	INIT_RCU_HEAD(&batch->rcu);
+	call_rcu(&batch->rcu, pte_free_rcu_callback, batch);
+}
+
+void pte_free_finish(void)
+{
+	/* This is safe as we are holding page_table_lock */
+	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+	if (*batchp == NULL)
+		return;
+	pte_free_submit(*batchp);
+	*batchp = NULL;
+}

 void show_mem(void)
 {


** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/





More information about the Linuxppc64-dev mailing list