[PATCH] Fix race between pte_free and hash_page
Benjamin Herrenschmidt
benh at kernel.crashing.org
Sat Dec 13 15:52:40 EST 2003
On Sat, 2003-12-13 at 03:17, Anton Blanchard wrote:
> Looks good. Since we hold the pagetable lock, can we also check for
> mm users == 1 and take the fast path?
Yup. Here's a new one doing that. I also removed the test with
CPU_MASK_NONE (seems useless especially with the mm_users one)
and added proper initialization of the "index" field in the
batch as pointed out by Olof.
===== include/asm/pgalloc.h 1.11 vs edited =====
--- 1.11/include/asm-ppc64/pgalloc.h Fri Sep 19 16:55:11 2003
+++ edited/include/asm/pgalloc.h Sat Dec 13 15:49:57 2003
@@ -3,7 +3,10 @@
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
#include <asm/processor.h>
+#include <asm/tlb.h>
extern kmem_cache_t *zero_cache;
@@ -62,15 +65,55 @@
return NULL;
}
-
-static inline void
-pte_free_kernel(pte_t *pte)
+
+/* Free a kernel PTE page back to zero_cache. */
+static inline void pte_free_kernel(pte_t *pte)
{
kmem_cache_free(zero_cache, pte);
}
#define pte_free(pte_page) pte_free_kernel(page_address(pte_page))
-#define __pte_free_tlb(tlb, pte) pte_free(pte)
+
+/*
+ * A batch of PTE pages queued for deferred freeing.  __pte_free_tlb()
+ * allocates each batch with __get_free_page(); pages[] uses the space
+ * that follows the header fields.
+ */
+struct pte_freelist_batch
+{
+ struct rcu_head rcu; /* handed to call_rcu() when the batch is submitted */
+ unsigned int index; /* number of entries currently stored in pages[] */
+ struct page * pages[0]; /* queued PTE pages; filled until index reaches PTE_FREELIST_SIZE */
+};
+
+/*
+ * Number of struct page pointers that fit in the single page backing a
+ * batch, after the pte_freelist_batch header.  The header size has to be
+ * subtracted from PAGE_SIZE *before* dividing by the pointer size;
+ * otherwise the division binds first and the result is close to PAGE_SIZE,
+ * letting pages[] run far past the end of the allocated page.
+ */
+#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \
+ sizeof(struct page *))
+
+extern void pte_free_now(struct page *ptepage);
+extern void pte_free_submit(struct pte_freelist_batch *batch);
+
+DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+
+/*
+ * Free a PTE page on behalf of an mmu_gather.
+ *
+ * If the mm has fewer than two users, or its cpu_vm_mask contains only the
+ * current CPU, the page is freed at once.  Otherwise it is appended to this
+ * CPU's pte_freelist_cur batch, which is handed to pte_free_submit() once
+ * it holds PTE_FREELIST_SIZE entries.
+ */
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+{
+ /* This is safe as we are holding page_table_lock */
+ cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
+ struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+ /* Fast path: single user, or the mm has only been used on this CPU */
+ if (atomic_read(&tlb->mm->mm_users) < 2 ||
+ cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
+ pte_free(ptepage);
+ return;
+ }
+
+ /* No current batch on this CPU: allocate one page to hold it */
+ if (*batchp == NULL) {
+ *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
+ if (*batchp == NULL) {
+ /* No memory for a batch: fall back to pte_free_now() */
+ pte_free_now(ptepage);
+ return;
+ }
+ (*batchp)->index = 0;
+ }
+ (*batchp)->pages[(*batchp)->index++] = ptepage;
+ /* Batch full: submit it and start a fresh one on the next call */
+ if ((*batchp)->index == PTE_FREELIST_SIZE) {
+ pte_free_submit(*batchp);
+ *batchp = NULL;
+ }
+}
#define check_pgt_cache() do { } while (0)
===== include/asm/tlb.h 1.9 vs edited =====
--- 1.9/include/asm-ppc64/tlb.h Tue Aug 19 12:46:23 2003
+++ edited/include/asm/tlb.h Fri Dec 12 13:48:28 2003
@@ -74,6 +74,8 @@
batch->index = i;
}
+extern void pte_free_finish(void);
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
int cpu = smp_processor_id();
@@ -86,6 +88,8 @@
flush_hash_range(tlb->mm->context, batch->index, local);
batch->index = 0;
+
+ pte_free_finish();
}
#endif /* _PPC64_TLB_H */
===== arch/ppc64/mm/init.c 1.52 vs edited =====
--- 1.52/arch/ppc64/mm/init.c Fri Oct 24 00:10:29 2003
+++ edited/arch/ppc64/mm/init.c Fri Dec 12 17:09:58 2003
@@ -94,6 +94,52 @@
* include/asm-ppc64/tlb.h file -- tgall
*/
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+/* Empty handler passed to smp_call_function() by pte_free_now(). */
+static void pte_free_smp_sync(void *arg)
+{
+ /* Do nothing, just ensure we sync with all CPUs */
+}
+
+/*
+ * Free a single PTE page without batching.
+ *
+ * This is only called when we are critically out of memory
+ * (and fail to get a page for the batch in __pte_free_tlb).
+ * Each call is counted in pte_freelist_forced_free.
+ */
+void pte_free_now(struct page *ptepage)
+{
+ pte_freelist_forced_free++;
+
+ /*
+  * Run the no-op handler on the other CPUs before freeing the page.
+  * NOTE(review): the final argument (1) presumably asks
+  * smp_call_function() to wait for completion - confirm.
+  */
+ smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+ pte_free(ptepage);
+}
+
+/*
+ * Callback registered with call_rcu() by pte_free_submit().
+ * Frees every PTE page recorded in the batch, then the page holding the
+ * batch itself.  arg is the struct pte_freelist_batch that was submitted.
+ */
+static void pte_free_rcu_callback(void *arg)
+{
+ struct pte_freelist_batch *batch = arg;
+ unsigned int i;
+
+ for (i = 0; i < batch->index; i++)
+ pte_free(batch->pages[i]);
+ free_page((unsigned long)batch);
+}
+
+/*
+ * Queue a batch with call_rcu() so that pte_free_rcu_callback() frees its
+ * pages later.  The callback also frees the batch, so the caller must drop
+ * its pointer to it after submitting.
+ */
+void pte_free_submit(struct pte_freelist_batch *batch)
+{
+ INIT_RCU_HEAD(&batch->rcu);
+ call_rcu(&batch->rcu, pte_free_rcu_callback, batch);
+}
+
+/*
+ * Submit this CPU's current batch, if there is one, and clear the per-CPU
+ * pointer so that a new batch is allocated the next time one is needed.
+ * Called at the end of tlb_flush().
+ */
+void pte_free_finish(void)
+{
+ /* This is safe as we are holding page_table_lock */
+ struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+ /* Nothing queued on this CPU */
+ if (*batchp == NULL)
+ return;
+ pte_free_submit(*batchp);
+ *batchp = NULL;
+}
void show_mem(void)
{
** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/
More information about the Linuxppc64-dev
mailing list