From: Hugh Dickins <hugh@veritas.com>

tlb_gather_mmu dates from before kernel preemption was allowed, and uses
smp_processor_id or __get_cpu_var to find its per-cpu mmu_gather.  That works
because it's currently only called after getting page_table_lock, which is not
dropped until after the matching tlb_finish_mmu.  But don't rely on that, it
will soon change: now disable preemption internally by proper get_cpu_var in
tlb_gather_mmu, put_cpu_var in tlb_finish_mmu.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 include/asm-arm/tlb.h     |    5 +++--
 include/asm-arm26/tlb.h   |    7 ++++---
 include/asm-generic/tlb.h |   10 +++++-----
 include/asm-ia64/tlb.h    |    6 ++++--
 include/asm-sparc64/tlb.h |    4 +++-
 5 files changed, 19 insertions(+), 13 deletions(-)

diff -puN include/asm-arm26/tlb.h~mm-tlb_gather_mmu-get_cpu_var include/asm-arm26/tlb.h
--- devel/include/asm-arm26/tlb.h~mm-tlb_gather_mmu-get_cpu_var	2005-09-25 15:31:15.000000000 -0700
+++ devel-akpm/include/asm-arm26/tlb.h	2005-09-25 15:31:15.000000000 -0700
@@ -17,13 +17,12 @@ struct mmu_gather {
         unsigned int            avoided_flushes;
 };
 
-extern struct mmu_gather mmu_gathers[NR_CPUS];
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-        int cpu = smp_processor_id();
-        struct mmu_gather *tlb = &mmu_gathers[cpu];
+        struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
         tlb->mm = mm;
         tlb->freed = 0;
@@ -52,6 +51,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
 
         /* keep the page table cache within bounds */
         check_pgt_cache();
+
+        put_cpu_var(mmu_gathers);
 }
 
 
diff -puN include/asm-arm/tlb.h~mm-tlb_gather_mmu-get_cpu_var include/asm-arm/tlb.h
--- devel/include/asm-arm/tlb.h~mm-tlb_gather_mmu-get_cpu_var	2005-09-25 15:31:15.000000000 -0700
+++ devel-akpm/include/asm-arm/tlb.h	2005-09-25 15:31:15.000000000 -0700
@@ -39,8 +39,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_g
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	int cpu = smp_processor_id();
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, cpu);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 	tlb->freed = 0;
@@ -65,6 +64,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb)
diff -puN include/asm-generic/tlb.h~mm-tlb_gather_mmu-get_cpu_var include/asm-generic/tlb.h
--- devel/include/asm-generic/tlb.h~mm-tlb_gather_mmu-get_cpu_var	2005-09-25 15:31:15.000000000 -0700
+++ devel-akpm/include/asm-generic/tlb.h	2005-09-25 15:31:15.000000000 -0700
@@ -35,9 +35,7 @@
 #endif
 
 /* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.  This structure
- * can be per-CPU or per-MM as the page table lock is held for the duration of
- * TLB shootdown.
+ * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
@@ -57,7 +55,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_g
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 
@@ -85,7 +83,7 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
 
 /* tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
- *	that were required.  The page table lock is still held at this point.
+ *	that were required.
  */
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
@@ -101,6 +99,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int
diff -puN include/asm-ia64/tlb.h~mm-tlb_gather_mmu-get_cpu_var include/asm-ia64/tlb.h
--- devel/include/asm-ia64/tlb.h~mm-tlb_gather_mmu-get_cpu_var	2005-09-25 15:31:15.000000000 -0700
+++ devel-akpm/include/asm-ia64/tlb.h	2005-09-25 15:31:15.000000000 -0700
@@ -129,7 +129,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *t
 static inline struct mmu_gather *
 tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 	/*
@@ -154,7 +154,7 @@ tlb_gather_mmu (struct mm_struct *mm, un
 
 /*
  * Called at the end of the shootdown operation to free up any resources that were
- * collected.  The page table lock is still held at this point.
+ * collected.
  */
 static inline void
 tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
@@ -174,6 +174,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, 
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int
diff -puN include/asm-sparc64/tlb.h~mm-tlb_gather_mmu-get_cpu_var include/asm-sparc64/tlb.h
--- devel/include/asm-sparc64/tlb.h~mm-tlb_gather_mmu-get_cpu_var	2005-09-25 15:31:15.000000000 -0700
+++ devel-akpm/include/asm-sparc64/tlb.h	2005-09-25 15:31:15.000000000 -0700
@@ -44,7 +44,7 @@ extern void flush_tlb_pending(void);
 
 static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+	struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
 
 	BUG_ON(mp->tlb_nr);
 
@@ -97,6 +97,8 @@ static inline void tlb_finish_mmu(struct
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp)
_