First cut at proper TLB shootdown for page directory entries.

author Linus Torvalds <torvalds@penguin.transmeta.com>

Wed, 15 May 2002 10:13:39 +0000 (03:13 -0700)

committer Linus Torvalds <torvalds@penguin.transmeta.com>

Wed, 15 May 2002 10:13:39 +0000 (03:13 -0700)
author Linus Torvalds <torvalds@penguin.transmeta.com>
Wed, 15 May 2002 10:13:39 +0000 (03:13 -0700)
committer Linus Torvalds <torvalds@penguin.transmeta.com>
Wed, 15 May 2002 10:13:39 +0000 (03:13 -0700)
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h

index 9f5766d595cf61d192ac1433e2338ace15cac04d..bc1c3aec9c1acb7ced9538d94b9bb931bab7e22f 100644 (file)
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -16,7 +16,6 @@
  #include <linux/config.h>
  #include <asm/tlbflush.h>
  
-#ifdef CONFIG_SMP
  /* aim for something that fits in the L1 cache */
  #define FREE_PTE_NR    508
  
@@ -26,90 +25,100 @@
   * shootdown.
   */
  typedef struct free_pte_ctx {
-       struct vm_area_struct   *vma;
+       struct mm_struct        *mm;
         unsigned long           nr;     /* set to ~0UL means fast mode */
-       unsigned long   start_addr, end_addr;
+       unsigned long           freed;
+       unsigned long           start_addr, end_addr;
         pte_t   ptes[FREE_PTE_NR];
  } mmu_gather_t;
  
  /* Users of the generic TLB shootdown code must declare this storage space. */
  extern mmu_gather_t    mmu_gathers[NR_CPUS];
  
+/* Per-VMA hooks: no-ops for now, to be implemented later */
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+
  /* tlb_gather_mmu
   *     Return a pointer to an initialized mmu_gather_t.
   */
-static inline mmu_gather_t *tlb_gather_mmu(struct vm_area_struct *vma)
+static inline mmu_gather_t *tlb_gather_mmu(struct mm_struct *mm)
  {
         mmu_gather_t *tlb = &mmu_gathers[smp_processor_id()];
-       struct mm_struct *mm = vma->vm_mm;
  
-       tlb->vma = vma;
+       tlb->mm = mm;
+       tlb->freed = 0;
         /* Use fast mode if there is only one user of this mm (this process) */
         tlb->nr = (atomic_read(&(mm)->mm_users) == 1) ? ~0UL : 0UL;
         return tlb;
  }
  
-/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *ptep, unsigned long addr)
- *     Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
- *     handling the additional races in SMP caused by other CPUs caching valid
- *     mappings in their TLBs.
- */
-#define tlb_remove_page(ctxp, pte, addr) do {\
-               /* Handle the common case fast, first. */\
-               if ((ctxp)->nr == ~0UL) {\
-                       __free_pte(*(pte));\
-                       pte_clear((pte));\
-                       break;\
-               }\
-               if (!(ctxp)->nr) \
-                       (ctxp)->start_addr = (addr);\
-               (ctxp)->ptes[(ctxp)->nr++] = ptep_get_and_clear(pte);\
-               (ctxp)->end_addr = (addr) + PAGE_SIZE;\
-               if ((ctxp)->nr >= FREE_PTE_NR)\
-                       tlb_finish_mmu((ctxp), 0, 0);\
-       } while (0)
-
-/* tlb_finish_mmu
- *     Called at the end of the shootdown operation to free up any resources
- *     that were required.  The page table lock is still held at this point.
- */
-static inline void tlb_finish_mmu(struct free_pte_ctx *ctx, unsigned long start, unsigned long end)
+static inline void tlb_flush_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
  {
         unsigned long i, nr;
  
         /* Handle the fast case first. */
-       if (ctx->nr == ~0UL) {
-               flush_tlb_range(ctx->vma, start, end);
+       if (tlb->nr == ~0UL) {
+               flush_tlb_mm(tlb->mm);
                 return;
         }
-       nr = ctx->nr;
-       ctx->nr = 0;
+       nr = tlb->nr;
+       tlb->nr = 0;
         if (nr)
-               flush_tlb_range(ctx->vma, ctx->start_addr, ctx->end_addr);
+               flush_tlb_mm(tlb->mm);
         for (i=0; i < nr; i++) {
-               pte_t pte = ctx->ptes[i];
+               pte_t pte = tlb->ptes[i];
                 __free_pte(pte);
         }
  }
  
-#else
-
-/* The uniprocessor functions are quite simple and are inline macros in an
- * attempt to get gcc to generate optimal code since this code is run on each
- * page in a process at exit.
+/* tlb_finish_mmu
+ *     Called at the end of the shootdown operation to free up any resources
+ *     that were required.  The page table lock is still held at this point.
   */
-typedef struct vm_area_struct mmu_gather_t;
+static inline void tlb_finish_mmu(mmu_gather_t *tlb, unsigned long start, unsigned long end)
+{
+       int freed = tlb->freed;
+       struct mm_struct *mm = tlb->mm;
+       int rss = mm->rss;
+
+       if (rss < freed)
+               freed = rss;
+       mm->rss = rss - freed;
  
-#define tlb_gather_mmu(vma)    (vma)
-#define tlb_finish_mmu(tlb, start, end)        flush_tlb_range(tlb, start, end)
-#define tlb_remove_page(tlb, ptep, addr)       do {\
-               pte_t __pte = *(ptep);\
-               pte_clear(ptep);\
-               __free_pte(__pte);\
-       } while (0)
+       tlb_flush_mmu(tlb, start, end);
+}
  
-#endif
  
+/* void tlb_remove_page(mmu_gather_t *tlb, pte_t *pte, unsigned long addr)
+ *     Must perform the equivalent to __free_pte(pte_get_and_clear(pte)), while
+ *     handling the additional races in SMP caused by other CPUs caching valid
+ *     mappings in their TLBs.  Counts each valid, non-reserved page in tlb->freed.
+ */
+static inline void tlb_remove_page(mmu_gather_t *tlb, pte_t *pte, unsigned long addr)
+{
+       struct page *page;
+       unsigned long pfn = pte_pfn(*pte);
+
+       if (pfn_valid(pfn)) {
+               page = pfn_to_page(pfn);
+               if (!PageReserved(page))
+                       tlb->freed++;
+       }
+
+       /* Handle the common case fast, first. */
+       if (tlb->nr == ~0UL) {
+               __free_pte(*pte);
+               pte_clear(pte);
+               return;
+       }
+       if (!tlb->nr)   /* first entry of a batch: remember where it starts */
+               tlb->start_addr = addr;
+       tlb->ptes[tlb->nr++] = ptep_get_and_clear(pte);
+       tlb->end_addr = addr + PAGE_SIZE;
+       if (tlb->nr >= FREE_PTE_NR)     /* batch full: flush only; rss is settled once, in tlb_finish_mmu() */
+               tlb_flush_mmu(tlb, 0, 0);
+}
  
  #endif /* _ASM_GENERIC__TLB_H */
  
diff --git a/mm/memory.c b/mm/memory.c

index c15a59f0512f39a66004d3f3a019482d9808f35e..20ac8224141259b405fadc953897818be370b31f 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -133,18 +133,18 @@ static inline void free_one_pgd(pgd_t * dir)
  /*
   * This function clears all user-level page tables of a process - this
   * is needed by execve(), so that old pages aren't in the way.
+ *
+ * Must be called with mm->page_table_lock held.
   */
  void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
  {
         pgd_t * page_dir = mm->pgd;
  
-       spin_lock(&mm->page_table_lock);
         page_dir += first;
         do {
                 free_one_pgd(page_dir);
                 page_dir++;
         } while (--nr);
-       spin_unlock(&mm->page_table_lock);
  
         /* keep the page table cache within bounds */
         check_pgt_cache();
@@ -340,18 +340,17 @@ static inline void forget_pte(pte_t page)
         }
  }
  
-static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
+static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
  {
         unsigned long offset;
         pte_t *ptep;
-       int freed = 0;
  
         if (pmd_none(*pmd))
-               return 0;
+               return;
         if (pmd_bad(*pmd)) {
                 pmd_ERROR(*pmd);
                 pmd_clear(pmd);
-               return 0;
+               return;
         }
         ptep = pte_offset_map(pmd, address);
         offset = address & ~PMD_MASK;
@@ -363,13 +362,6 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
                 if (pte_none(pte))
                         continue;
                 if (pte_present(pte)) {
-                       struct page *page;
-                       unsigned long pfn = pte_pfn(pte);
-                       if (pfn_valid(pfn)) {
-                               page = pfn_to_page(pfn);
-                               if (!PageReserved(page))
-                                       freed++;
-                       }
                         /* This will eventually call __free_pte on the pte. */
                         tlb_remove_page(tlb, ptep, address + offset);
                 } else {
@@ -378,34 +370,45 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad
                 }
         }
         pte_unmap(ptep-1);
-
-       return freed;
  }
  
-static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
+static void zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
  {
         pmd_t * pmd;
         unsigned long end;
-       int freed;
  
         if (pgd_none(*dir))
-               return 0;
+               return;
         if (pgd_bad(*dir)) {
                 pgd_ERROR(*dir);
                 pgd_clear(dir);
-               return 0;
+               return;
         }
         pmd = pmd_offset(dir, address);
         end = address + size;
         if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
                 end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-       freed = 0;
         do {
-               freed += zap_pte_range(tlb, pmd, address, end - address);
+               zap_pte_range(tlb, pmd, address, end - address);
                 address = (address + PMD_SIZE) & PMD_MASK; 
                 pmd++;
         } while (address < end);
-       return freed;
+}
+
+void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end)
+{
+       pgd_t * dir;
+       /* Zap [address, end) of vma through tlb; callers run tlb_finish_mmu() afterwards. */
+       if (address >= end)     /* an empty or inverted range is treated as a caller bug */
+               BUG();
+       dir = pgd_offset(vma->vm_mm, address);
+       tlb_start_vma(tlb, vma);
+       do {    /* one page-directory entry per iteration */
+               zap_pmd_range(tlb, dir, address, end - address);
+               address = (address + PGDIR_SIZE) & PGDIR_MASK;  /* advance to the next PGDIR boundary */
+               dir++;
+       } while (address && (address < end));   /* address == 0: presumably the increment wrapped past the top */
+       tlb_end_vma(tlb, vma);
  }
  
  /*
@@ -417,7 +420,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
         mmu_gather_t *tlb;
         pgd_t * dir;
         unsigned long start = address, end = address + size;
-       int freed = 0;
  
         dir = pgd_offset(mm, address);
  
@@ -432,25 +434,10 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned
                 BUG();
         spin_lock(&mm->page_table_lock);
         flush_cache_range(vma, address, end);
-       tlb = tlb_gather_mmu(vma);
  
-       do {
-               freed += zap_pmd_range(tlb, dir, address, end - address);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               dir++;
-       } while (address && (address < end));
-
-       /* this will flush any remaining tlb entries */
+       tlb = tlb_gather_mmu(mm);
+       unmap_page_range(tlb, vma, address, end);
         tlb_finish_mmu(tlb, start, end);
-
-       /*
-        * Update rss for the mm_struct (not necessarily current->mm)
-        * Notice that rss is an unsigned long.
-        */
-       if (mm->rss > freed)
-               mm->rss -= freed;
-       else
-               mm->rss = 0;
         spin_unlock(&mm->page_table_lock);
  }
  
diff --git a/mm/mmap.c b/mm/mmap.c

index fbcab6f042df6fd8f3ebe4083962f5481074dd50..d7d119514567e90cadeea8e16d89cee454a54015 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -17,7 +17,9 @@
  
  #include <asm/uaccess.h>
  #include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+extern void unmap_page_range(mmu_gather_t *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end); /* last argument is an end address, not a length */
  
  /*
   * WARNING: the debugging will use recursive algorithms so never enable this
@@ -329,11 +331,11 @@ static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma,  stru
  static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
                             rb_node_t ** rb_link, rb_node_t * rb_parent)
  {
-       lock_vma_mappings(vma);
         spin_lock(&mm->page_table_lock);
+       lock_vma_mappings(vma);
         __vma_link(mm, vma, prev, rb_link, rb_parent);
-       spin_unlock(&mm->page_table_lock);
         unlock_vma_mappings(vma);
+       spin_unlock(&mm->page_table_lock);
  
         mm->map_count++;
         validate_mm(mm);
@@ -781,13 +783,11 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
                  */
                 area->vm_end = addr;
                 lock_vma_mappings(area);
-               spin_lock(&mm->page_table_lock);
         } else if (addr == area->vm_start) {
                 area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
                 /* same locking considerations of the above case */
                 area->vm_start = end;
                 lock_vma_mappings(area);
-               spin_lock(&mm->page_table_lock);
         } else {
         /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
                 /* Add end mapping -- leave beginning for below */
@@ -814,12 +814,10 @@ static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
                  * things correctly.
                  */
                 lock_vma_mappings(area);
-               spin_lock(&mm->page_table_lock);
                 __insert_vm_struct(mm, mpnt);
         }
  
         __insert_vm_struct(mm, area);
-       spin_unlock(&mm->page_table_lock);
         unlock_vma_mappings(area);
         return extra;
  }
@@ -889,6 +887,7 @@ no_mmaps:
   */
  int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
  {
+       mmu_gather_t *tlb;
         struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
  
         if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
@@ -933,7 +932,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
                 rb_erase(&mpnt->vm_rb, &mm->mm_rb);
         }
         mm->mmap_cache = NULL;  /* Kill the cache. */
-       spin_unlock(&mm->page_table_lock);
+
+       tlb = tlb_gather_mmu(mm);
  
         /* Ok - we have the memory areas we should free on the 'free' list,
          * so release them, and unmap the page range..
@@ -942,7 +942,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
          * In that case we have to be careful with VM_DENYWRITE.
          */
         while ((mpnt = free) != NULL) {
-               unsigned long st, end, size;
+               unsigned long st, end;
                 struct file *file = NULL;
  
                 free = free->vm_next;
@@ -950,7 +950,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
                 st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
                 end = addr+len;
                 end = end > mpnt->vm_end ? mpnt->vm_end : end;
-               size = end - st;
  
                 if (mpnt->vm_flags & VM_DENYWRITE &&
                     (st != mpnt->vm_start || end != mpnt->vm_end) &&
@@ -960,12 +959,12 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
                 remove_shared_vm_struct(mpnt);
                 mm->map_count--;
  
-               zap_page_range(mpnt, st, size);
+               unmap_page_range(tlb, mpnt, st, end);
  
                 /*
                  * Fix the mapping, and free the old area if it wasn't reused.
                  */
-               extra = unmap_fixup(mm, mpnt, st, size, extra);
+               extra = unmap_fixup(mm, mpnt, st, end-st, extra);
                 if (file)
                         atomic_inc(&file->f_dentry->d_inode->i_writecount);
         }
@@ -976,6 +975,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
                 kmem_cache_free(vm_area_cachep, extra);
  
         free_pgtables(mm, prev, addr, addr+len);
+       tlb_finish_mmu(tlb, addr, addr+len);
+       spin_unlock(&mm->page_table_lock);
  
         return 0;
  }
@@ -1092,6 +1093,7 @@ void build_mmap_rb(struct mm_struct * mm)
  /* Release all mmaps. */
  void exit_mmap(struct mm_struct * mm)
  {
+       mmu_gather_t *tlb;
         struct vm_area_struct * mpnt;
  
         release_segments(mm);
@@ -1100,16 +1102,16 @@ void exit_mmap(struct mm_struct * mm)
         mm->mmap = mm->mmap_cache = NULL;
         mm->mm_rb = RB_ROOT;
         mm->rss = 0;
-       spin_unlock(&mm->page_table_lock);
         mm->total_vm = 0;
         mm->locked_vm = 0;
  
+       tlb = tlb_gather_mmu(mm);
+
         flush_cache_mm(mm);
         while (mpnt) {
                 struct vm_area_struct * next = mpnt->vm_next;
                 unsigned long start = mpnt->vm_start;
                 unsigned long end = mpnt->vm_end;
-               unsigned long size = end - start;
  
                 if (mpnt->vm_ops) {
                         if (mpnt->vm_ops->close)
@@ -1117,19 +1119,20 @@ void exit_mmap(struct mm_struct * mm)
                 }
                 mm->map_count--;
                 remove_shared_vm_struct(mpnt);
-               zap_page_range(mpnt, start, size);
+               unmap_page_range(tlb, mpnt, start, end);
                 if (mpnt->vm_file)
                         fput(mpnt->vm_file);
                 kmem_cache_free(vm_area_cachep, mpnt);
                 mpnt = next;
         }
-       flush_tlb_mm(mm);
  
         /* This is just debugging */
         if (mm->map_count)
                 BUG();
  
         clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
+       tlb_finish_mmu(tlb, FIRST_USER_PGD_NR*PGDIR_SIZE, USER_PTRS_PER_PGD*PGDIR_SIZE);
+       spin_unlock(&mm->page_table_lock);
  }
  
  /* Insert vm structure into process list sorted by address
author	Linus Torvalds <torvalds@penguin.transmeta.com>
	Wed, 15 May 2002 10:13:39 +0000 (03:13 -0700)
committer	Linus Torvalds <torvalds@penguin.transmeta.com>
	Wed, 15 May 2002 10:13:39 +0000 (03:13 -0700)
include/asm-generic/tlb.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mmap.c		patch \| blob \| history