[PATCH] ppc64: SLB rewrite
author    Andrew Morton <akpm@osdl.org>
          Sat, 31 Jan 2004 02:00:40 +0000 (18:00 -0800)
committer Linus Torvalds <torvalds@home.osdl.org>
          Sat, 31 Jan 2004 02:00:40 +0000 (18:00 -0800)
From: Anton Blanchard <anton@samba.org>

The current SLB handling code has a number of problems:

- We loop trying to find an empty SLB entry before deciding to cast one
  out.  On large working sets this really hurts since the SLB is always full
  and we end up looping through all 64 entries unnecessarily.

- During castout we currently invalidate the entry we are replacing.  This
  is to avoid a nasty race where the entry is in the ERAT but not the SLB and
  another cpu does a tlbie that removes the ERAT entry at a critical point.  If
  this race is fixed, the slbie at castout can be removed.

- The SLB prefault code doesn't work properly.

The following patch addresses all the above concerns and adds some more
optimisations:

- nop out segment-table-only code using CPU feature sections

- slb invalidate the kernel stack segment on context switch (avoids us having
  to slb invalidate at each castout)

- optimise flush on context switch: the lazy tlb code avoids it being
  called when going from userspace to a kernel thread, but it still gets
  called when going from a kernel thread back to userspace.  In many cases we
  are returning to the same userspace task; we now check for this and avoid
  the flush

- use the optimised POWER4 mtcrf where possible
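
In rough C terms, the castout policy described above amounts to the sketch
below.  slb_read_entry() is a hypothetical wrapper around slbmfee, and
SLB_NUM_ENTRIES stands in for what the C path reads from naca->slb_size;
make_slbe() in the stab.c hunk is the real implementation:

    /*
     * Sketch only: take the next slot round robin, but never bolted
     * slot 0 and never the slot currently mapping our kernel stack.
     */
    static int pick_castout_slot(void)
    {
        unsigned long stack_esid = GET_ESID((unsigned long)_get_SP());
        int entry, castout;
        slb_dword0 old;

        entry = get_paca()->xStab_data.next_round_robin;
        do {
            castout = entry;
            if (++entry >= SLB_NUM_ENTRIES)
                entry = 1;                      /* never touch bolted slot 0 */
            old = slb_read_entry(castout);      /* hypothetical slbmfee wrapper */
        } while (old.v && old.esid == stack_esid);

        get_paca()->xStab_data.next_round_robin = entry;
        return castout;                 /* victim slot, overwritten via slbmte */
    }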

arch/ppc64/kernel/head.S
arch/ppc64/kernel/pacaData.c
arch/ppc64/kernel/process.c
arch/ppc64/kernel/stab.c
include/asm-ppc64/cputable.h
include/asm-ppc64/mmu.h
include/asm-ppc64/mmu_context.h
include/asm-ppc64/paca.h
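
For reference, the VSID computation performed by the rewritten do_slb_bolted
in the head.S hunk below (the rldicl/mulld sequence) corresponds roughly to
this C sketch.  bolted_vsid() is a made-up name; the constants are simply the
values assembled by the li/sldi/oris/ori sequence and the clrldi mask in the
patch:

    /* VSID_RANDOMIZER as built by the asm: (9 << 32) | (58231 << 16) | 39831 */
    #define VSID_RANDOMIZER 0x00000009e3779b97UL
    #define VSID_MASK       0x0000000fffffffffUL  /* clrldi ...,28 keeps low 36 bits */

    static inline unsigned long bolted_vsid(unsigned long ea)
    {
        /* ordinal = (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
        unsigned long ordinal = (((ea >> 28) & 0x1fff) << 15) | (ea >> 60);

        return (ordinal * VSID_RANDOMIZER) & VSID_MASK;
    }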

diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index c1c0c5022cea9253cbbfe7a096420a9116143bb8..6a16bfac8ad5cec49fe971ddc6beeda7cae1dbac 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -646,12 +646,14 @@ fast_exception_return:
  */
        .globl DataAccess_common
 DataAccess_common:
+BEGIN_FTR_SECTION
        mfspr   r22,DAR
        srdi    r22,r22,60
        cmpi    0,r22,0xc
 
        /* Segment fault on a bolted segment. Go off and map that segment. */
        beq-    .do_stab_bolted
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 stab_bolted_user_return:
        EXCEPTION_PROLOG_COMMON
        ld      r3,_DSISR(r1)
@@ -661,10 +663,12 @@ stab_bolted_user_return:
        rlwinm  r4,r3,32-23,29,29       /* DSISR_STORE -> _PAGE_RW */
        ld      r3,_DAR(r1)             /* into the hash table */
 
+BEGIN_FTR_SECTION
        beq+    2f                      /* If so handle it */
        li      r4,0x300                /* Trap number */
        bl      .do_stab_SI
        b       1f
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 
 2:     li      r5,0x300
        bl      .do_hash_page_DSI       /* Try to handle as hpte fault */
@@ -690,7 +694,7 @@ DataAccessSLB_common:
        EXCEPTION_PROLOG_COMMON
        ld      r3,_DAR(r1)
        li      r4,0x380                /* Exception vector  */
-       bl      .ste_allocate
+       bl      .slb_allocate
        or.     r3,r3,r3                /* Check return code */
        beq     fast_exception_return   /* Return if we succeeded */
        addi    r3,r1,STACK_FRAME_OVERHEAD
@@ -705,12 +709,14 @@ DataAccessSLB_common:
 InstructionAccess_common:
        EXCEPTION_PROLOG_COMMON
 
+BEGIN_FTR_SECTION
        andis.  r0,r23,0x0020           /* no ste found? */
        beq+    2f
        mr      r3,r22                  /* SRR0 at interrupt */
        li      r4,0x400                /* Trap number       */
        bl      .do_stab_SI
        b       1f
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 
 2:     mr      r3,r22
        li      r5,0x400
@@ -730,7 +736,7 @@ InstructionAccessSLB_common:
        EXCEPTION_PROLOG_COMMON
        mr      r3,r22                  /* SRR0 = NIA        */
        li      r4,0x480                /* Exception vector  */
-       bl      .ste_allocate
+       bl      .slb_allocate
        or.     r3,r3,r3                /* Check return code */
        beq+    fast_exception_return   /* Return if we succeeded */
 
@@ -1006,48 +1012,27 @@ _GLOBAL(do_stab_bolted)
  * r20 - r23, SRR0 and SRR1 are saved in the exception frame.
  * We assume we aren't going to take any exceptions during this procedure.
  */
+/* XXX note fix masking in get_kernel_vsid to match */
 _GLOBAL(do_slb_bolted)
-       stw     r23,EX_CCR(r21) /* save CR in exc. frame */
+       stw     r23,EX_CCR(r21)         /* save CR in exc. frame */
 
-       /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
-       mfspr   r21,DAR
-       rldicl  r20,r21,36,32   /* Permits a full 32b of ESID */
-       rldicr  r20,r20,15,48
-       rldicl  r21,r21,4,60
-       or      r20,r20,r21
-
-       li      r21,9           /* VSID_RANDOMIZER */
-       sldi    r21,r21,32
-       oris    r21,r21,58231
-       ori     r21,r21,39831
-
-       mulld   r20,r20,r21
-       clrldi  r20,r20,28      /* r20 = vsid */
-
-       /* Search the SLB for a free entry */
-       li      r22,1
-1:
-       slbmfee r23,r22
-       rldicl  r23,r23,37,63
-       cmpwi   r23,0
-       beq     4f              /* Found an invalid entry              */
-
-       addi    r22,r22,1
-       cmpldi  r22,64
-       blt     1b
+       /*
+        * We take the next entry, round robin. Previously we tried
+        * to find a free slot first but that took too long. Unfortunately
+        * we dont have any LRU information to help us choose a slot.
+        */
 
-       /* No free entry - just take the next entry, round-robin */
-       /* XXX we should get the number of SLB entries from the naca */
+       /* r20 = paca */
+       /* use a cpu feature mask if we ever change our slb size */
 SLB_NUM_ENTRIES = 64
-2:     mfspr   r21,SPRG3
-       ld      r22,PACASTABRR(r21)
-       addi    r23,r22,1
-       cmpdi   r23,SLB_NUM_ENTRIES
-       blt     3f
-       li      r23,1
-3:     std     r23,PACASTABRR(r21)
+1:     ld      r22,PACASTABRR(r20)
+       addi    r21,r22,1
+       cmpdi   r21,SLB_NUM_ENTRIES
+       blt+    2f
+       li      r21,1                   /* dont touch bolted slot 0 */
+2:     std     r21,PACASTABRR(r20)
 
-       /* r20 = vsid, r22 = entry */
+       /* r20 = paca, r22 = entry */
 
        /* 
         * Never cast out the segment for our kernel stack. Since we
@@ -1056,48 +1041,86 @@ SLB_NUM_ENTRIES = 64
         * which gets invalidated due to a tlbie from another cpu at a
         * non recoverable point (after setting srr0/1) - Anton
         */
-       slbmfee r23,r22
-       srdi    r23,r23,28
+       slbmfee r21,r22
+       srdi    r21,r21,27
        /*
         * This is incorrect (r1 is not the kernel stack) if we entered
         * from userspace but there is no critical window from userspace
         * so this should be OK. Also if we cast out the userspace stack
         * segment while in userspace we will fault it straight back in.
         */
-       srdi    r21,r1,28
-       cmpd    r21,r23
-       beq-    2b
-
-       /* Put together the vsid portion of the entry. */
-4:     li      r21,0
-       rldimi  r21,r20,12,0
-       ori     r20,r21,1024
-       ori     r20,r20,128    /* set class bit for kernel region */
-#ifndef CONFIG_PPC_ISERIES
-       ori     r20,r20,256    /* map kernel region with large ptes */
-#endif
+       srdi    r23,r1,27
+       ori     r23,r23,1
+       cmpd    r23,r21
+       beq-    1b
+
+       /* r20 = paca, r22 = entry */
+
+       /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
+       mfspr   r21,DAR
+       rldicl  r23,r21,36,51
+       sldi    r23,r23,15
+       srdi    r21,r21,60
+       or      r23,r23,r21
+
+       /* VSID_RANDOMIZER */
+       li      r21,9
+       sldi    r21,r21,32
+       oris    r21,r21,58231
+       ori     r21,r21,39831
+
+       /* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */
+       mulld   r23,r23,r21
+       clrldi  r23,r23,28
+
+       /* r20 = paca, r22 = entry, r23 = vsid */
+
+       /* Put together slb word1 */
+       sldi    r23,r23,12
+
+BEGIN_FTR_SECTION
+       /* set kp and c bits */
+       ori     r23,r23,0x480
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+BEGIN_FTR_SECTION
+       /* set kp, l and c bits */
+       ori     r23,r23,0x580
+END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+
+       /* r20 = paca, r22 = entry, r23 = slb word1 */
+
+       /* Put together slb word0 */
+       mfspr   r21,DAR
+       rldicr  r21,r21,0,35    /* get the new esid */
+       oris    r21,r21,2048    /* set valid bit */
+       rldimi  r21,r22,0,52    /* insert entry */
 
-       /* Put together the esid portion of the entry. */
-       mfspr   r21,DAR        /* Get the new esid                     */
-       rldicl  r21,r21,36,28  /* Permits a full 36b of ESID           */
-       li      r23,0
-       rldimi  r23,r21,28,0   /* Insert esid  */
-       oris    r21,r23,2048   /* valid bit    */
-       rldimi  r21,r22,0,52   /* Insert entry */
+       /* r20 = paca, r21 = slb word0, r23 = slb word1 */
 
        /* 
         * No need for an isync before or after this slbmte. The exception
         * we enter with and the rfid we exit with are context synchronizing .
         */
-       slbmte  r20,r21
+       slbmte  r23,r21
 
        /* All done -- return from exception. */
-       mfsprg  r20,3                   /* Load the PACA pointer  */
-       ld      r21,PACAEXCSP(r20)      /* Get the exception frame pointer */
-       addi    r21,r21,EXC_FRAME_SIZE
+       ld      r21,PACAEXCSP(r20)      /* Get the exception frame pointer */
+       addi    r21,r21,EXC_FRAME_SIZE
        lwz     r23,EX_CCR(r21)         /* get saved CR */
        /* note that this is almost identical to maskable_exception_exit */
-       mtcr    r23                     /* restore CR */
+
+       /*
+        * Until everyone updates binutils hardwire the POWER4 optimised
+        * single field mtcrf
+        */
+#if 0
+       .machine        push
+       .machine        "power4"
+       mtcrf   0x80,r23
+       .machine        pop
+#else
+       .long 0x7ef80120
+#endif
 
        mfmsr   r22
        li      r23, MSR_RI
@@ -1107,10 +1130,10 @@ SLB_NUM_ENTRIES = 64
        ld      r22,EX_SRR0(r21)        /* Get SRR0 from exc. frame */
        ld      r23,EX_SRR1(r21)        /* Get SRR1 from exc. frame */
        mtspr   SRR0,r22
-       mtspr   SRR1,r23
+       mtspr   SRR1,r23
        ld      r22,EX_R22(r21)         /* restore r22 and r23 */
        ld      r23,EX_R23(r21)
-       mfspr   r20,SPRG2
+       ld      r20,EX_R20(r21)
        mfspr   r21,SPRG1
        rfid
 
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c
index 1cc6e4d910e5cd31a2094a9a308971d52152b148..422254f349e437e2cb05288f54f12cf0ad72759b 100644
--- a/arch/ppc64/kernel/pacaData.c
+++ b/arch/ppc64/kernel/pacaData.c
@@ -41,7 +41,6 @@ struct systemcfg *systemcfg;
        .xStab_data = {                                                     \
                .real = (asrr),         /* Real pointer to segment table */ \
                .virt = (asrv),         /* Virt pointer to segment table */ \
-               .next_round_robin = 1   /* Round robin index */             \
        },                                                                  \
        .lpQueuePtr = (lpq),            /* &xItLpQueue, */                  \
        /* .xRtas = {                                                       \
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c
index 041827788def7c1d8ece41a3859accfc8dacd992..320b0b2e7e88bd29a267e4052b8a2751236708eb 100644
--- a/arch/ppc64/kernel/process.c
+++ b/arch/ppc64/kernel/process.c
@@ -151,7 +151,31 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
        local_irq_save(flags);
        last = _switch(old_thread, new_thread);
+
+       /*
+        * force our kernel stack out of the ERAT and SLB, this is to
+        * avoid the race where we it hangs around in the ERAT but not the
+        * SLB and the ERAT gets invalidated at just the wrong moment by
+        * another CPU doing a tlbie.
+        *
+        * We definitely dont want to flush our bolted segment, so check
+        * for that first.
+        */
+       if ((cur_cpu_spec->cpu_features & CPU_FTR_SLB) &&
+           GET_ESID((unsigned long)_get_SP()) != GET_ESID(PAGE_OFFSET)) {
+               union {
+                       unsigned long word0;
+                       slb_dword0 data;
+               } esid_data;
+
+               esid_data.word0 = 0;
+               /* class bit is in valid field for slbie instruction */
+               esid_data.data.v = 1;
+               esid_data.data.esid = GET_ESID((unsigned long)_get_SP());
+               asm volatile("isync; slbie %0; isync" : : "r" (esid_data));
+       }
        local_irq_restore(flags);
+
        return last;
 }
 
diff --git a/arch/ppc64/kernel/stab.c b/arch/ppc64/kernel/stab.c
index 656a347c22b2b1ff64b5a23319e29a4bf5a4a0fe..65b340d3e08ee28304456d9323f3b3ea699386dc 100644
--- a/arch/ppc64/kernel/stab.c
+++ b/arch/ppc64/kernel/stab.c
@@ -12,8 +12,6 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-/* XXX Note: Changes for bolted region have not been merged - Anton */
-
 #include <linux/config.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
@@ -59,6 +57,15 @@ void stab_initialize(unsigned long stab)
        }
 }
 
+/* Both the segment table and SLB code uses the following cache */
+#define NR_STAB_CACHE_ENTRIES 8
+DEFINE_PER_CPU(long, stab_cache_ptr);
+DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
+
+/*
+ * Segment table stuff
+ */
+
 /*
  * Create a segment table entry for the given esid/vsid pair.
  */
@@ -91,14 +98,8 @@ int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
 
        /*
         * Could not find empty entry, pick one with a round robin selection.
-        * Search all entries in the two groups.  Note that the first time
-        * we get here, we start with entry 1 so the initializer
-        * can be common with the SLB castout code.
+        * Search all entries in the two groups.
         */
-
-       /* This assumes we never castout when initializing the stab. */
-       PMC_SW_PROCESSOR(stab_capacity_castouts); 
-
        castout_entry = get_paca()->xStab_data.next_round_robin;
        for (i = 0; i < 16; i++) {
                if (castout_entry < 8) {
@@ -123,23 +124,169 @@ int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
        /* Modify the old entry to the new value. */
 
        /* Force previous translations to complete. DRENG */
-       asm volatile("isync" : : : "memory" );
+       asm volatile("isync" : : : "memory");
 
        castout_ste->dw0.dw0.v = 0;
-       asm volatile("sync" : : : "memory" );    /* Order update */
+       asm volatile("sync" : : : "memory");    /* Order update */
        castout_ste->dw1.dw1.vsid = vsid;
        old_esid = castout_ste->dw0.dw0.esid;
        castout_ste->dw0.dw0.esid = esid;
        castout_ste->dw0.dw0.kp = 1;
-       asm volatile("eieio" : : : "memory" );   /* Order update */
+       asm volatile("eieio" : : : "memory");   /* Order update */
        castout_ste->dw0.dw0.v  = 1;
        asm volatile("slbie  %0" : : "r" (old_esid << SID_SHIFT)); 
        /* Ensure completion of slbie */
-       asm volatile("sync" : : : "memory" );
+       asm volatile("sync" : : : "memory");
 
        return (global_entry | (castout_entry & 0x7));
 }
 
+static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
+                                 mm_context_t context)
+{
+       unsigned char stab_entry;
+       unsigned long *offset;
+       int region_id = REGION_ID(esid << SID_SHIFT);
+
+       stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid);
+
+       if (region_id != USER_REGION_ID)
+               return;
+
+       offset = &__get_cpu_var(stab_cache_ptr);
+       if (*offset < NR_STAB_CACHE_ENTRIES) {
+               __get_cpu_var(stab_cache[*offset]) = stab_entry;
+       }
+       (*offset)++;
+}
+
+/*
+ * Allocate a segment table entry for the given ea.
+ */
+int ste_allocate(unsigned long ea)
+{
+       unsigned long vsid, esid;
+       mm_context_t context;
+
+       /* Check for invalid effective addresses. */
+       if (!IS_VALID_EA(ea))
+               return 1;
+
+       /* Kernel or user address? */
+       if (REGION_ID(ea) >= KERNEL_REGION_ID) {
+               vsid = get_kernel_vsid(ea);
+               context = REGION_ID(ea);
+       } else {
+               if (!current->mm)
+                       return 1;
+
+               context = current->mm->context;
+               vsid = get_vsid(context, ea);
+       }
+
+       esid = GET_ESID(ea);
+       __ste_allocate(esid, vsid, context);
+       /* Order update */
+       asm volatile("sync":::"memory");
+
+       return 0;
+}
+
+/*
+ * preload some userspace segments into the segment table.
+ */
+static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
+{
+       unsigned long pc = KSTK_EIP(tsk);
+       unsigned long stack = KSTK_ESP(tsk);
+       unsigned long unmapped_base;
+       unsigned long pc_esid = GET_ESID(pc);
+       unsigned long stack_esid = GET_ESID(stack);
+       unsigned long unmapped_base_esid;
+       unsigned long vsid;
+
+       if (test_tsk_thread_flag(tsk, TIF_32BIT))
+               unmapped_base = TASK_UNMAPPED_BASE_USER32;
+       else
+               unmapped_base = TASK_UNMAPPED_BASE_USER64;
+
+       unmapped_base_esid = GET_ESID(unmapped_base);
+
+       if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID))
+               return;
+       vsid = get_vsid(mm->context, pc);
+       __ste_allocate(pc_esid, vsid, mm->context);
+
+       if (pc_esid == stack_esid)
+               return;
+
+       if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID))
+               return;
+       vsid = get_vsid(mm->context, stack);
+       __ste_allocate(stack_esid, vsid, mm->context);
+
+       if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid)
+               return;
+
+       if (!IS_VALID_EA(unmapped_base) ||
+           (REGION_ID(unmapped_base) >= KERNEL_REGION_ID))
+               return;
+       vsid = get_vsid(mm->context, unmapped_base);
+       __ste_allocate(unmapped_base_esid, vsid, mm->context);
+
+       /* Order update */
+       asm volatile("sync" : : : "memory");
+}
+
+/* Flush all user entries from the segment table of the current processor. */
+void flush_stab(struct task_struct *tsk, struct mm_struct *mm)
+{
+       STE *stab = (STE *) get_paca()->xStab_data.virt;
+       STE *ste;
+       unsigned long *offset = &__get_cpu_var(stab_cache_ptr);
+
+       /* Force previous translations to complete. DRENG */
+       asm volatile("isync" : : : "memory");
+
+       if (*offset <= NR_STAB_CACHE_ENTRIES) {
+               int i;
+
+               for (i = 0; i < *offset; i++) {
+                       ste = stab + __get_cpu_var(stab_cache[i]);
+                       ste->dw0.dw0.v = 0;
+               }
+
+               asm volatile("sync; slbia; sync":::"memory");
+       } else {
+               unsigned long entry;
+
+               /* Invalidate all entries. */
+               ste = stab;
+
+               /* Never flush the first entry. */
+               ste += 1;
+               for (entry = 1;
+                    entry < (PAGE_SIZE / sizeof(STE));
+                    entry++, ste++) {
+                       unsigned long ea;
+                       ea = ste->dw0.dw0.esid << SID_SHIFT;
+                       if (ea < KERNELBASE) {
+                               ste->dw0.dw0.v = 0;
+                       }
+               }
+
+               asm volatile("sync; slbia; sync":::"memory");
+       }
+
+       *offset = 0;
+
+       preload_stab(tsk, mm);
+}
+
+/*
+ * SLB stuff
+ */
+
 /*
  * Create a segment buffer entry for the given esid/vsid pair.
  *
@@ -160,22 +307,11 @@ void make_slbe(unsigned long esid, unsigned long vsid, int large,
        } vsid_data;
 
        /*
-        * Find an empty entry, if one exists. Must start at 0 because
-        * we use this code to load SLB entry 0 at boot.
-        */
-       for (entry = 0; entry < naca->slb_size; entry++) {
-               asm volatile("slbmfee  %0,%1" 
-                            : "=r" (esid_data) : "r" (entry)); 
-               if (!esid_data.data.v)
-                       goto write_entry;
-       }
-
-       /*
-        * Could not find empty entry, pick one with a round robin selection.
+        * We take the next entry, round robin. Previously we tried
+        * to find a free slot first but that took too long. Unfortunately
+        * we dont have any LRU information to help us choose a slot.
         */
 
-       PMC_SW_PROCESSOR(stab_capacity_castouts); 
-
        /* 
         * Never cast out the segment for our kernel stack. Since we
         * dont invalidate the ERAT we could have a valid translation
@@ -190,13 +326,13 @@ void make_slbe(unsigned long esid, unsigned long vsid, int large,
                if (castout_entry >= naca->slb_size)
                        castout_entry = 1; 
                asm volatile("slbmfee  %0,%1" : "=r" (esid_data) : "r" (entry));
-       } while (esid_data.data.esid == GET_ESID((unsigned long)_get_SP()));
+       } while (esid_data.data.v &&
+                esid_data.data.esid == GET_ESID((unsigned long)_get_SP()));
 
        get_paca()->xStab_data.next_round_robin = castout_entry;
 
        /* slbie not needed as the previous mapping is still valid. */
 
-write_entry:   
        /* 
         * Write the new SLB entry.
         */
@@ -220,211 +356,129 @@ write_entry:
        asm volatile("slbmte  %0,%1" : : "r" (vsid_data), "r" (esid_data)); 
 }
 
-static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
-                                 int kernel_segment, mm_context_t context)
+static inline void __slb_allocate(unsigned long esid, unsigned long vsid,
+                                 mm_context_t context)
 {
-       if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
-               int large = 0;
+       int large = 0;
+       int region_id = REGION_ID(esid << SID_SHIFT);
+       unsigned long *offset;
 
-#ifndef CONFIG_PPC_ISERIES
-               if (REGION_ID(esid << SID_SHIFT) == KERNEL_REGION_ID)
+       if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) {
+               if (region_id == KERNEL_REGION_ID)
                        large = 1;
-               else if (REGION_ID(esid << SID_SHIFT) == USER_REGION_ID)
+               else if (region_id == USER_REGION_ID)
                        large = in_hugepage_area(context, esid << SID_SHIFT);
-#endif
-               make_slbe(esid, vsid, large, kernel_segment);
-       } else {
-               unsigned char top_entry, stab_entry, *segments; 
-
-               stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid);
-               PMC_SW_PROCESSOR_A(stab_entry_use, stab_entry & 0xf); 
-
-               segments = get_paca()->xSegments;               
-               top_entry = get_paca()->stab_cache_pointer;
-               if (!kernel_segment && top_entry < STAB_CACHE_SIZE) {
-                       segments[top_entry] = stab_entry;
-                       if (top_entry == STAB_CACHE_SIZE)
-                               top_entry = 0xff;
-                       top_entry++;
-                       get_paca()->stab_cache_pointer = top_entry;
-               }
        }
+
+       make_slbe(esid, vsid, large, region_id != USER_REGION_ID);
+
+       if (region_id != USER_REGION_ID)
+               return;
+
+       offset = &__get_cpu_var(stab_cache_ptr);
+       if (*offset < NR_STAB_CACHE_ENTRIES) {
+               __get_cpu_var(stab_cache[*offset]) = esid;
+       }
+       (*offset)++;
 }
 
 /*
  * Allocate a segment table entry for the given ea.
  */
-int ste_allocate(unsigned long ea)
+int slb_allocate(unsigned long ea)
 {
        unsigned long vsid, esid;
-       int kernel_segment = 0;
        mm_context_t context;
 
-       PMC_SW_PROCESSOR(stab_faults); 
-
        /* Check for invalid effective addresses. */
-       if (!IS_VALID_EA(ea))
+       if (unlikely(!IS_VALID_EA(ea)))
                return 1;
 
        /* Kernel or user address? */
        if (REGION_ID(ea) >= KERNEL_REGION_ID) {
-               kernel_segment = 1;
-               vsid = get_kernel_vsid(ea);
                context = REGION_ID(ea);
+               vsid = get_kernel_vsid(ea);
        } else {
-               if (! current->mm)
+               if (unlikely(!current->mm))
                        return 1;
 
                context = current->mm->context;
-               
                vsid = get_vsid(context, ea);
        }
 
        esid = GET_ESID(ea);
-       __ste_allocate(esid, vsid, kernel_segment, context);
-       if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
-               /* Order update */
-               asm volatile("sync":::"memory"); 
-       }
+       __slb_allocate(esid, vsid, context);
 
        return 0;
 }
 
-unsigned long ppc64_preload_all_segments;
-unsigned long ppc64_stab_preload = 1;
-#define STAB_PRESSURE 0
-#define USE_SLBIE_ON_STAB 0
-
 /*
- * preload all 16 segments for a 32 bit process and the PC and SP segments
- * for a 64 bit process.
+ * preload some userspace segments into the SLB.
  */
-static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
+static void preload_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-       if (ppc64_preload_all_segments &&
-           test_tsk_thread_flag(tsk, TIF_32BIT)) {
-               unsigned long esid, vsid;
-
-               for (esid = 0; esid < 16; esid++) {
-                       unsigned long ea = esid << SID_SHIFT;
-                       vsid = get_vsid(mm->context, ea);
-                       __ste_allocate(esid, vsid, 0, mm->context);
-               }
-       } else {
-               unsigned long pc = KSTK_EIP(tsk);
-               unsigned long stack = KSTK_ESP(tsk);
-               unsigned long pc_segment = pc & ~SID_MASK;
-               unsigned long stack_segment = stack & ~SID_MASK;
-               unsigned long vsid;
-
-               if (pc) {
-                       if (!IS_VALID_EA(pc) || 
-                           (REGION_ID(pc) >= KERNEL_REGION_ID))
-                               return;
-                       vsid = get_vsid(mm->context, pc);
-                       __ste_allocate(GET_ESID(pc), vsid, 0, mm->context);
-               }
-
-               if (stack && (pc_segment != stack_segment)) {
-                       if (!IS_VALID_EA(stack) || 
-                           (REGION_ID(stack) >= KERNEL_REGION_ID))
-                               return;
-                       vsid = get_vsid(mm->context, stack);
-                       __ste_allocate(GET_ESID(stack), vsid, 0, mm->context);
-               }
-       }
-
-       if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
-               /* Order update */
-               asm volatile("sync" : : : "memory"); 
-       }
+       unsigned long pc = KSTK_EIP(tsk);
+       unsigned long stack = KSTK_ESP(tsk);
+       unsigned long unmapped_base;
+       unsigned long pc_esid = GET_ESID(pc);
+       unsigned long stack_esid = GET_ESID(stack);
+       unsigned long unmapped_base_esid;
+       unsigned long vsid;
+
+       if (test_tsk_thread_flag(tsk, TIF_32BIT))
+               unmapped_base = TASK_UNMAPPED_BASE_USER32;
+       else
+               unmapped_base = TASK_UNMAPPED_BASE_USER64;
+
+       unmapped_base_esid = GET_ESID(unmapped_base);
+
+       if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID))
+               return;
+       vsid = get_vsid(mm->context, pc);
+       __slb_allocate(pc_esid, vsid, mm->context);
+
+       if (pc_esid == stack_esid)
+               return;
+
+       if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID))
+               return;
+       vsid = get_vsid(mm->context, stack);
+       __slb_allocate(stack_esid, vsid, mm->context);
+
+       if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid)
+               return;
+
+       if (!IS_VALID_EA(unmapped_base) ||
+           (REGION_ID(unmapped_base) >= KERNEL_REGION_ID))
+               return;
+       vsid = get_vsid(mm->context, unmapped_base);
+       __slb_allocate(unmapped_base_esid, vsid, mm->context);
 }
 
 /* Flush all user entries from the segment table of the current processor. */
-void flush_stab(struct task_struct *tsk, struct mm_struct *mm)
+void flush_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-       if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
-               /*
-                * XXX disable 32bit slb invalidate optimisation until we fix
-                * the issue where a 32bit app execed out of a 64bit app can
-                * cause segments above 4GB not to be flushed - Anton
-                */
-               if (0 && !STAB_PRESSURE && test_thread_flag(TIF_32BIT)) {
-                       union {
-                               unsigned long word0;
-                               slb_dword0 data;
-                       } esid_data;
-                       unsigned long esid;
-
-                       asm volatile("isync" : : : "memory");
-                       for (esid = 0; esid < 16; esid++) {
-                               esid_data.word0 = 0;
-                               esid_data.data.esid = esid;
-                               asm volatile("slbie %0" : : "r" (esid_data));
-                       }
-                       asm volatile("isync" : : : "memory");
-               } else {
-                       asm volatile("isync; slbia; isync":::"memory");
-               }
+       unsigned long *offset = &__get_cpu_var(stab_cache_ptr);
 
-               PMC_SW_PROCESSOR(stab_invalidations);
-       } else {
-               STE *stab = (STE *) get_paca()->xStab_data.virt;
-               STE *ste;
-               unsigned long flags;
+       if (*offset <= NR_STAB_CACHE_ENTRIES) {
+               int i;
+               union {
+                       unsigned long word0;
+                       slb_dword0 data;
+               } esid_data;
 
-               /* Force previous translations to complete. DRENG */
                asm volatile("isync" : : : "memory");
-
-               local_irq_save(flags);
-               if (get_paca()->stab_cache_pointer != 0xff && !STAB_PRESSURE) {
-                       int i;
-                       unsigned char *segments = get_paca()->xSegments;
-
-                       for (i = 0; i < get_paca()->stab_cache_pointer; i++) {
-                               ste = stab + segments[i]; 
-                               ste->dw0.dw0.v = 0;
-                               PMC_SW_PROCESSOR(stab_invalidations); 
-                       }
-
-#if USE_SLBIE_ON_STAB
-                       asm volatile("sync":::"memory");
-                       for (i = 0; i < get_paca()->stab_cache_pointer; i++) {
-                               ste = stab + segments[i]; 
-                               asm volatile("slbie  %0" : :
-                                       "r" (ste->dw0.dw0.esid << SID_SHIFT)); 
-                       }
-                       asm volatile("sync":::"memory");
-#else
-                       asm volatile("sync; slbia; sync":::"memory");
-#endif
-
-               } else {
-                       unsigned long entry;
-
-                       /* Invalidate all entries. */
-                       ste = stab;
-
-                       /* Never flush the first entry. */ 
-                       ste += 1;
-                       for (entry = 1;
-                            entry < (PAGE_SIZE / sizeof(STE)); 
-                            entry++, ste++) {
-                               unsigned long ea;
-                               ea = ste->dw0.dw0.esid << SID_SHIFT;
-                               if (STAB_PRESSURE || ea < KERNELBASE) {
-                                       ste->dw0.dw0.v = 0;
-                                       PMC_SW_PROCESSOR(stab_invalidations); 
-                               }
-                       }
-
-                       asm volatile("sync; slbia; sync":::"memory");
+               for (i = 0; i < *offset; i++) {
+                       esid_data.word0 = 0;
+                       esid_data.data.esid = __get_cpu_var(stab_cache[i]);
+                       asm volatile("slbie %0" : : "r" (esid_data));
                }
-
-               get_paca()->stab_cache_pointer = 0;
-               local_irq_restore(flags);
+               asm volatile("isync" : : : "memory");
+       } else {
+               asm volatile("isync; slbia; isync" : : : "memory");
        }
 
-       if (ppc64_stab_preload)
-               preload_stab(tsk, mm);
+       *offset = 0;
+
+       preload_slb(tsk, mm);
 }
diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h
index e3c4ad343254ab315c5424dde9fb3fb40167b8bf..99c3abfba70495b8369297cbf3f9018fe3e3269e 100644
--- a/include/asm-ppc64/cputable.h
+++ b/include/asm-ppc64/cputable.h
@@ -135,10 +135,17 @@ extern firmware_feature_t firmware_features_table[];
 #define COMMON_USER_PPC64      (PPC_FEATURE_32 | PPC_FEATURE_64 | \
                                 PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU)
 
-#define CPU_FTR_PPCAS_ARCH_V2   (CPU_FTR_SLB | CPU_FTR_16M_PAGE | \
+#define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \
                                  CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
                                  CPU_FTR_NODSISRALIGN)
 
+/* iSeries doesn't support large pages */
+#ifdef CONFIG_PPC_ISERIES
+#define CPU_FTR_PPCAS_ARCH_V2  (CPU_FTR_PPCAS_ARCH_V2_BASE)
+#else
+#define CPU_FTR_PPCAS_ARCH_V2  (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE)
+#endif
+
 #define COMMON_PPC64_FW        (0)
 #endif
 
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index 95f3f8b4fbfde8294468b7aa9020e9c9b9fed5ea..3ecacc7de85f310c7972a589275a16f916270206 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -27,14 +27,6 @@ typedef unsigned long mm_context_t;
 #define CONTEXT_LOW_HPAGES     0
 #endif
 
-/*
- * Define the size of the cache used for segment table entries.  The first
- * entry is used as a cache pointer, therefore the actual number of entries
- * stored is one less than defined here.  Do not change this value without
- * considering the impact it will have on the layout of the paca in paca.h.
- */
-#define STAB_CACHE_SIZE 16
-
 /*
  * Hardware Segment Lookaside Buffer Entry
  * This structure has been padded out to two 64b doublewords (actual SLBE's are
diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h
index 48893d641bdc0d41ed14825734d536f4b28914dc..cdaf8abac56467c7bff9d3403b6ddd00b64b6ae8 100644
--- a/include/asm-ppc64/mmu_context.h
+++ b/include/asm-ppc64/mmu_context.h
@@ -139,6 +139,7 @@ destroy_context(struct mm_struct *mm)
 }
 
 extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm);
+extern void flush_slb(struct task_struct *tsk, struct mm_struct *mm);
 
 /*
  * switch_mm is the entry point called from the architecture independent
@@ -154,7 +155,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
         : : );
 #endif /* CONFIG_ALTIVEC */
-       flush_stab(tsk, next);
+
+       /* No need to flush userspace segments if the mm doesnt change */
+       if (prev == next)
+               return;
+
+       if (cur_cpu_spec->cpu_features & CPU_FTR_SLB)
+               flush_slb(tsk, next);
+       else
+               flush_stab(tsk, next);
        cpu_set(smp_processor_id(), next->cpu_vm_mask);
 }
 
diff --git a/include/asm-ppc64/paca.h b/include/asm-ppc64/paca.h
index d41818ab9a835c546646e2c94773e6dbbfd0466d..1babe35028d502b27475946284ba45bb8070378b 100644
--- a/include/asm-ppc64/paca.h
+++ b/include/asm-ppc64/paca.h
@@ -63,20 +63,15 @@ struct paca_struct {
        u16 xPacaIndex;                 /* Logical processor number             0x18 */
         u16 xHwProcNum;                 /* Physical processor number            0x1A */
        u32 default_decr;               /* Default decrementer value            0x1c */ 
-       u64 unused1;
-       u64 xKsave;                     /* Saved Kernel stack addr or zero      0x28 */
-       u64 pvr;                        /* Processor version register           0x30 */
-       u8 *exception_sp;               /*                                      0x38 */
-
-       struct ItLpQueue *lpQueuePtr;   /* LpQueue handled by this processor    0x40 */
-       u64  xTOC;                      /* Kernel TOC address                   0x48 */
-       STAB xStab_data;                /* Segment table information            0x50,0x58,0x60 */
-       u8 xSegments[STAB_CACHE_SIZE];  /* Cache of used stab entries           0x68,0x70 */
-       u8 xProcEnabled;                /* 1=soft enabled                       0x78 */
-       u8 unused2;
-       u8 prof_enabled;                /* 1=iSeries profiling enabled          0x7A */
-       u8 stab_cache_pointer;  
-       u8 resv1[4];                    /*                                      0x7B-0x7F */
+       u64 xKsave;                     /* Saved Kernel stack addr or zero      0x20 */
+       u64 pvr;                        /* Processor version register           0x28 */
+       struct ItLpQueue *lpQueuePtr;   /* LpQueue handled by this processor    0x30 */
+       u64  xTOC;                      /* Kernel TOC address                   0x38 */
+       STAB xStab_data;                /* Segment table information            0x40,0x48,0x50 */
+       u8 *exception_sp;               /*                                      0x58 */
+       u8 xProcEnabled;                /*                                      0x59 */
+       u8 prof_enabled;                /* 1=iSeries profiling enabled          0x60 */
+       u8 resv1[30];                   /*                                      0x61-0x7F */
 
 /*=====================================================================================
  * CACHE_LINE_2 0x0080 - 0x00FF