git.neil.brown.name Git - history.git/commitdiff
Import 2.0.33pre1 2.0.33pre1
authorAlan Cox <alan@lxorguk.ukuu.org.uk>
Fri, 23 Nov 2007 20:11:36 +0000 (15:11 -0500)
committerAlan Cox <alan@lxorguk.ukuu.org.uk>
Fri, 23 Nov 2007 20:11:36 +0000 (15:11 -0500)
Makefile
arch/i386/kernel/head.S
arch/i386/kernel/traps.c
arch/i386/mm/fault.c
include/asm-i386/pgtable.h
include/linux/head.h

index d36bea55159ef7dc02fd2dd7ac939917e4113a66..5300782c5d95eb0cb2c1863a5607e2069b7acb9b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 2
 PATCHLEVEL = 0
-SUBLEVEL = 32
+SUBLEVEL = 33
 
 ARCH = i386
 
index 28b8160067dc91827a877e16574a2e8c98fdd51a..f941d366a2d1ca85ba0b13bb061bee1ce54149c0 100644 (file)
@@ -260,7 +260,7 @@ setup_idt:
        movw %dx,%ax            /* selector = 0x0010 = cs */
        movw $0x8E00,%dx        /* interrupt gate - dpl=0, present */
 
-       lea SYMBOL_NAME(__idt),%edi
+       lea SYMBOL_NAME(idt),%edi
        mov $256,%ecx
 rp_sidt:
        movl %eax,(%edi)
@@ -342,6 +342,11 @@ stack_start:
        .long SYMBOL_NAME(init_user_stack)+4096
        .long KERNEL_DS
 
+/* NOTE: keep the idt short behind the above '.org 0x6000'
+        It must fit completely within _one_ page */
+ENTRY(idt)
+       .fill 256,8,0           # idt is uninitialized
+
 /* This is the default interrupt "handler" :-) */
 int_msg:
        .asciz "Unknown interrupt\n"
@@ -376,10 +381,7 @@ ignore_int:
 .word 0
 idt_descr:
        .word 256*8-1           # idt contains 256 entries
-       .long 0xc0000000+SYMBOL_NAME(__idt)
-
-ENTRY(__idt)
-       .fill 256,8,0           # idt is uninitialized
+       .long 0xc0000000+SYMBOL_NAME(idt)
 
        ALIGN
 .word 0
index 0b36152db1f6c6901991cc7963e4fdce6c6edf61..7c580ba6d73d0e329f1f09018c99e5a1d68a5eab 100644 (file)
@@ -337,7 +337,6 @@ asmlinkage void math_emulate(long arg)
 
 #endif /* CONFIG_MATH_EMULATION */
 
-struct desc_struct *idt = __idt+0;
 struct {
        unsigned short limit;
        unsigned long addr __attribute__((packed));
@@ -348,34 +347,36 @@ void trap_init_f00f_bug(void)
        pgd_t * pgd;
        pmd_t * pmd;
        pte_t * pte;
-       unsigned long twopage;
-       struct desc_struct *new_idt;
+       unsigned long page;
+       unsigned long idtpage = (unsigned long)idt;
+       struct desc_struct *alias_idt;
 
-       printk("moving IDT ... ");
+       printk("alias mapping IDT readonly ... ");
 
-       twopage = (unsigned long) vmalloc (2*PAGE_SIZE);
-
-       new_idt = (void *)(twopage + 4096-7*8);
-
-       memcpy(new_idt,idt,256*8);
+               /* just to get free address space */
+       page = (unsigned long) vmalloc (PAGE_SIZE);
 
+       alias_idt = (void *)(page + (idtpage & ~PAGE_MASK));
        idt_descriptor.limit = 256*8-1;
-       idt_descriptor.addr = VMALLOC_VMADDR(new_idt);
-
-        __asm__ __volatile__("\tlidt %0": "=m" (idt_descriptor));
-        idt = new_idt;
+       idt_descriptor.addr = VMALLOC_VMADDR(alias_idt);
 
        /*
-        * Unmap lower page:
+        * alias map the original idt to the alias page:
         */
-       twopage = VMALLOC_VMADDR(twopage);
-       pgd = pgd_offset(current->mm, twopage);
-       pmd = pmd_offset(pgd, twopage);
-       pte = pte_offset(pmd, twopage);
-
-       pte_clear(pte);
+       page = VMALLOC_VMADDR(page);
+       pgd = pgd_offset(&init_mm, page);
+       pmd = pmd_offset(pgd, page);
+       pte = pte_offset(pmd, page);
+               /* give memory back to the pool, don't need it */
+       free_page(pte_page(*pte));
+               /* ... and set the readonly alias */
+       set_pte(pte, mk_pte(idtpage  & PAGE_MASK, PAGE_KERNEL));
+       *pte = pte_wrprotect(*pte);
        flush_tlb_all();
 
+               /* now we have the mapping ok, we can do LIDT */
+        __asm__ __volatile__("\tlidt %0": "=m" (idt_descriptor));
+
        printk(" ... done\n");
 }
 
index 3c575a2bcaed2567eede354dec684d2de351cae3..471a9ae3d88e1c7f7ffd4e6dfe4e0235af88b834 100644 (file)
 
 extern void die_if_kernel(const char *,struct pt_regs *,long);
 
-asmlinkage void do_divide_error (struct pt_regs *, unsigned long);
-asmlinkage void do_debug (struct pt_regs *, unsigned long);
-asmlinkage void do_nmi (struct pt_regs *, unsigned long);
-asmlinkage void do_int3 (struct pt_regs *, unsigned long);
-asmlinkage void do_overflow (struct pt_regs *, unsigned long);
-asmlinkage void do_bounds (struct pt_regs *, unsigned long);
 asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
-asmlinkage void do_general_protection (struct pt_regs *, unsigned long);
 
 extern int pentium_f00f_bug;
 
-static int handle_intx_eip_adjust(struct pt_regs *regs)
-{
-       unsigned char *addr, *csp = 0;
-       int wrap = 0;
-       int count = 8; /* only check for reasonable number of bytes
-                        * else we do it the save 'simple way' */
-       unsigned long _eip;
-#define XX_WRAP(x) (wrap ? *((unsigned short *)&x) : x)
-
-       /* We rely on being able to access the memory pointed to by cs:eip
-        * and the bytes behind it up to the faulting instruction,
-        * because we just got an exception for this instruction and
-        * hence the memory should just be successfully accessed.
-        * In case of crossing a page boundary or when accessing kernel space
-        * we just do the simple fix (increase eip by one).
-        * This assumption also obsoletes checking of segment limit.
-        * ( should be veryfied, however, if this assumption is true )
-        */
-
-       if (regs->cs == KERNEL_CS) {
-               /* not what we expect */
-               regs->eip++;
-               return 0;
-       }
-
-       if (regs->eflags & VM_MASK) {
-               /* we have real mode type selector */
-               wrap = 1;
-               csp = (unsigned char *)((unsigned long)regs->cs << 4);
-       }
-       else if (regs->cs & 4) {
-               /* we have a LDT selector */
-               struct desc_struct *p, *ldt = current->ldt;
-               if (!ldt)
-                       ldt = (struct desc_struct*) &default_ldt;
-               p = ldt + (regs->cs >> 3);
-               csp = (unsigned char *)((p->a >> 16) | ((p->b & 0xff) << 16) | (p->b & 0xFF000000));
-               if (!(p->b & 0x400000))
-                       wrap = 1;       /* 16-bit segment */
-       }
-
-       _eip = regs->eip;
-       addr = csp+XX_WRAP(_eip);
-       while (count-- > 0) {
-               if ((unsigned long)addr >= TASK_SIZE) {
-                       /* accessing kernel space, do the simple case */
-                       regs->eip++;
-                       return 0;
-               }
-               switch (get_user(addr)) {
-
-                       case 0xCC:      /* single byte INT3 */
-                               XX_WRAP(_eip)++;
-                               regs->eip = _eip;
-                               return 0;
-
-                       case 0xCD:      /* two byte INT 3 */
-                               XX_WRAP(_eip)++;
-                               /* fall through */
-                       case 0xCE:      /* INTO, single byte */
-                               XX_WRAP(_eip)++;
-                               if ( (regs->eflags & VM_MASK)
-                                       && ((regs->eflags & IOPL_MASK) != IOPL_MASK)) {
-                                       /* not allowed, do GP0 fault */
-                                       do_general_protection(regs, 0);
-                                       return -1;
-                               }
-                               regs->eip = _eip;
-                               return 0;
-
-                                       /* the prefixes from the Intel patch */
-                       case 0xF2 ... 0xF3:
-                       case 0x2E:
-                       case 0x36:
-                       case 0x3E:
-                       case 0x26:
-                       case 0x64 ... 0x67:
-                               break;  /* just skipping them */
-
-                       default:
-                               /* not what we handle here,
-                                * just doing the simple fix
-                                */
-                               regs->eip++;
-                               return 0;
-               }
-
-               if ( !(++XX_WRAP(_eip)) ) {
-                       /* we wrapped around */
-                       regs->eip++;
-                       return 0;
-               }
-
-               addr = csp+XX_WRAP(_eip);
-               if ( !((unsigned long)addr & ~(PAGE_SIZE -1)) ) {
-                       /* we would cross page boundary, not good,
-                        * doing the simple fix
-                        */
-                       regs->eip++;
-                       return 0;
-               }
-       }
-
-       /* if we come here something weird happened,
-        * just doing the simple fix
-        */
-       regs->eip++;
-       return 0;
-}
-
-
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -245,22 +127,15 @@ bad_area:
         */
        if ( pentium_f00f_bug ) {
                unsigned long nr;
+               extern struct {
+                       unsigned short limit;
+                       unsigned long addr __attribute__((packed));
+               } idt_descriptor;
 
-               nr = (address - TASK_SIZE - (unsigned long) idt) >> 3;
+               nr = (address - idt_descriptor.addr) >> 3;
 
-               if (nr < 7) {
-                       static void (*handler[])(struct pt_regs *, unsigned long) = {
-                               do_divide_error,        /* 0 - divide overflow */
-                               do_debug,               /* 1 - debug trap */
-                               do_nmi,                 /* 2 - NMI */
-                               do_int3,                /* 3 - int 3 */
-                               do_overflow,            /* 4 - overflow */
-                               do_bounds,              /* 5 - bound range */
-                               do_invalid_op };        /* 6 - invalid opcode */
-                       if ((nr == 3) || (nr == 4))
-                               if (handle_intx_eip_adjust(regs))
-                                       return;
-                       handler[nr](regs, error_code);
+               if (nr == 6) {
+                       do_invalid_op(regs, 0);
                        return;
                }
        }
index f48954ca8259732061af5987309c382604c98081..c46514d2cc65c7f4f62a035750ed7e2a7a053db3 100644 (file)
 #define __flush_tlb() \
 do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3":"=r" (tmpreg) : :"memory"); } while (0)
 
+/*
+ * NOTE! The intel "invlpg" semantics are extremely strange. The
+ * chip will add the segment base to the memory address, even though
+ * no segment checking is done. We correct for this by using an
+ * offset of 0x40000000 that will wrap around the kernel segment base
+ * of 0xC0000000 to get the correct address (it will always be outside
+ * the kernel segment, but we're only interested in the final linear
+ * address.
+ */
+#define __invlpg_mem(addr) \
+       (((char *)(addr))[0x40000000])
+#define __invlpg(addr) \
+       __asm__ __volatile__("invlpg %0": :"m" (__invlpg_mem(addr)))
+
+/*
+ * The i386 doesn't have a page-granular invalidate. Invalidate
+ * everything for it.
+ */
 #ifdef CONFIG_M386
-#define __flush_tlb_one(addr) flush_tlb()
+  #define __flush_tlb_one(addr) __flush_tlb()
 #else
-#define __flush_tlb_one(addr) \
-__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
+  #define __flush_tlb_one(addr) __invlpg(addr)
 #endif
  
 #ifndef __SMP__
index 60f7fed44e181021d1936d28b0855cb7db37b6a2..3829b1c36ec475fd0519f5c0340e5b352d6a8184 100644 (file)
@@ -5,8 +5,7 @@ typedef struct desc_struct {
        unsigned long a,b;
 } desc_table[256];
 
-extern desc_table __idt,gdt;
-extern struct desc_struct *idt;
+extern desc_table idt,gdt;
 
 #define GDT_NUL 0
 #define GDT_CODE 1